mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-13 14:40:22 +00:00
Remove unused vinum files.
This commit is contained in:
parent
6e67e2a710
commit
56f7479530
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=137198
@ -1,37 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
@ -1,78 +0,0 @@
|
||||
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $FreeBSD$
# $Id: makestatetext,v 1.7 1999/12/29 07:24:54 grog Exp grog $
#
# The output file holds a "created by" line, a copy of the licence and
# four string tables (drive, subdisk, plex and volume state texts).
# A table entry is produced for every line of $infile that contains
# "<prefix>_<name>," and no "=" sign; the entry is the quoted <name>.

infile=vinumstate.h
ofile=statetexts.h

# statenames prefix
# Append the table entries for the state names that start with "prefix_".
# The bracket expression lists the identifier characters explicitly: a
# range such as A-z is only defined in the C locale, where it also lets
# through [ \ ] ^ and the backquote.
statenames()
{
    grep -E -e "$1"'_[A-Za-z0-9_]*,' "$infile" | grep -v = |
        sed 's: *'"$1"'_\([^,]*\).*: \"\1\",:' >>"$ofile"
}

# The date is deliberately left unquoted so that echo joins its words
# with single blanks, exactly as before.
echo >"$ofile" "/* Created by $0 on" `date`. "Do not edit */"
echo >>"$ofile"
cat >>"$ofile" <<FOO
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called \`\`Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided \`\`as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*/

FOO

echo >>"$ofile" "/* Drive state texts */"
echo >>"$ofile" "char *drivestatetext [] =
{ "
statenames drive
cat <<FOO >>"$ofile"
};

/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
statenames sd
cat <<FOO >>"$ofile"
};

/* Plex state texts */
char *plexstatetext [] =
{
FOO
statenames plex
cat <<FOO >>"$ofile"
};

/* Volume state texts */
char *volstatetext [] =
{
FOO
statenames volume
cat <<FOO >>"$ofile"
};
FOO
|
@ -1,273 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: request.h,v 1.22 2003/04/24 04:37:08 grog Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
 * Information needed to set up a transfer.
 *
 * The first group of values are individual flag bits; the values
 * at the end are combinations of those bits.
 */
enum xferinfo {
    XFR_NORMAL_READ = 0x0001,
    XFR_NORMAL_WRITE = 0x0002,          /* write request in normal mode */
    XFR_RECOVERY_READ = 0x0004,
    XFR_DEGRADED_WRITE = 0x0008,
    XFR_PARITYLESS_WRITE = 0x0010,
    XFR_NO_PARITY_STRIPE = 0x0020,      /* parity stripe is not available */
    XFR_DATA_BLOCK = 0x0040,            /* data block in request */
    XFR_PARITY_BLOCK = 0x0080,          /* parity block in request */
    XFR_BAD_SUBDISK = 0x0100,           /* this subdisk is dead */
    XFR_MALLOCED = 0x0200,              /* this buffer is malloced */
#ifdef VINUMDEBUG
    XFR_PHASE2 = 0x0800,                /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 0x1000,        /* possible conflict with a revive operation */
    XFR_BUFLOCKED = 0x2000,             /* BUF_LOCK performed on this buffer */
    XFR_COPYBUF = 0x4000,               /* data buffer was copied */

    /* operations that need a parity block */
    XFR_PARITYOP = XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE,

    /* operations that use the group parameters */
    XFR_GROUPOP = XFR_DEGRADED_WRITE | XFR_RECOVERY_READ,

    /* operations that use the data parameters */
    XFR_DATAOP = XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE,

    /* operations requiring read before write */
    XFR_RBW = XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE,

    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
};
|
||||
|
||||
/*
|
||||
* Describe one low-level request, part of a
|
||||
* high-level request. This is an extended
|
||||
* struct buf buffer, and the first element
|
||||
* *must* be a struct buf. We pass this
|
||||
* structure to the I/O routines instead of a
|
||||
* struct buf in order to be able to locate the
|
||||
* high-level request when it completes.
|
||||
*
|
||||
* All offsets and lengths are in sectors.
|
||||
*/
|
||||
|
||||
struct rqelement {
|
||||
struct buf b; /* buf structure */
|
||||
struct rqgroup *rqg; /* pointer to our group */
|
||||
/* Information about the transfer */
|
||||
daddr_t sdoffset; /* offset in subdisk */
|
||||
int useroffset; /* offset in user buffer of normal data */
|
||||
/*
|
||||
* dataoffset and datalen refer to "individual" data
|
||||
* transfers which involve only this drive (normal read,
|
||||
* parityless write) and also degraded write.
|
||||
*
|
||||
* groupoffset and grouplen refer to the other "group"
|
||||
* operations (normal write, recovery read) which involve
|
||||
* more than one drive. Both the offsets are relative to
|
||||
* the start of the local buffer.
|
||||
*/
|
||||
int dataoffset; /* offset in buffer of the normal data */
|
||||
int groupoffset; /* offset in buffer of group data */
|
||||
short datalen; /* length of normal data (sectors) */
|
||||
short grouplen; /* length of group data (sectors) */
|
||||
short buflen; /* total buffer length to allocate */
|
||||
short flags; /* really enum xferinfo (see above) */
|
||||
/* Ways to find other components */
|
||||
short sdno; /* subdisk number */
|
||||
short driveno; /* drive number */
|
||||
struct timeval launchtime; /* time of launch, for info function */
|
||||
};
|
||||
|
||||
/*
|
||||
* A group of requests built to satisfy an I/O
|
||||
* transfer on a single plex.
|
||||
*/
|
||||
struct rqgroup {
|
||||
struct rqgroup *next; /* pointer to next group */
|
||||
struct request *rq; /* pointer to the request */
|
||||
short count; /* number of requests in this group */
|
||||
short active; /* and number active */
|
||||
short plexno; /* index of plex */
|
||||
int badsdno; /* index of bad subdisk or -1 */
|
||||
enum xferinfo flags; /* description of transfer */
|
||||
struct rangelock *lock; /* lock for this transfer */
|
||||
daddr_t lockbase; /* and lock address */
|
||||
struct rqelement rqe[0]; /* and the elements of this request */
|
||||
};
|
||||
|
||||
/*
|
||||
* Describe one high-level request and the
|
||||
* work we have to do to satisfy it.
|
||||
*/
|
||||
struct request {
|
||||
struct buf *bp; /* pointer to the high-level request */
|
||||
caddr_t save_data; /* for copied write buffers */
|
||||
enum xferinfo flags;
|
||||
union {
|
||||
int volno; /* volume index */
|
||||
int plexno; /* or plex index */
|
||||
} volplex;
|
||||
int error; /* current error indication */
|
||||
int sdno; /* reviving subdisk (XFR_REVIVECONFLICT) */
|
||||
short isplex; /* set if this is a plex request */
|
||||
short active; /* number of subrequests still active */
|
||||
struct rqgroup *rqg; /* pointer to the first group of requests */
|
||||
struct rqgroup *lrqg; /* and to the last group of requests */
|
||||
struct request *next; /* link of waiting requests */
|
||||
};
|
||||
|
||||
/*
|
||||
* Extended buffer header for subdisk I/O. Includes
|
||||
* a pointer to the user I/O request.
|
||||
*/
|
||||
struct sdbuf {
|
||||
struct buf b; /* our buffer */
|
||||
struct buf *bp; /* and pointer to parent */
|
||||
short driveno; /* drive index */
|
||||
short sdno; /* and subdisk index */
|
||||
};
|
||||
|
||||
/*
 * Values returned by rqe and friends.  The numeric order matters:
 * the values are in order of increasing seriousness, and some
 * routines check for > REQUEST_RECOVERED to indicate a failed
 * request.  XXX
 */
enum requeststatus {
    REQUEST_OK = 0,                     /* request built OK */
    REQUEST_RECOVERED = 1,              /* request OK, but involves RAID5 recovery */
    REQUEST_DEGRADED = 2,               /* parts of request failed */
    REQUEST_EOF = 3,                    /* parts of request failed: outside plex */
    REQUEST_DOWN = 4,                   /* all of request failed: subdisk(s) down */
    REQUEST_ENOMEM = 5                  /* all of request failed: ran out of memory */
};
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
/* Kinds of trace entry for request info (DEBUG_LASTREQS) */
enum rqinfo_type {
    loginfo_unused = 0,                 /* never been used */
    loginfo_user_bp = 1,                /* this is the bp when strategy is called */
    loginfo_user_bpl = 2,               /* and this is the bp at launch time */
    loginfo_rqe = 3,                    /* user RQE */
    loginfo_iodone = 4,                 /* iodone */
    loginfo_raid5_data = 5,             /* write RAID-5 data block */
    loginfo_raid5_parity = 6,           /* write RAID-5 parity block */
    loginfo_sdio = 7,                   /* subdisk I/O */
    loginfo_sdiol = 8,                  /* subdisk I/O launch */
    loginfo_sdiodone = 9,               /* subdisk iodone */
    loginfo_lockwait = 10,              /* wait for range lock */
    loginfo_lock = 11,                  /* lock range */
    loginfo_unlock = 12,                /* unlock range */
};
|
||||
|
||||
/*
|
||||
* This is the rangelock structure with an added
|
||||
* buffer pointer and plex number. We don't need
|
||||
* the plex number for the locking protocol, but
|
||||
* it does help a lot when logging.
|
||||
*/
|
||||
struct rangelockinfo {
|
||||
daddr_t stripe; /* address + 1 of the range being locked */
|
||||
struct buf *bp; /* user's buffer pointer */
|
||||
int plexno;
|
||||
};
|
||||
|
||||
/* Info to pass to logrq: a pointer to one of the loggable objects */
union rqinfou {
    struct buf *bp;
    struct rqelement *rqe;              /* address of request, for correlation */
    struct rangelockinfo *lockinfo;
};
|
||||
|
||||
struct rqinfo {
|
||||
enum rqinfo_type type; /* kind of event */
|
||||
struct timeval timestamp; /* time it happened */
|
||||
struct buf *bp; /* point to user buffer */
|
||||
int devmajor; /* major and minor device info */
|
||||
int devminor;
|
||||
union {
|
||||
struct buf b; /* yup, the *whole* buffer header */
|
||||
struct rqelement rqe; /* and the whole rqe */
|
||||
struct rangelock lockinfo;
|
||||
} info;
|
||||
};
|
||||
|
||||
#define RQINFO_SIZE 128 /* number of info slots in buffer */
|
||||
|
||||
void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp);
|
||||
#endif
|
||||
|
||||
/* Structures for the daemon */
|
||||
|
||||
/* Kinds of request that can be queued for the daemon */
enum daemonrq {
    daemonrq_none = 0,                  /* dummy to catch bugs */
    daemonrq_ioerror = 1,               /* error occurred on I/O */
    daemonrq_saveconfig = 2,            /* save configuration */
    daemonrq_return = 3,                /* return to userland */
    daemonrq_ping = 4,                  /* show sign of life */
    daemonrq_init = 5,                  /* initialize a plex */
    daemonrq_revive = 6,                /* revive a subdisk */
    daemonrq_closedrive = 7,            /* close a drive */
};
|
||||
|
||||
/*
 * Argument of a daemon request.  Which member is meaningful
 * depends on the request type noted against it.
 */
union daemoninfo {
    struct request *rq;                 /* for daemonrq_ioerror */
    struct sd *sd;                      /* for daemonrq_revive */
    struct plex *plex;                  /* for daemonrq_init */
    struct drive *drive;                /* for daemonrq_closedrive */
    int nothing;                        /* for passing NULL */
};
|
||||
|
||||
struct daemonq {
|
||||
struct daemonq *next; /* pointer to next element in queue */
|
||||
enum daemonrq type; /* type of request */
|
||||
int privateinuse; /* private element, being used */
|
||||
union daemoninfo info; /* and the request information */
|
||||
};
|
||||
|
||||
void queue_daemon_request(enum daemonrq type, union daemoninfo info);
|
||||
|
||||
extern int daemon_options;
|
||||
|
||||
/* Option values; each is a separate bit */
enum daemon_option {
    daemon_verbose = 0x1,               /* talk about what we're doing */
    daemon_stopped = 0x2,
    daemon_noupdate = 0x4,              /* don't update the disk config, for recovery */
};
|
||||
|
||||
void freerq(struct request *rq);
|
||||
void unlockrange(int plexno, struct rangelock *);
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,91 +0,0 @@
|
||||
/* Created by ./makestatetext on Wed Jan 5 10:05:30 CST 2000. Do not edit */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* Drive state texts */
char *drivestatetext[] = {
    "unallocated", "referenced", "down", "up",
};
|
||||
|
||||
/* Subdisk state texts */
char *sdstatetext[] = {
    "unallocated", "uninit", "referenced", "init",
    "empty", "initializing", "initialized", "obsolete",
    "stale", "crashed", "down", "reviving",
    "reborn", "up",
};
|
||||
|
||||
/* Plex state texts */
char *plexstatetext[] = {
    "unallocated", "referenced", "init", "faulty", "down",
    "initializing", "corrupt", "degraded", "flaky", "up",
};
|
||||
|
||||
/* Volume state texts */
char *volstatetext[] = {
    "unallocated", "uninit", "down", "up",
};
|
@ -1,542 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinum.c,v 1.44 2003/05/23 00:50:55 grog Exp grog $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#define STATIC static /* nothing while we're testing */
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <sys/sysproto.h> /* for sync(2) */
|
||||
#ifdef VINUMDEBUG
|
||||
#include <sys/reboot.h>
|
||||
int debug = 0; /* debug flags */
|
||||
extern int total_malloced;
|
||||
extern int malloccount;
|
||||
extern struct mc malloced[];
|
||||
#endif
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
struct cdevsw vinum_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_open = vinumopen,
|
||||
.d_close = vinumclose,
|
||||
.d_read = physread,
|
||||
.d_write = physwrite,
|
||||
.d_ioctl = vinumioctl,
|
||||
.d_strategy = vinumstrategy,
|
||||
.d_name = "vinum",
|
||||
.d_flags = D_DISK | D_NEEDGIANT
|
||||
};
|
||||
|
||||
/* Called by main() during pseudo-device attachment. */
|
||||
void vinumattach(void *);
|
||||
STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused);
|
||||
STATIC void vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev);
|
||||
|
||||
struct _vinum_conf vinum_conf; /* configuration information */
|
||||
|
||||
struct cdev *vinum_daemon_dev;
|
||||
struct cdev *vinum_super_dev;
|
||||
|
||||
static eventhandler_tag dev_clone_tag;
|
||||
|
||||
/*
|
||||
* Mutexes for plex synchronization. Ideally each plex
|
||||
* should have its own mutex, but the fact that the plex
|
||||
* struct can move makes that very complicated. Instead,
|
||||
* have plexes share these mutexes based on modulo plex
|
||||
* number.
|
||||
*/
|
||||
struct mtx plexmutex[PLEXMUTEXES];
|
||||
|
||||
/*
|
||||
* Called by main() during pseudo-device attachment. All we need
|
||||
* to do is allocate enough space for devices to be configured later, and
|
||||
* add devsw entries.
|
||||
*/
|
||||
void
|
||||
vinumattach(void *dummy)
|
||||
{
|
||||
char *envp;
|
||||
int i;
|
||||
#define MUTEXNAMELEN 16
|
||||
char mutexname[MUTEXNAMELEN];
|
||||
#if PLEXMUTEXES > 10000
|
||||
#error Increase size of MUTEXNAMELEN
|
||||
#endif
|
||||
/* modload should prevent multiple loads, so this is worth a panic */
|
||||
if ((vinum_conf.flags & VF_LOADED) != 0)
|
||||
panic("vinum: already loaded");
|
||||
|
||||
log(LOG_INFO, "vinum: loaded\n");
|
||||
#ifdef VINUMDEBUG
|
||||
vinum_conf.flags |= VF_LOADED | VF_HASDEBUG; /* we're loaded now, and we support debug */
|
||||
#else
|
||||
vinum_conf.flags |= VF_LOADED; /* we're loaded now */
|
||||
#endif
|
||||
|
||||
daemonq = NULL; /* initialize daemon's work queue */
|
||||
dqend = NULL;
|
||||
|
||||
vinum_daemon_dev = make_dev(&vinum_cdevsw,
|
||||
VINUM_DAEMON_MINOR,
|
||||
UID_ROOT,
|
||||
GID_WHEEL,
|
||||
S_IRUSR | S_IWUSR,
|
||||
"vinum/controld");
|
||||
vinum_super_dev = make_dev(&vinum_cdevsw,
|
||||
VINUM_SUPERDEV_MINOR,
|
||||
UID_ROOT,
|
||||
GID_WHEEL,
|
||||
S_IRUSR | S_IWUSR,
|
||||
"vinum/control");
|
||||
|
||||
vinum_conf.version = VINUMVERSION; /* note what version we are */
|
||||
|
||||
/* allocate space: drives... */
|
||||
DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
|
||||
CHECKALLOC(DRIVE, "vinum: no memory\n");
|
||||
bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES);
|
||||
vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
|
||||
vinum_conf.drives_used = 0; /* and number in use */
|
||||
|
||||
/* volumes, ... */
|
||||
VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
|
||||
CHECKALLOC(VOL, "vinum: no memory\n");
|
||||
bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES);
|
||||
vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
|
||||
vinum_conf.volumes_used = 0; /* and number in use */
|
||||
|
||||
/* plexes, ... */
|
||||
PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
|
||||
CHECKALLOC(PLEX, "vinum: no memory\n");
|
||||
bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES);
|
||||
vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
|
||||
vinum_conf.plexes_used = 0; /* and number in use */
|
||||
|
||||
for (i = 0; i < PLEXMUTEXES; i++) {
|
||||
snprintf(mutexname, MUTEXNAMELEN, "vinumplex%d", i);
|
||||
mtx_init(&plexmutex[i], mutexname, "plex", MTX_DEF);
|
||||
}
|
||||
|
||||
/* and subdisks */
|
||||
SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
|
||||
CHECKALLOC(SD, "vinum: no memory\n");
|
||||
bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS);
|
||||
vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
|
||||
vinum_conf.subdisks_used = 0; /* and number in use */
|
||||
dev_clone_tag = EVENTHANDLER_REGISTER(dev_clone, vinum_clone, 0, 1000);
|
||||
|
||||
/*
|
||||
* See if the loader has passed us any of the autostart
|
||||
* options.
|
||||
*/
|
||||
envp = NULL;
|
||||
if ((envp = getenv("vinum.autostart")) != NULL) { /* start all drives now */
|
||||
vinum_scandisk(NULL);
|
||||
freeenv(envp);
|
||||
} else if ((envp = getenv("vinum.drives")) != NULL) {
|
||||
vinum_scandisk(envp);
|
||||
freeenv(envp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we have anything open. If confopen is != 0,
|
||||
* that goes for the super device as well, otherwise
|
||||
* only for volumes.
|
||||
*
|
||||
* Return 0 if not inactive, 1 if inactive.
|
||||
*/
|
||||
int
|
||||
vinum_inactive(int confopen)
|
||||
{
|
||||
int i;
|
||||
int can_do = 1; /* assume we can do it */
|
||||
|
||||
if (confopen && (vinum_conf.flags & VF_OPEN)) /* open by vinum(8)? */
|
||||
return 0; /* can't do it while we're open */
|
||||
lock_config();
|
||||
for (i = 0; i < vinum_conf.volumes_allocated; i++) {
|
||||
if ((VOL[i].state > volume_down)
|
||||
&& (VOL[i].flags & VF_OPEN)) { /* volume is open */
|
||||
can_do = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unlock_config();
|
||||
return can_do;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free all structures.
|
||||
* If cleardrive is 0, save the configuration; otherwise
|
||||
* remove the configuration from the drive.
|
||||
*
|
||||
* Before coming here, ensure that no volumes are open.
|
||||
*/
|
||||
void
|
||||
free_vinum(int cleardrive)
|
||||
{
|
||||
int i;
|
||||
int drives_allocated = vinum_conf.drives_allocated;
|
||||
|
||||
while ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN))
|
||||
== (VF_STOPPING | VF_DAEMONOPEN)) { /* at least one daemon open, we're stopping */
|
||||
queue_daemon_request(daemonrq_return, (union daemoninfo) 0); /* stop the daemon */
|
||||
tsleep(&vinumclose, PUSER, "vstop", 1); /* and wait for it */
|
||||
}
|
||||
if (DRIVE != NULL) {
|
||||
if (cleardrive) { /* remove the vinum config */
|
||||
for (i = 0; i < drives_allocated; i++)
|
||||
remove_drive(i); /* remove the drive */
|
||||
} else { /* keep the config */
|
||||
for (i = 0; i < drives_allocated; i++)
|
||||
free_drive(&DRIVE[i]); /* close files and things */
|
||||
}
|
||||
Free(DRIVE);
|
||||
}
|
||||
if (SD != NULL) {
|
||||
for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
|
||||
struct sd *sd = &SD[i];
|
||||
|
||||
if (sd->state != sd_unallocated)
|
||||
free_sd(i);
|
||||
}
|
||||
Free(SD);
|
||||
}
|
||||
if (PLEX != NULL) {
|
||||
for (i = 0; i < vinum_conf.plexes_allocated; i++) {
|
||||
struct plex *plex = &PLEX[i];
|
||||
|
||||
if (plex->state != plex_unallocated) /* we have real data there */
|
||||
free_plex(i);
|
||||
}
|
||||
Free(PLEX);
|
||||
}
|
||||
if (VOL != NULL) {
|
||||
for (i = 0; i < vinum_conf.volumes_allocated; i++) {
|
||||
struct volume *volume = &VOL[i];
|
||||
|
||||
if (volume->state != volume_unallocated)
|
||||
free_volume(i);
|
||||
}
|
||||
Free(VOL);
|
||||
}
|
||||
bzero(&vinum_conf, sizeof(vinum_conf));
|
||||
vinum_conf.version = VINUMVERSION; /* reinstate version number */
|
||||
}
|
||||
|
||||
STATIC int
|
||||
vinum_modevent(module_t mod, modeventtype_t type, void *unused)
|
||||
{
|
||||
struct sync_args dummyarg =
|
||||
{0};
|
||||
int i;
|
||||
|
||||
switch (type) {
|
||||
case MOD_LOAD:
|
||||
vinumattach(NULL);
|
||||
return 0; /* OK */
|
||||
case MOD_UNLOAD:
|
||||
if (!vinum_inactive(1)) /* is anything open? */
|
||||
return EBUSY; /* yes, we can't do it */
|
||||
vinum_conf.flags |= VF_STOPPING; /* note that we want to stop */
|
||||
sync(curthread, &dummyarg); /* write out buffers */
|
||||
free_vinum(0); /* clean up */
|
||||
#ifdef VINUMDEBUG
|
||||
if (total_malloced) {
|
||||
int i;
|
||||
#ifdef INVARIANTS
|
||||
int *poke;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if (debug & DEBUG_WARNINGS) /* want to hear about them */
|
||||
log(LOG_WARNING,
|
||||
"vinum: exiting with %d bytes malloced from %s:%d\n",
|
||||
malloced[i].size,
|
||||
malloced[i].file,
|
||||
malloced[i].line);
|
||||
#ifdef INVARIANTS
|
||||
poke = &((int *) malloced[i].address)
|
||||
[malloced[i].size / (2 * sizeof(int))]; /* middle of the area */
|
||||
if (*poke == 0xdeadc0de) /* already freed */
|
||||
log(LOG_ERR,
|
||||
"vinum: exiting with malloc table inconsistency at %p from %s:%d\n",
|
||||
malloced[i].address,
|
||||
malloced[i].file,
|
||||
malloced[i].line);
|
||||
#endif
|
||||
Free(malloced[i].address);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
destroy_dev(vinum_daemon_dev); /* daemon device */
|
||||
destroy_dev(vinum_super_dev);
|
||||
for (i = 0; i < PLEXMUTEXES; i++)
|
||||
mtx_destroy(&plexmutex[i]);
|
||||
log(LOG_INFO, "vinum: unloaded\n"); /* tell the world */
|
||||
EVENTHANDLER_DEREGISTER(dev_clone, dev_clone_tag);
|
||||
return 0;
|
||||
default:
|
||||
return EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static moduledata_t vinum_mod =
|
||||
{
|
||||
"vinum",
|
||||
(modeventhand_t) vinum_modevent,
|
||||
0
|
||||
};
|
||||
DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
|
||||
|
||||
/* ARGSUSED */
|
||||
/* Open a vinum object */
|
||||
int
|
||||
vinumopen(struct cdev *dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct thread *td)
|
||||
{
|
||||
int error;
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
struct sd *sd;
|
||||
int devminor; /* minor number */
|
||||
|
||||
devminor = minor(dev);
|
||||
error = 0;
|
||||
/* First, decide what we're looking at */
|
||||
switch (DEVTYPE(dev)) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
/*
|
||||
* The super device and daemon device are the last two
|
||||
* volume numbers, so check for them first.
|
||||
*/
|
||||
if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */
|
||||
||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */
|
||||
error = suser(td); /* are we root? */
|
||||
|
||||
if (error == 0) { /* yes, can do */
|
||||
if (devminor == VINUM_DAEMON_MINOR) /* daemon device */
|
||||
vinum_conf.flags |= VF_DAEMONOPEN; /* we're open */
|
||||
else /* superdev */
|
||||
vinum_conf.flags |= VF_OPEN; /* we're open */
|
||||
}
|
||||
return error;
|
||||
}
|
||||
/* Must be a real volume. Check. */
|
||||
index = Volno(dev);
|
||||
if (index >= vinum_conf.volumes_allocated)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
vol->flags |= VF_OPEN; /* note we're open */
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
index = Plexno(dev); /* get plex index in vinum_conf */
|
||||
if (index >= vinum_conf.plexes_allocated)
|
||||
return ENXIO; /* no such device */
|
||||
plex = &PLEX[index];
|
||||
|
||||
switch (plex->state) {
|
||||
case plex_unallocated:
|
||||
return ENXIO;
|
||||
|
||||
case plex_referenced:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
plex->flags |= VF_OPEN; /* note we're open */
|
||||
return 0;
|
||||
}
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
case VINUM_SD2_TYPE:
|
||||
index = Sdno(dev); /* get the subdisk number */
|
||||
if (index >= vinum_conf.subdisks_allocated) /* not a valid SD entry */
|
||||
return ENXIO; /* no such device */
|
||||
sd = &SD[index];
|
||||
|
||||
/*
|
||||
* Opening a subdisk is always a special operation, so
|
||||
* we ignore the state as long as it represents a real
|
||||
* subdisk.
|
||||
*/
|
||||
switch (sd->state) {
|
||||
case sd_unallocated:
|
||||
return ENXIO;
|
||||
|
||||
case sd_uninit:
|
||||
case sd_referenced:
|
||||
return EINVAL;
|
||||
|
||||
default:
|
||||
sd->flags |= VF_OPEN; /* note we're open */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0; /* to keep the compiler happy */
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
vinumclose(struct cdev *dev,
|
||||
int flags,
|
||||
int fmt,
|
||||
struct thread *td)
|
||||
{
|
||||
unsigned int index;
|
||||
struct volume *vol;
|
||||
int devminor;
|
||||
|
||||
devminor = minor(dev);
|
||||
/* First, decide what we're looking at */
|
||||
switch (DEVTYPE(dev)) {
|
||||
case VINUM_VOLUME_TYPE:
|
||||
/*
|
||||
* The super device and daemon device are the last two
|
||||
* volume numbers, so check for them first.
|
||||
*/
|
||||
if ((devminor == VINUM_DAEMON_MINOR) /* daemon device */
|
||||
||(devminor == VINUM_SUPERDEV_MINOR)) { /* or normal super device */
|
||||
/*
|
||||
* don't worry about whether we're root:
|
||||
* nobody else would get this far.
|
||||
*/
|
||||
if (devminor == VINUM_SUPERDEV_MINOR) /* normal superdev */
|
||||
vinum_conf.flags &= ~VF_OPEN; /* no longer open */
|
||||
else { /* the daemon device */
|
||||
vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */
|
||||
if (vinum_conf.flags & VF_STOPPING) /* we're trying to stop, */
|
||||
wakeup(&vinumclose); /* we can continue now */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/* Real volume */
|
||||
index = Volno(dev);
|
||||
if (index >= vinum_conf.volumes_allocated)
|
||||
return ENXIO; /* no such device */
|
||||
vol = &VOL[index];
|
||||
|
||||
switch (vol->state) {
|
||||
case volume_unallocated:
|
||||
case volume_uninit:
|
||||
return ENXIO;
|
||||
|
||||
case volume_up:
|
||||
vol->flags &= ~VF_OPEN; /* reset our flags */
|
||||
return 0;
|
||||
|
||||
case volume_down:
|
||||
return EIO;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
if (Volno(dev) >= vinum_conf.volumes_allocated)
|
||||
return ENXIO;
|
||||
index = Plexno (dev);
|
||||
if (index >= vinum_conf.plexes_allocated) /* no such plex */
|
||||
return ENXIO;
|
||||
PLEX [index].flags &= ~VF_OPEN; /* no longer open */
|
||||
return 0;
|
||||
|
||||
case VINUM_SD_TYPE:
|
||||
if ((Volno(dev) >= vinum_conf.volumes_allocated) || /* no such volume */
|
||||
(Plexno(dev) >= vinum_conf.plexes_allocated)) /* or no such plex */
|
||||
return ENXIO; /* no such device */
|
||||
index = Sdno (dev);
|
||||
if (index >= vinum_conf.subdisks_allocated) /* no such sd */
|
||||
return ENXIO;
|
||||
SD [index].flags &= ~VF_OPEN; /* no longer open */
|
||||
return 0;
|
||||
|
||||
|
||||
default:
|
||||
return ENODEV; /* don't know what to do with these */
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev)
|
||||
{
|
||||
struct volume *vol;
|
||||
int i;
|
||||
|
||||
if (*dev != NULL)
|
||||
return;
|
||||
if (strncmp(name, "vinum/", sizeof("vinum/") - 1) != 0)
|
||||
return;
|
||||
|
||||
name += sizeof("vinum/") - 1;
|
||||
if ((i = find_volume(name, 0)) == -1)
|
||||
return;
|
||||
|
||||
vol = &VOL[i];
|
||||
*dev = vol->dev;
|
||||
}
|
||||
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 60 */
|
||||
/* End: */
|
File diff suppressed because it is too large
Load Diff
@ -1,283 +0,0 @@
|
||||
/* daemon.c: kernel part of Vinum daemon */
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumdaemon.c,v 1.8 2000/01/03 05:22:03 grog Exp grog $
|
||||
*/
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
#include <sys/reboot.h>
|
||||
#endif
|
||||
|
||||
/* declarations */
|
||||
void recover_io(struct request *rq);
|
||||
|
||||
int daemon_options = 0; /* options */
|
||||
int daemonpid; /* PID of daemon */
|
||||
struct daemonq *daemonq; /* daemon's work queue */
|
||||
struct daemonq *dqend; /* and the end of the queue */
|
||||
|
||||
/*
|
||||
* We normally call Malloc to get a queue element. In interrupt
|
||||
* context, we can't guarantee that we'll get one, since we're not
|
||||
* allowed to wait. If malloc fails, use one of these elements.
|
||||
*/
|
||||
|
||||
#define INTQSIZE 4
|
||||
struct daemonq intq[INTQSIZE]; /* queue elements for interrupt context */
|
||||
struct daemonq *intqp; /* and pointer in it */
|
||||
|
||||
void
|
||||
vinum_daemon(void)
|
||||
{
|
||||
int s;
|
||||
struct daemonq *request;
|
||||
|
||||
PROC_LOCK(curproc);
|
||||
curproc->p_flag |= P_SYSTEM; /* we're a system process */
|
||||
mtx_lock_spin(&sched_lock);
|
||||
curproc->p_sflag |= PS_INMEM;
|
||||
mtx_unlock_spin(&sched_lock);
|
||||
PROC_UNLOCK(curproc);
|
||||
daemon_save_config(); /* start by saving the configuration */
|
||||
daemonpid = curproc->p_pid; /* mark our territory */
|
||||
while (1) {
|
||||
tsleep(&vinum_daemon, PRIBIO, "vinum", 0); /* wait for something to happen */
|
||||
|
||||
/*
|
||||
* It's conceivable that, as the result of an
|
||||
* I/O error, we'll be out of action long
|
||||
* enough that another daemon gets started.
|
||||
* That's OK, just give up gracefully.
|
||||
*/
|
||||
if (curproc->p_pid != daemonpid) { /* we've been ousted in our sleep */
|
||||
if (daemon_options & daemon_verbose)
|
||||
log(LOG_INFO, "vinum: abdicating\n");
|
||||
return;
|
||||
}
|
||||
while (daemonq != NULL) { /* we have work to do, */
|
||||
s = splhigh(); /* don't get interrupted here */
|
||||
request = daemonq; /* get the request */
|
||||
daemonq = daemonq->next; /* and detach it */
|
||||
if (daemonq == NULL) /* got to the end, */
|
||||
dqend = NULL; /* no end any more */
|
||||
splx(s);
|
||||
|
||||
switch (request->type) {
|
||||
/*
|
||||
* We had an I/O error on a request. Go through the
|
||||
* request and try to salvage it
|
||||
*/
|
||||
case daemonrq_ioerror:
|
||||
if (daemon_options & daemon_verbose) {
|
||||
struct request *rq = request->info.rq;
|
||||
|
||||
log(LOG_WARNING,
|
||||
"vinum: recovering I/O request: %p\n%s dev %d.%d, offset 0x%llx, length %ld\n",
|
||||
rq,
|
||||
rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
|
||||
major(rq->bp->b_dev),
|
||||
minor(rq->bp->b_dev),
|
||||
(long long)rq->bp->b_blkno,
|
||||
rq->bp->b_bcount);
|
||||
}
|
||||
recover_io(request->info.rq); /* the failed request */
|
||||
break;
|
||||
|
||||
/*
|
||||
* Write the config to disk. We could end up with
|
||||
* quite a few of these in a row. Only honour the
|
||||
* last one
|
||||
*/
|
||||
case daemonrq_saveconfig:
|
||||
if ((daemonq == NULL) /* no more requests */
|
||||
||(daemonq->type != daemonrq_saveconfig)) { /* or the next isn't the same */
|
||||
if (((daemon_options & daemon_noupdate) == 0) /* we're allowed to do it */
|
||||
&&((vinum_conf.flags & VF_READING_CONFIG) == 0)) { /* and we're not building the config now */
|
||||
/*
|
||||
* We obviously don't want to save a
|
||||
* partial configuration. Less obviously,
|
||||
* we don't need to do anything if we're
|
||||
* asked to write the config when we're
|
||||
* building it up, because we save it at
|
||||
* the end.
|
||||
*/
|
||||
if (daemon_options & daemon_verbose)
|
||||
log(LOG_INFO, "vinum: saving config\n");
|
||||
daemon_save_config(); /* save it */
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case daemonrq_return: /* been told to stop */
|
||||
if (daemon_options & daemon_verbose)
|
||||
log(LOG_INFO, "vinum: stopping\n");
|
||||
daemon_options |= daemon_stopped; /* note that we've stopped */
|
||||
Free(request);
|
||||
while (daemonq != NULL) { /* backed up requests, */
|
||||
request = daemonq; /* get the request */
|
||||
daemonq = daemonq->next; /* and detach it */
|
||||
Free(request); /* then free it */
|
||||
}
|
||||
wakeup(&vinumclose); /* and wake any waiting vinum(8)s */
|
||||
return;
|
||||
|
||||
case daemonrq_ping: /* tell the caller we're here */
|
||||
if (daemon_options & daemon_verbose)
|
||||
log(LOG_INFO, "vinum: ping reply\n");
|
||||
wakeup(&vinum_finddaemon); /* wake up the caller */
|
||||
break;
|
||||
|
||||
case daemonrq_closedrive: /* close a drive */
|
||||
close_drive(request->info.drive); /* do it */
|
||||
break;
|
||||
|
||||
case daemonrq_init: /* initialize a plex */
|
||||
/* XXX */
|
||||
case daemonrq_revive: /* revive a subdisk */
|
||||
/* XXX */
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
log(LOG_WARNING, "Invalid request\n");
|
||||
break;
|
||||
}
|
||||
if (request->privateinuse) /* one of ours, */
|
||||
request->privateinuse = 0; /* no longer in use */
|
||||
else
|
||||
Free(request); /* return it */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Recover a failed I/O operation.
|
||||
*
|
||||
* The correct way to do this is to examine the request and determine
|
||||
* how to recover each individual failure. In the case of a write,
|
||||
* this could be as simple as doing nothing: the defective drives may
|
||||
* already be down, and there may be nothing else to do. In case of
|
||||
* a read, it will be necessary to retry if there are alternative
|
||||
* copies of the data.
|
||||
*
|
||||
* The easy way (here) is just to reissue the request. This will take
|
||||
* a little longer, but nothing like as long as the failure will have
|
||||
* taken.
|
||||
*
|
||||
*/
|
||||
void
|
||||
recover_io(struct request *rq)
|
||||
{
|
||||
/*
|
||||
* This should read:
|
||||
*
|
||||
* vinumstrategy(rq->bp);
|
||||
*
|
||||
* Negotiate with phk to get it fixed.
|
||||
*/
|
||||
DEV_STRATEGY(rq->bp); /* reissue the command */
|
||||
}
|
||||
|
||||
/* Functions called to interface with the daemon */
|
||||
|
||||
/* queue a request for the daemon */
|
||||
void
|
||||
queue_daemon_request(enum daemonrq type, union daemoninfo info)
|
||||
{
|
||||
int s;
|
||||
|
||||
struct daemonq *qelt = (struct daemonq *) Malloc(sizeof(struct daemonq));
|
||||
|
||||
if (qelt == NULL) { /* malloc failed, we're prepared for that */
|
||||
/*
|
||||
* Take one of our spares. Give up if it's still in use; the only
|
||||
* message we're likely to get here is a 'drive failed' message,
|
||||
* and that'll come by again if we miss it.
|
||||
*/
|
||||
if (intqp->privateinuse) /* still in use? */
|
||||
return; /* yes, give up */
|
||||
qelt = intqp++;
|
||||
if (intqp == &intq[INTQSIZE]) /* got to the end, */
|
||||
intqp = intq; /* wrap around */
|
||||
qelt->privateinuse = 1; /* it's ours, and it's in use */
|
||||
} else
|
||||
qelt->privateinuse = 0;
|
||||
|
||||
qelt->next = NULL; /* end of the chain */
|
||||
qelt->type = type;
|
||||
qelt->info = info;
|
||||
s = splhigh();
|
||||
if (daemonq) { /* something queued already */
|
||||
dqend->next = qelt;
|
||||
dqend = qelt;
|
||||
} else { /* queue is empty, */
|
||||
daemonq = qelt; /* this is the whole queue */
|
||||
dqend = qelt;
|
||||
}
|
||||
splx(s);
|
||||
wakeup(&vinum_daemon); /* and give the dæmon a kick */
|
||||
}
|
||||
|
||||
/*
|
||||
* see if the daemon is running. Return 0 (no error)
|
||||
* if it is, ESRCH otherwise
|
||||
*/
|
||||
int
|
||||
vinum_finddaemon()
|
||||
{
|
||||
int result;
|
||||
|
||||
if (daemonpid != 0) { /* we think we have a daemon, */
|
||||
queue_daemon_request(daemonrq_ping, (union daemoninfo) 0); /* queue a ping */
|
||||
result = tsleep(&vinum_finddaemon, PUSER, "reap", 2 * hz);
|
||||
if (result == 0) /* yup, the daemon's up and running */
|
||||
return 0;
|
||||
}
|
||||
/* no daemon, or we couldn't talk to it: start it */
|
||||
vinum_daemon(); /* start the daemon */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
vinum_setdaemonopts(int options)
|
||||
{
|
||||
daemon_options = options;
|
||||
return 0;
|
||||
}
|
@ -1,261 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumext.h,v 1.33 2003/05/23 00:57:48 grog Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/* vinumext.h: external definitions */
|
||||
|
||||
/* *sigh* We still need this at the moment. */
|
||||
#ifdef _KERNEL
|
||||
extern struct _vinum_conf vinum_conf; /* configuration information */
|
||||
extern struct mtx plexmutex[]; /* mutexes for plexes to use */
|
||||
#else
|
||||
extern struct __vinum_conf vinum_conf; /* configuration information */
|
||||
#endif
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
extern int debug; /* debug flags */
|
||||
#endif
|
||||
|
||||
/* Physical read and write drive */
#define read_drive(a, b, c, d) driveio (a, b, c, d, BIO_READ)
#define write_drive(a, b, c, d) driveio (a, b, c, d, BIO_WRITE)

/*
 * Bail out of a configuration command if an allocation failed: when
 * ptr is NULL, print msg and longjmp to command_fail with value -1.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else), ptr is
 * parenthesized, and msg is printed as data rather than being used
 * as the format string.
 */
#define CHECKALLOC(ptr, msg)			    \
    do {					    \
	if ((ptr) == NULL) {			    \
	    printf("%s", (msg));		    \
	    longjmp(command_fail, -1);		    \
	}					    \
    } while (0)
|
||||
#ifndef _KERNEL
|
||||
struct vnode;
|
||||
struct thread;
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
int vinum_inactive(int);
|
||||
void free_vinum(int);
|
||||
int give_sd_to_plex(int plexno, int sdno);
|
||||
void give_sd_to_drive(int sdno);
|
||||
int give_plex_to_volume(int, int, int);
|
||||
struct drive *check_drive(char *);
|
||||
enum drive_label_info read_drive_label(struct drive *, int);
|
||||
int parse_config(char *, struct keywordset *, int);
|
||||
int parse_user_config(char *cptr, struct keywordset *keyset);
|
||||
u_int64_t sizespec(char *spec);
|
||||
int volume_index(struct volume *volume);
|
||||
int plex_index(struct plex *plex);
|
||||
int sd_index(struct sd *sd);
|
||||
int drive_index(struct drive *drive);
|
||||
int my_plex(int volno, int plexno);
|
||||
int my_sd(int plexno, int sdno);
|
||||
int get_empty_drive(void);
|
||||
int find_drive(const char *name, int create);
|
||||
int find_drive_by_name(const char *devname, int create);
|
||||
int get_empty_sd(void);
|
||||
int find_subdisk(const char *name, int create);
|
||||
void return_drive_space(int driveno, int64_t offset, int length);
|
||||
void free_sd(int sdno);
|
||||
void free_volume(int volno);
|
||||
int get_empty_plex(void);
|
||||
int find_plex(const char *name, int create);
|
||||
void free_plex(int plexno);
|
||||
int get_empty_volume(void);
|
||||
int find_volume(const char *name, int create);
|
||||
void config_subdisk(int);
|
||||
void config_plex(int);
|
||||
void config_volume(int);
|
||||
void config_drive(int);
|
||||
void updateconfig(int);
|
||||
void update_sd_config(int sdno, int kernelstate);
|
||||
void update_plex_config(int plexno, int kernelstate);
|
||||
void update_volume_config(int volno);
|
||||
void update_config(void);
|
||||
void drive_io_done(struct buf *);
|
||||
void save_config(void);
|
||||
void daemon_save_config(void);
|
||||
void write_config(char *, int);
|
||||
int start_config(int);
|
||||
void finish_config(int);
|
||||
void remove(struct vinum_ioctl_msg *msg);
|
||||
void remove_drive_entry(int driveno, int force);
|
||||
void remove_sd_entry(int sdno, int force, int recurse);
|
||||
void remove_plex_entry(int plexno, int force, int recurse);
|
||||
void remove_volume_entry(int volno, int force, int recurse);
|
||||
|
||||
void checkdiskconfig(char *);
|
||||
int open_drive(struct drive *, struct thread *, int);
|
||||
void close_drive(struct drive *drive);
|
||||
void close_locked_drive(struct drive *drive);
|
||||
int driveio(struct drive *, char *, size_t, off_t, int);
|
||||
int set_drive_parms(struct drive *drive);
|
||||
int init_drive(struct drive *, int);
|
||||
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
|
||||
void throw_rude_remark(int, char *,...);
|
||||
|
||||
void format_config(char *config, int len);
|
||||
void checkkernel(char *op);
|
||||
void free_drive(struct drive *drive);
|
||||
void down_drive(struct drive *drive);
|
||||
void remove_drive(int driveno);
|
||||
|
||||
int vinum_scandisk(char *drivename);
|
||||
|
||||
/* I/O */
|
||||
d_open_t vinumopen;
|
||||
d_close_t vinumclose;
|
||||
d_strategy_t vinumstrategy;
|
||||
d_ioctl_t vinumioctl;
|
||||
|
||||
int vinum_super_ioctl(struct cdev *, u_long, caddr_t);
|
||||
int vinumstart(struct buf *bp, int reviveok);
|
||||
int launch_requests(struct request *rq, int reviveok);
|
||||
void sdio(struct buf *bp);
|
||||
|
||||
/* XXX Do we need this? */
|
||||
int vinumpart(struct cdev *);
|
||||
|
||||
extern jmp_buf command_fail; /* return here if config fails */
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
/* Memory allocation and request tracing */
|
||||
void vinum_meminfo(caddr_t data);
|
||||
int vinum_mallocinfo(caddr_t data);
|
||||
int vinum_rqinfo(caddr_t data);
|
||||
void LongJmp(jmp_buf, int);
|
||||
char *basename(char *);
|
||||
#endif
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
void expand_table(void **, int, int, char *, int);
|
||||
#else
|
||||
void expand_table(void **, int, int);
|
||||
#endif
|
||||
|
||||
struct disklabel;
|
||||
struct request;
|
||||
struct rqgroup *allocrqg(struct request *rq, int elements);
|
||||
void deallocrqg(struct rqgroup *rqg);
|
||||
|
||||
/* Device number decoding */
|
||||
int Volno(struct cdev *x);
|
||||
int Plexno(struct cdev *x);
|
||||
int Sdno(struct cdev *x);
|
||||
|
||||
/* State transitions */
|
||||
int set_drive_state(int driveno, enum drivestate state, enum setstateflags flags);
|
||||
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
|
||||
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
|
||||
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
|
||||
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
|
||||
void update_sd_state(int sdno);
|
||||
void forceup(int plexno);
|
||||
void update_plex_state(int plexno);
|
||||
void update_volume_state(int volno);
|
||||
void invalidate_subdisks(struct plex *, enum sdstate);
|
||||
void start_object(struct vinum_ioctl_msg *);
|
||||
void stop_object(struct vinum_ioctl_msg *);
|
||||
void setstate(struct vinum_ioctl_msg *msg);
|
||||
void setstate_by_force(struct vinum_ioctl_msg *msg);
|
||||
void vinum_label(int);
|
||||
int vinum_writedisklabel(struct volume *, struct disklabel *);
|
||||
int initsd(int, int);
|
||||
struct buf *parityrebuild(struct plex *, u_int64_t, int, enum parityop, struct rangelock **, off_t *);
|
||||
enum requeststatus sddownstate(struct request *rq);
|
||||
|
||||
int restart_plex(int plexno);
|
||||
int revive_read(struct sd *sd);
|
||||
int revive_block(int sdno);
|
||||
void parityops(struct vinum_ioctl_msg *);
|
||||
|
||||
/* Auxiliary functions */
|
||||
enum sdstates sdstatemap(struct plex *plex);
|
||||
enum volplexstate vpstate(struct plex *plex);
|
||||
#endif
|
||||
|
||||
struct drive *validdrive(int driveno, struct _ioctl_reply *);
|
||||
struct sd *validsd(int sdno, struct _ioctl_reply *);
|
||||
struct plex *validplex(int plexno, struct _ioctl_reply *);
|
||||
struct volume *validvol(int volno, struct _ioctl_reply *);
|
||||
void resetstats(struct vinum_ioctl_msg *msg);
|
||||
|
||||
/* Locking */
|
||||
#ifdef VINUMDEBUG
|
||||
int lockdrive(struct drive *drive, char *, int);
|
||||
#else
|
||||
int lockdrive(struct drive *drive);
|
||||
#endif
|
||||
void unlockdrive(struct drive *drive);
|
||||
int lockvol(struct volume *vol);
|
||||
void unlockvol(struct volume *vol);
|
||||
int lockplex(struct plex *plex);
|
||||
void unlockplex(struct plex *plex);
|
||||
struct rangelock *lockrange(daddr_t stripe, struct buf *bp, struct plex *plex);
|
||||
int lock_config(void);
|
||||
void unlock_config(void);
|
||||
|
||||
/* Dæmon */
|
||||
|
||||
void vinum_daemon(void);
|
||||
int vinum_finddaemon(void);
|
||||
int vinum_setdaemonopts(int);
|
||||
extern struct daemonq *daemonq; /* daemon's work queue */
|
||||
extern struct daemonq *dqend; /* and the end of the queue */
|
||||
extern struct cdevsw vinum_cdevsw;
|
||||
|
||||
/*
 * Allocation wrappers.
 *
 * Kernel, VINUMDEBUG:  Malloc/Free go through MMalloc/FFree and pass
 *                      the calling file and line; LOCKDRIVE passes
 *                      them to lockdrive as well.
 * Kernel, otherwise:   Malloc/Free use malloc/free with M_DEVBUF;
 *                      the allocation waits (M_WAITOK) only when the
 *                      current thread's interrupt nesting level is
 *                      zero, and uses M_NOWAIT otherwise.
 * Userland:            plain malloc/free.
 */
#undef Free					    /* defined in some funny net stuff */

#ifdef _KERNEL

#ifdef VINUMDEBUG
#define Malloc(x)  MMalloc ((x), __FILE__, __LINE__) /* show where we came from */
#define Free(x)	   FFree ((x), __FILE__, __LINE__)  /* show where we came from */
caddr_t MMalloc(int size, char *, int);
void FFree(void *mem, char *, int);
#define LOCKDRIVE(d) lockdrive (d, __FILE__, __LINE__)
#else /* !VINUMDEBUG */
#define Malloc(x)  malloc((x), M_DEVBUF, \
	curthread->td_intr_nesting_level == 0? M_WAITOK: M_NOWAIT)
#define Free(x)	   free((x), M_DEVBUF)
#define LOCKDRIVE(d) lockdrive (d)
#endif /* VINUMDEBUG */

#else /* !_KERNEL */

#define Malloc(x)  malloc ((x))			    /* just the size */
#define Free(x)	   free ((x))			    /* just the address */

#endif /* _KERNEL */
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,81 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*/
|
||||
|
||||
/* Header files used by all modules */
|
||||
/*
|
||||
* $Id: vinumhdr.h,v 1.19 2001/05/22 04:07:22 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#ifdef _KERNEL
|
||||
#include "opt_vinum.h"
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kdb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#include <sys/errno.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/bio.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/disk.h>
|
||||
#include <sys/disklabel.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/queue.h>
|
||||
#ifdef _KERNEL
|
||||
#include <machine/setjmp.h>
|
||||
#include <machine/stdarg.h>
|
||||
#else
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
#include <vm/vm.h>
|
||||
#include <dev/vinum/vinumvar.h>
|
||||
#include <dev/vinum/vinumio.h>
|
||||
#include <dev/vinum/vinumkw.h>
|
||||
#include <dev/vinum/vinumext.h>
|
||||
#include <dev/vinum/vinumutil.h>
|
||||
#include <machine/cpu.h>
|
@ -1,473 +0,0 @@
|
||||
/* vinuminterrupt.c: bottom half of the driver */
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinuminterrupt.c,v 1.41 2003/08/24 17:55:56 obrien Exp $
|
||||
*/
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
void complete_rqe(struct buf *bp);
|
||||
void sdio_done(struct buf *bp);
|
||||
|
||||
/*
|
||||
* Take a completed buffer, transfer the data back if
|
||||
* it's a read, and complete the high-level request
|
||||
* if this is the last subrequest.
|
||||
*
|
||||
* The bp parameter is in fact a struct rqelement, which
|
||||
* includes a couple of extras at the end.
|
||||
*/
|
||||
void
|
||||
complete_rqe(struct buf *bp)
|
||||
{
|
||||
struct rqelement *rqe;
|
||||
struct request *rq;
|
||||
struct rqgroup *rqg;
|
||||
struct buf *ubp; /* user buffer */
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
char *gravity; /* for error messages */
|
||||
|
||||
rqe = (struct rqelement *) bp; /* point to the element element that completed */
|
||||
rqg = rqe->rqg; /* and the request group */
|
||||
rq = rqg->rq; /* and the complete request */
|
||||
ubp = rq->bp; /* user buffer */
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_LASTREQS)
|
||||
logrq(loginfo_iodone, (union rqinfou) rqe, ubp);
|
||||
#endif
|
||||
drive = &DRIVE[rqe->driveno];
|
||||
drive->active--; /* one less outstanding I/O on this drive */
|
||||
vinum_conf.active--; /* one less outstanding I/O globally */
|
||||
if ((drive->active == (DRIVE_MAXACTIVE - 1)) /* we were at the drive limit */
|
||||
||(vinum_conf.active == VINUM_MAXACTIVE)) /* or the global limit */
|
||||
wakeup(&launch_requests); /* let another one at it */
|
||||
if ((bp->b_io.bio_flags & BIO_ERROR) != 0) { /* transfer in error */
|
||||
gravity = "";
|
||||
sd = &SD[rqe->sdno];
|
||||
|
||||
if (bp->b_error != 0) /* did it return a number? */
|
||||
rq->error = bp->b_error; /* yes, put it in. */
|
||||
else if (rq->error == 0) /* no: do we have one already? */
|
||||
rq->error = EIO; /* no: catchall "I/O error" */
|
||||
sd->lasterror = rq->error;
|
||||
if (bp->b_iocmd == BIO_READ) { /* read operation */
|
||||
if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
|
||||
gravity = " fatal";
|
||||
set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */
|
||||
}
|
||||
log(LOG_ERR,
|
||||
"%s:%s read error, block %lld for %ld bytes\n",
|
||||
gravity,
|
||||
sd->name,
|
||||
(long long)bp->b_blkno,
|
||||
bp->b_bcount);
|
||||
} else { /* write operation */
|
||||
if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
|
||||
gravity = "fatal ";
|
||||
set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */
|
||||
}
|
||||
log(LOG_ERR,
|
||||
"%s:%s write error, block %lld for %ld bytes\n",
|
||||
gravity,
|
||||
sd->name,
|
||||
(long long)bp->b_blkno,
|
||||
bp->b_bcount);
|
||||
}
|
||||
log(LOG_ERR,
|
||||
"%s: user buffer block %lld for %ld bytes\n",
|
||||
sd->name,
|
||||
(long long)ubp->b_blkno,
|
||||
ubp->b_bcount);
|
||||
if (rq->error == ENXIO) { /* the drive's down too */
|
||||
log(LOG_ERR,
|
||||
"%s: fatal drive I/O error, block %lld for %ld bytes\n",
|
||||
DRIVE[rqe->driveno].label.name,
|
||||
(long long)bp->b_blkno,
|
||||
bp->b_bcount);
|
||||
DRIVE[rqe->driveno].lasterror = rq->error;
|
||||
set_drive_state(rqe->driveno, /* take the drive down */
|
||||
drive_down,
|
||||
setstate_force);
|
||||
}
|
||||
}
|
||||
/* Now update the statistics */
|
||||
if (bp->b_iocmd == BIO_READ) { /* read operation */
|
||||
DRIVE[rqe->driveno].reads++;
|
||||
DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
|
||||
SD[rqe->sdno].reads++;
|
||||
SD[rqe->sdno].bytes_read += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].reads++;
|
||||
PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
|
||||
if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
|
||||
VOL[PLEX[rqe->rqg->plexno].volno].reads++;
|
||||
VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount;
|
||||
}
|
||||
} else { /* write operation */
|
||||
DRIVE[rqe->driveno].writes++;
|
||||
DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
|
||||
SD[rqe->sdno].writes++;
|
||||
SD[rqe->sdno].bytes_written += bp->b_bcount;
|
||||
PLEX[rqe->rqg->plexno].writes++;
|
||||
PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
|
||||
if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
|
||||
VOL[PLEX[rqe->rqg->plexno].volno].writes++;
|
||||
VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount;
|
||||
}
|
||||
}
|
||||
if (rqg->flags & XFR_RECOVERY_READ) { /* recovery read, */
|
||||
int *sdata; /* source */
|
||||
int *data; /* and group data */
|
||||
int length; /* and count involved */
|
||||
int count; /* loop counter */
|
||||
struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */
|
||||
|
||||
/* XOR destination is the user data */
|
||||
sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */
|
||||
data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */
|
||||
length = urqe->grouplen * (DEV_BSIZE / sizeof(int)); /* and number of ints */
|
||||
|
||||
for (count = 0; count < length; count++)
|
||||
data[count] ^= sdata[count];
|
||||
|
||||
/*
|
||||
* In a normal read, we will normally read directly
|
||||
* into the user buffer. This doesn't work if
|
||||
* we're also doing a recovery, so we have to
|
||||
* copy it
|
||||
*/
|
||||
if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */
|
||||
char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */
|
||||
char *dst;
|
||||
|
||||
dst = (char *) ubp->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */
|
||||
length = rqe->datalen << DEV_BSHIFT; /* and count involved */
|
||||
bcopy(src, dst, length); /* move it */
|
||||
}
|
||||
} else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 4/5 group write operation */
|
||||
&&(rqg->active == 1)) /* and this is the last active request */
|
||||
complete_raid5_write(rqe);
|
||||
/*
|
||||
* This is the earliest place where we can be
|
||||
* sure that the request has really finished,
|
||||
* since complete_raid5_write can issue new
|
||||
* requests.
|
||||
*/
|
||||
rqg->active--; /* this request now finished */
|
||||
if (rqg->active == 0) { /* request group finished, */
|
||||
rq->active--; /* one less */
|
||||
if (rqg->lock) { /* got a lock? */
|
||||
unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
|
||||
rqg->lock = 0;
|
||||
}
|
||||
}
|
||||
if (rq->active == 0) { /* request finished, */
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_RESID) {
|
||||
if (ubp->b_resid != 0) /* still something to transfer? */
|
||||
kdb_enter("resid");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (rq->error) { /* did we have an error? */
|
||||
if (rq->isplex) { /* plex operation, */
|
||||
ubp->b_io.bio_flags |= BIO_ERROR; /* yes, propagate to user */
|
||||
ubp->b_error = rq->error;
|
||||
} else /* try to recover */
|
||||
queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq); /* let the daemon complete */
|
||||
} else {
|
||||
ubp->b_resid = 0; /* completed our transfer */
|
||||
if (rq->isplex == 0) /* volume request, */
|
||||
VOL[rq->volplex.volno].active--; /* another request finished */
|
||||
if (rq->flags & XFR_COPYBUF) {
|
||||
Free(ubp->b_data);
|
||||
ubp->b_data = rq->save_data;
|
||||
}
|
||||
bufdone(ubp); /* top level buffer completed */
|
||||
freerq(rq); /* return the request storage */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Free a request block and anything hanging off it */
|
||||
void
|
||||
freerq(struct request *rq)
|
||||
{
|
||||
struct rqgroup *rqg;
|
||||
struct rqgroup *nrqg; /* next in chain */
|
||||
int rqno;
|
||||
|
||||
for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */
|
||||
if (rqg->lock) /* got a lock? */
|
||||
unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
|
||||
for (rqno = 0; rqno < rqg->count; rqno++) {
|
||||
if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */
|
||||
&&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */
|
||||
Free(rqg->rqe[rqno].b.b_data); /* free it */
|
||||
if (rqg->rqe[rqno].flags & XFR_BUFLOCKED) { /* locked this buffer, */
|
||||
BUF_UNLOCK(&rqg->rqe[rqno].b); /* unlock it again */
|
||||
BUF_LOCKFREE(&rqg->rqe[rqno].b);
|
||||
}
|
||||
}
|
||||
nrqg = rqg->next; /* note the next one */
|
||||
Free(rqg); /* and free this one */
|
||||
}
|
||||
Free(rq); /* free the request itself */
|
||||
}
|
||||
|
||||
/* I/O on subdisk completed */
|
||||
void
|
||||
sdio_done(struct buf *bp)
|
||||
{
|
||||
struct sdbuf *sbp;
|
||||
|
||||
sbp = (struct sdbuf *) bp;
|
||||
if (sbp->b.b_io.bio_flags & BIO_ERROR) { /* had an error */
|
||||
sbp->bp->b_io.bio_flags |= BIO_ERROR; /* propagate upwards */
|
||||
sbp->bp->b_error = sbp->b.b_error;
|
||||
}
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_LASTREQS)
|
||||
logrq(loginfo_sdiodone, (union rqinfou) bp, bp);
|
||||
#endif
|
||||
sbp->bp->b_resid = sbp->b.b_resid; /* copy the resid field */
|
||||
/* Now update the statistics */
|
||||
if (bp->b_iocmd == BIO_READ) { /* read operation */
|
||||
DRIVE[sbp->driveno].reads++;
|
||||
DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount;
|
||||
SD[sbp->sdno].reads++;
|
||||
SD[sbp->sdno].bytes_read += sbp->b.b_bcount;
|
||||
} else { /* write operation */
|
||||
DRIVE[sbp->driveno].writes++;
|
||||
DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount;
|
||||
SD[sbp->sdno].writes++;
|
||||
SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
|
||||
}
|
||||
bufdone(sbp->bp); /* complete the caller's I/O */
|
||||
BUF_UNLOCK(&sbp->b);
|
||||
BUF_LOCKFREE(&sbp->b);
|
||||
Free(sbp);
|
||||
}
|
||||
|
||||
/* Start the second phase of a RAID-4 or RAID-5 group write operation. */
|
||||
void
|
||||
complete_raid5_write(struct rqelement *rqe)
|
||||
{
|
||||
int *sdata; /* source */
|
||||
int *pdata; /* and parity block data */
|
||||
int length; /* and count involved */
|
||||
int count; /* loop counter */
|
||||
int rqno; /* request index */
|
||||
int rqoffset; /* offset of request data from parity data */
|
||||
struct buf *ubp; /* user buffer header */
|
||||
struct request *rq; /* pointer to our request */
|
||||
struct rqgroup *rqg; /* and to the request group */
|
||||
struct rqelement *prqe; /* point to the parity block */
|
||||
struct drive *drive; /* drive to access */
|
||||
|
||||
rqg = rqe->rqg; /* and to our request group */
|
||||
rq = rqg->rq; /* point to our request */
|
||||
ubp = rq->bp; /* user's buffer header */
|
||||
prqe = &rqg->rqe[0]; /* point to the parity block */
|
||||
|
||||
/*
|
||||
* If we get to this function, we have normal or
|
||||
* degraded writes, or a combination of both. We do
|
||||
* the same thing in each case: we perform an
|
||||
* exclusive or to the parity block. The only
|
||||
* difference is the origin of the data and the
|
||||
* address range.
|
||||
*/
|
||||
if (rqe->flags & XFR_DEGRADED_WRITE) { /* do the degraded write stuff */
|
||||
pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */
|
||||
bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */
|
||||
|
||||
/* Now get what data we need from each block */
|
||||
for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
|
||||
rqe = &rqg->rqe[rqno]; /* this request */
|
||||
sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */
|
||||
length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */
|
||||
|
||||
/*
|
||||
* Add the data block to the parity block. Before
|
||||
* we started the request, we zeroed the parity
|
||||
* block, so the result of adding all the other
|
||||
* blocks and the block we want to write will be
|
||||
* the correct parity block.
|
||||
*/
|
||||
for (count = 0; count < length; count++)
|
||||
pdata[count] ^= sdata[count];
|
||||
if ((rqe->flags & XFR_MALLOCED) /* the buffer was malloced, */
|
||||
&&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */
|
||||
Free(rqe->b.b_data); /* free it now */
|
||||
rqe->flags &= ~XFR_MALLOCED;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rqg->flags & XFR_NORMAL_WRITE) { /* do normal write stuff */
|
||||
/* Get what data we need from each block */
|
||||
for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
|
||||
rqe = &rqg->rqe[rqno]; /* this request */
|
||||
if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE))
|
||||
== (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */
|
||||
sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */
|
||||
rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */
|
||||
pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */
|
||||
length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */
|
||||
|
||||
/*
|
||||
* "remove" the old data block
|
||||
* from the parity block
|
||||
*/
|
||||
if ((pdata < ((int *) prqe->b.b_data))
|
||||
|| (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount)))
|
||||
|| (sdata < ((int *) rqe->b.b_data))
|
||||
|| (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount))))
|
||||
panic("complete_raid5_write: bounds overflow");
|
||||
for (count = 0; count < length; count++)
|
||||
pdata[count] ^= sdata[count];
|
||||
|
||||
/* "add" the new data block */
|
||||
sdata = (int *) (&ubp->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */
|
||||
if ((sdata < ((int *) ubp->b_data))
|
||||
|| (&sdata[length] > ((int *) (ubp->b_data + ubp->b_bcount))))
|
||||
panic("complete_raid5_write: bounds overflow");
|
||||
for (count = 0; count < length; count++)
|
||||
pdata[count] ^= sdata[count];
|
||||
|
||||
/* Free the malloced buffer */
|
||||
if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */
|
||||
Free(rqe->b.b_data); /* free it */
|
||||
rqe->flags &= ~XFR_MALLOCED;
|
||||
} else
|
||||
panic("complete_raid5_write: malloc conflict");
|
||||
|
||||
if ((rqe->b.b_iocmd == BIO_READ) /* this was a read */
|
||||
&&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */
|
||||
rqe->b.b_flags &= ~B_DONE; /* start a new request */
|
||||
rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
|
||||
rqe->b.b_iodone = complete_rqe; /* call us here when done */
|
||||
rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
|
||||
rqe->b.b_data = &ubp->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */
|
||||
rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */
|
||||
rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim more */
|
||||
rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
|
||||
rqe->b.b_blkno += rqe->dataoffset; /* point to the correct block */
|
||||
rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
|
||||
rqe->b.b_iooffset = rqe->b.b_offset;
|
||||
rqg->active++; /* another active request */
|
||||
drive = &DRIVE[rqe->driveno]; /* drive to access */
|
||||
|
||||
/* We can't sleep here, so we just increment the counters. */
|
||||
drive->active++;
|
||||
if (drive->active >= drive->maxactive)
|
||||
drive->maxactive = drive->active;
|
||||
vinum_conf.active++;
|
||||
if (vinum_conf.active >= vinum_conf.maxactive)
|
||||
vinum_conf.maxactive = vinum_conf.active;
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
log(LOG_DEBUG,
|
||||
" %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
|
||||
rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
|
||||
major(rqe->b.b_dev),
|
||||
minor(rqe->b.b_dev),
|
||||
rqe->sdno,
|
||||
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
|
||||
(long long)rqe->b.b_blkno,
|
||||
rqe->b.b_bcount);
|
||||
if (debug & DEBUG_LASTREQS)
|
||||
logrq(loginfo_raid5_data, (union rqinfou) rqe, ubp);
|
||||
#endif
|
||||
DEV_STRATEGY(&rqe->b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Finally, write the parity block */
|
||||
rqe = &rqg->rqe[0];
|
||||
rqe->b.b_flags &= ~B_DONE; /* we're not done */
|
||||
rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
|
||||
rqe->b.b_iodone = complete_rqe; /* call us here when done */
|
||||
rqg->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
|
||||
rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */
|
||||
rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
|
||||
rqe->b.b_iooffset = rqe->b.b_offset;
|
||||
rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim we have more */
|
||||
rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
|
||||
rqg->active++; /* another active request */
|
||||
drive = &DRIVE[rqe->driveno]; /* drive to access */
|
||||
|
||||
/* We can't sleep here, so we just increment the counters. */
|
||||
drive->active++;
|
||||
if (drive->active >= drive->maxactive)
|
||||
drive->maxactive = drive->active;
|
||||
vinum_conf.active++;
|
||||
if (vinum_conf.active >= vinum_conf.maxactive)
|
||||
vinum_conf.maxactive = vinum_conf.active;
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_ADDRESSES)
|
||||
log(LOG_DEBUG,
|
||||
" %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
|
||||
rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
|
||||
major(rqe->b.b_dev),
|
||||
minor(rqe->b.b_dev),
|
||||
rqe->sdno,
|
||||
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
|
||||
(long long)rqe->b.b_blkno,
|
||||
rqe->b.b_bcount);
|
||||
if (debug & DEBUG_LASTREQS)
|
||||
logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubp);
|
||||
#endif
|
||||
DEV_STRATEGY(&rqe->b);
|
||||
}
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,918 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumio.c,v 1.39 2003/05/23 00:59:53 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
static char *sappend(char *txt, char *s);
|
||||
static int drivecmp(const void *va, const void *vb);
|
||||
|
||||
/*
|
||||
* Open the device associated with the drive, and
|
||||
* set drive's vp. Return an error number.
|
||||
*/
|
||||
int
|
||||
open_drive(struct drive *drive, struct thread *td, int verbose)
|
||||
{
|
||||
struct cdevsw *dsw; /* pointer to cdevsw entry */
|
||||
|
||||
if (drive->flags & VF_OPEN) /* open already, */
|
||||
return EBUSY; /* don't do it again */
|
||||
|
||||
drive->dev = getdiskbyname(drive->devicename);
|
||||
if (drive->dev == NULL) /* didn't find anything */
|
||||
return ENOENT;
|
||||
dev_ref(drive->dev);
|
||||
|
||||
drive->dev->si_iosize_max = DFLTPHYS;
|
||||
dsw = devsw(drive->dev);
|
||||
if (dsw == NULL) /* sanity, should not happen */
|
||||
drive->lasterror = ENOENT;
|
||||
else if ((dsw->d_flags & D_DISK) == 0)
|
||||
drive->lasterror = ENOTBLK;
|
||||
else {
|
||||
DROP_GIANT();
|
||||
drive->lasterror = (dsw->d_open) (drive->dev, FWRITE | FREAD, 0, td);
|
||||
PICKUP_GIANT();
|
||||
}
|
||||
|
||||
if (drive->lasterror != 0) { /* failed */
|
||||
drive->state = drive_down; /* just force it down */
|
||||
if (verbose)
|
||||
log(LOG_WARNING,
|
||||
"vinum open_drive %s: failed with error %d\n",
|
||||
drive->devicename, drive->lasterror);
|
||||
} else
|
||||
drive->flags |= VF_OPEN; /* we're open now */
|
||||
|
||||
return drive->lasterror;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set some variables in the drive struct in more
|
||||
* convenient form. Return error indication.
|
||||
*/
|
||||
int
|
||||
set_drive_parms(struct drive *drive)
|
||||
{
|
||||
drive->blocksize = BLKDEV_IOSIZE; /* do we need this? */
|
||||
drive->secsperblock = drive->blocksize /* number of sectors per block */
|
||||
/ drive->sectorsize;
|
||||
|
||||
/* Now update the label part */
|
||||
bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */
|
||||
microtime(&drive->label.date_of_birth); /* and current time */
|
||||
drive->label.drive_size = drive->mediasize; /* size of the drive in bytes */
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */
|
||||
drive->label.drive_size *= 100;
|
||||
#endif
|
||||
|
||||
/* number of sectors available for subdisks */
|
||||
drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;
|
||||
|
||||
/*
|
||||
* Bug in 3.0 as of January 1998: you can open
|
||||
* non-existent slices. They have a length of 0.
|
||||
*/
|
||||
if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */
|
||||
set_drive_state(drive->driveno, drive_down, setstate_force);
|
||||
drive->lasterror = ENOSPC;
|
||||
return ENOSPC;
|
||||
}
|
||||
drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */
|
||||
drive->freelist = (struct drive_freelist *)
|
||||
Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
|
||||
if (drive->freelist == NULL) /* can't malloc, dammit */
|
||||
return ENOSPC;
|
||||
drive->freelist_entries = 1; /* just (almost) the complete drive */
|
||||
drive->freelist[0].offset = DATASTART; /* starts here */
|
||||
drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */
|
||||
if (drive->label.name[0] != '\0') /* got a name */
|
||||
set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */
|
||||
else /* we know about it, but that's all */
|
||||
drive->state = drive_referenced;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a drive: open the device and add
|
||||
* device information.
|
||||
*/
|
||||
int
|
||||
init_drive(struct drive *drive, int verbose)
|
||||
{
|
||||
|
||||
drive->lasterror = open_drive(drive, curthread, verbose); /* open the drive */
|
||||
if (drive->lasterror)
|
||||
return drive->lasterror;
|
||||
|
||||
DROP_GIANT();
|
||||
drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
|
||||
DIOCGSECTORSIZE,
|
||||
(caddr_t) & drive->sectorsize,
|
||||
FREAD,
|
||||
curthread);
|
||||
if (drive->lasterror == 0)
|
||||
drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
|
||||
DIOCGMEDIASIZE,
|
||||
(caddr_t) & drive->mediasize,
|
||||
FREAD,
|
||||
curthread);
|
||||
PICKUP_GIANT();
|
||||
if (drive->lasterror) {
|
||||
if (verbose)
|
||||
log(LOG_ERR,
|
||||
"vinum: Can't get drive dimensions for %s: error %d\n",
|
||||
drive->devicename,
|
||||
drive->lasterror);
|
||||
close_drive(drive);
|
||||
return drive->lasterror;
|
||||
}
|
||||
return set_drive_parms(drive); /* set various odds and ends */
|
||||
}
|
||||
|
||||
/* Close a drive if it's open. */
|
||||
void
|
||||
close_drive(struct drive *drive)
|
||||
{
|
||||
LOCKDRIVE(drive); /* keep the daemon out */
|
||||
if (drive->flags & VF_OPEN)
|
||||
close_locked_drive(drive); /* and close it */
|
||||
if (drive->state > drive_down) /* if it's up */
|
||||
drive->state = drive_down; /* make sure it's down */
|
||||
unlockdrive(drive);
|
||||
}
|
||||
|
||||
/*
|
||||
* Real drive close code, called with drive already locked.
|
||||
* We have also checked that the drive is open. No errors.
|
||||
*/
|
||||
void
|
||||
close_locked_drive(struct drive *drive)
|
||||
{
|
||||
int error;
|
||||
|
||||
/*
|
||||
* If we can't access the drive, we can't flush
|
||||
* the queues, which spec_close() will try to
|
||||
* do. Get rid of them here first.
|
||||
*/
|
||||
DROP_GIANT();
|
||||
error = (*devsw(drive->dev)->d_close) (drive->dev, FWRITE | FREAD, 0, NULL);
|
||||
PICKUP_GIANT();
|
||||
drive->flags &= ~VF_OPEN; /* no longer open */
|
||||
if (drive->lasterror == 0)
|
||||
drive->lasterror = error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove drive from the configuration.
|
||||
* Caller must ensure that it isn't active.
|
||||
*/
|
||||
void
|
||||
remove_drive(int driveno)
|
||||
{
|
||||
struct drive *drive = &vinum_conf.drive[driveno];
|
||||
struct vinum_hdr *vhdr; /* buffer for header */
|
||||
int error;
|
||||
|
||||
if (drive->state > drive_referenced) { /* real drive */
|
||||
if (drive->state == drive_up) {
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
|
||||
CHECKALLOC(vhdr, "Can't allocate memory");
|
||||
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (error)
|
||||
drive->lasterror = error;
|
||||
else {
|
||||
vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
|
||||
write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
}
|
||||
Free(vhdr);
|
||||
}
|
||||
free_drive(drive); /* close it and free resources */
|
||||
save_config(); /* and save the updated configuration */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Transfer drive data. Usually called from one of these defines;
|
||||
* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
|
||||
* #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
|
||||
*
|
||||
* length and offset are in bytes, but must be multiples of sector
|
||||
* size. The function *does not check* for this condition, and
|
||||
* truncates ruthlessly.
|
||||
* Return error number.
|
||||
*/
|
||||
int
|
||||
driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
|
||||
{
|
||||
int error;
|
||||
struct buf *bp;
|
||||
|
||||
error = 0; /* to keep the compiler happy */
|
||||
while (length) { /* divide into small enough blocks */
|
||||
int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
|
||||
|
||||
bp = geteblk(len); /* get a buffer header */
|
||||
bp->b_flags = 0;
|
||||
bp->b_iocmd = flag;
|
||||
bp->b_dev = drive->dev; /* device */
|
||||
bp->b_blkno = offset / drive->sectorsize; /* block number */
|
||||
bp->b_offset = offset;
|
||||
bp->b_iooffset = offset;
|
||||
bp->b_saveaddr = bp->b_data;
|
||||
bp->b_data = buf;
|
||||
bp->b_bcount = len;
|
||||
DEV_STRATEGY(bp); /* initiate the transfer */
|
||||
error = bufwait(bp);
|
||||
bp->b_data = bp->b_saveaddr;
|
||||
bp->b_flags |= B_INVAL | B_AGE;
|
||||
bp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(bp);
|
||||
if (error)
|
||||
break;
|
||||
length -= len; /* update pointers */
|
||||
buf += len;
|
||||
offset += len;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check a drive for a vinum header. If found,
|
||||
* update the drive information. We come here
|
||||
* with a partially populated drive structure
|
||||
* which includes the device name.
|
||||
*
|
||||
* Return information on what we found.
|
||||
*
|
||||
* This function is called from two places: check_drive,
|
||||
* which wants to find out whether the drive is a
|
||||
* Vinum drive, and config_drive, which asserts that
|
||||
* it is a vinum drive. In the first case, we don't
|
||||
* print error messages (verbose==0), in the second
|
||||
* we do (verbose==1).
|
||||
*/
|
||||
enum drive_label_info
|
||||
read_drive_label(struct drive *drive, int verbose)
|
||||
{
|
||||
int error;
|
||||
int result; /* result of our search */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
|
||||
error = init_drive(drive, 0); /* find the drive */
|
||||
if (error) /* find the drive */
|
||||
return DL_CANT_OPEN; /* not ours */
|
||||
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
|
||||
CHECKALLOC(vhdr, "Can't allocate memory");
|
||||
|
||||
drive->state = drive_up; /* be optimistic */
|
||||
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
|
||||
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
|
||||
if (drive->label.name[0] /* we have a name for this drive */
|
||||
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
|
||||
drive->lasterror = EINVAL;
|
||||
result = DL_WRONG_DRIVE; /* it's the wrong drive */
|
||||
drive->state = drive_unallocated; /* put it back, it's not ours */
|
||||
} else
|
||||
result = DL_OURS;
|
||||
/*
|
||||
* We copy the drive anyway so that we have
|
||||
* the correct name in the drive info. This
|
||||
* may not be the name specified
|
||||
*/
|
||||
drive->label = vhdr->label; /* put in the label information */
|
||||
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
|
||||
result = DL_DELETED_LABEL; /* and return the info */
|
||||
else
|
||||
result = DL_NOT_OURS; /* we could have it, but we don't yet */
|
||||
Free(vhdr); /* that's all. */
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check a drive for a vinum header. If found,
|
||||
* read configuration information from the drive and
|
||||
* incorporate the data into the configuration.
|
||||
*
|
||||
* Return drive number.
|
||||
*/
|
||||
struct drive *
|
||||
check_drive(char *devicename)
|
||||
{
|
||||
int driveno;
|
||||
int i;
|
||||
struct drive *drive;
|
||||
|
||||
driveno = find_drive_by_name(devicename, 1); /* if entry doesn't exist, create it */
|
||||
drive = &vinum_conf.drive[driveno]; /* and get a pointer */
|
||||
|
||||
if (drive->state >= drive_down) /* up or down, we know it */
|
||||
return drive;
|
||||
if (read_drive_label(drive, 0) == DL_OURS) { /* one of ours */
|
||||
for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */
|
||||
if ((i != driveno) /* not this drive */
|
||||
&&(DRIVE[i].state != drive_unallocated) /* and it's allocated */
|
||||
&&(strcmp(DRIVE[i].label.name,
|
||||
DRIVE[driveno].label.name) == 0)) { /* and it has the same name */
|
||||
struct drive *mydrive = &DRIVE[i];
|
||||
|
||||
if (mydrive->devicename[0] == '/') { /* we know a device name for it */
|
||||
/*
|
||||
* set an error, but don't take the
|
||||
* drive down: that would cause unneeded
|
||||
* error messages.
|
||||
*/
|
||||
drive->lasterror = EEXIST;
|
||||
break;
|
||||
} else { /* it's just a place holder, */
|
||||
int sdno;
|
||||
|
||||
for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */
|
||||
if ((SD[sdno].driveno == i) /* it's pointing to this one, */
|
||||
&&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */
|
||||
SD[sdno].driveno = drive->driveno; /* point to the one we found */
|
||||
update_sd_state(sdno); /* and update its state */
|
||||
}
|
||||
}
|
||||
bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */
|
||||
}
|
||||
}
|
||||
}
|
||||
return drive;
|
||||
} else { /* not ours, */
|
||||
close_drive(drive);
|
||||
free_drive(drive); /* get rid of it */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copy the string txt, including its terminating NUL, to s.
 * Return a pointer to the NUL just written, so that the next
 * call can append at that position.  No bounds are checked.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s = c;
        if (c == '\0')
            return s;                                       /* points at the terminator */
        s++;
    }
}
|
||||
|
||||
void
|
||||
format_config(char *config, int len)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
char *s = config;
|
||||
char *configend = &config[len];
|
||||
|
||||
bzero(config, len);
|
||||
|
||||
/* First write the volume configuration */
|
||||
for (i = 0; i < vinum_conf.volumes_allocated; i++) {
|
||||
struct volume *vol;
|
||||
|
||||
vol = &vinum_conf.volume[i];
|
||||
if ((vol->state > volume_uninit)
|
||||
&& (vol->name[0] != '\0')) { /* paranoia */
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
"volume %s state %s",
|
||||
vol->name,
|
||||
volume_state(vol->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
s = sappend("\n", s);
|
||||
}
|
||||
}
|
||||
|
||||
/* Then the plex configuration */
|
||||
for (i = 0; i < vinum_conf.plexes_allocated; i++) {
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
plex = &vinum_conf.plex[i];
|
||||
if ((plex->state > plex_referenced)
|
||||
&& (plex->name[0] != '\0')) { /* paranoia */
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
"plex name %s state %s org %s ",
|
||||
plex->name,
|
||||
plex_state(plex->state),
|
||||
plex_org(plex->organization));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (isstriped(plex)) {
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
"%ds ",
|
||||
(int) plex->stripesize);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
if (plex->volno >= 0) { /* we have a volume */
|
||||
vol = &VOL[plex->volno];
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
"vol %s ",
|
||||
vol->name);
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if ((vol->preferred_plex >= 0) /* has a preferred plex */
|
||||
&&vol->plex[vol->preferred_plex] == i) /* and it's us */
|
||||
snprintf(s, configend - s, "preferred ");
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
for (j = 0; j < plex->subdisks; j++) {
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
" sd %s",
|
||||
vinum_conf.sd[plex->sdnos[j]].name);
|
||||
}
|
||||
s = sappend("\n", s);
|
||||
}
|
||||
}
|
||||
|
||||
/* And finally the subdisk configuration */
|
||||
for (i = 0; i < vinum_conf.subdisks_allocated; i++) {
|
||||
struct sd *sd;
|
||||
char *drivename;
|
||||
|
||||
sd = &SD[i];
|
||||
if ((sd->state != sd_referenced)
|
||||
&& (sd->state != sd_unallocated)
|
||||
&& (sd->name[0] != '\0')) { /* paranoia */
|
||||
drivename = vinum_conf.drive[sd->driveno].label.name;
|
||||
/*
|
||||
* XXX We've seen cases of dead subdisks
|
||||
* which don't have a drive. If we let them
|
||||
* through here, the drive name is null, so
|
||||
* they get the drive named 'plex'.
|
||||
*
|
||||
* This is a breakage limiter, not a fix.
|
||||
*/
|
||||
if (drivename[0] == '\0')
|
||||
drivename = "*invalid*";
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
"sd name %s drive %s len %llus driveoffset %llus state %s",
|
||||
sd->name,
|
||||
drivename,
|
||||
(unsigned long long) sd->sectors,
|
||||
(unsigned long long) sd->driveoffset,
|
||||
sd_state(sd->state));
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (sd->plexno >= 0)
|
||||
snprintf(s,
|
||||
configend - s,
|
||||
" plex %s plexoffset %llds",
|
||||
vinum_conf.plex[sd->plexno].name,
|
||||
(long long) sd->plexoffset);
|
||||
else
|
||||
snprintf(s, configend - s, " detached");
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
if (sd->flags & VF_RETRYERRORS) {
|
||||
snprintf(s, configend - s, " retryerrors");
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
snprintf(s, configend - s, " \n");
|
||||
while (*s)
|
||||
s++; /* find the end */
|
||||
}
|
||||
}
|
||||
if (s > &config[len - 2])
|
||||
panic("vinum: configuration data overflow");
|
||||
}
|
||||
|
||||
/*
|
||||
* issue a save config request to the dæmon. The actual work
|
||||
* is done in process context by daemon_save_config.
|
||||
*/
|
||||
void
|
||||
save_config(void)
|
||||
{
|
||||
queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write the configuration to all vinum slices. This
|
||||
* is performed by the daemon only.
|
||||
*/
|
||||
void
|
||||
daemon_save_config(void)
|
||||
{
|
||||
int error;
|
||||
int written_config; /* set when we first write the config to disk */
|
||||
int driveno;
|
||||
struct drive *drive; /* point to current drive info */
|
||||
struct vinum_hdr *vhdr; /* and as header */
|
||||
char *config; /* point to config data */
|
||||
|
||||
/* don't save the configuration while we're still working on it */
|
||||
if (vinum_conf.flags & VF_CONFIGURING)
|
||||
return;
|
||||
written_config = 0; /* no config written yet */
|
||||
/* Build a volume header */
|
||||
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */
|
||||
CHECKALLOC(vhdr, "Can't allocate config data");
|
||||
vhdr->magic = VINUM_MAGIC; /* magic number */
|
||||
vhdr->config_length = MAXCONFIG; /* length of following config info */
|
||||
|
||||
config = Malloc(MAXCONFIG); /* get space for the config data */
|
||||
CHECKALLOC(config, "Can't allocate config data");
|
||||
|
||||
format_config(config, MAXCONFIG);
|
||||
error = 0; /* no errors yet */
|
||||
for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
|
||||
drive = &vinum_conf.drive[driveno]; /* point to drive */
|
||||
if (drive->state > drive_referenced) {
|
||||
LOCKDRIVE(drive); /* don't let it change */
|
||||
|
||||
/*
|
||||
* First, do some drive consistency checks. Some
|
||||
* of these are kludges, others require a process
|
||||
* context and couldn't be done before.
|
||||
*/
|
||||
if ((drive->devicename[0] == '\0')
|
||||
|| (drive->label.name[0] == '\0')) {
|
||||
unlockdrive(drive);
|
||||
free_drive(drive); /* get rid of it */
|
||||
break;
|
||||
}
|
||||
if (((drive->flags & VF_OPEN) == 0) /* drive not open */
|
||||
&&(drive->state > drive_down)) { /* and it thinks it's not down */
|
||||
unlockdrive(drive);
|
||||
set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */
|
||||
continue;
|
||||
}
|
||||
if ((drive->state == drive_down) /* it's down */
|
||||
&&(drive->flags & VF_OPEN)) { /* but open, */
|
||||
unlockdrive(drive);
|
||||
close_drive(drive); /* close it */
|
||||
} else if (drive->state > drive_down) {
|
||||
microtime(&drive->label.last_update); /* time of last update is now */
|
||||
bcopy((char *) &drive->label, /* and the label info from the drive structure */
|
||||
(char *) &vhdr->label,
|
||||
sizeof(vhdr->label));
|
||||
if ((drive->state != drive_unallocated)
|
||||
&& (drive->state != drive_referenced)) { /* and it's a real drive */
|
||||
error = write_drive(drive,
|
||||
(char *) vhdr,
|
||||
VINUMHEADERLEN,
|
||||
VINUM_LABEL_OFFSET);
|
||||
if (error == 0) /* first config copy */
|
||||
error = write_drive(drive,
|
||||
config,
|
||||
MAXCONFIG,
|
||||
VINUM_CONFIG_OFFSET);
|
||||
if (error == 0)
|
||||
error = write_drive(drive, /* second copy */
|
||||
config,
|
||||
MAXCONFIG,
|
||||
VINUM_CONFIG_OFFSET + MAXCONFIG);
|
||||
unlockdrive(drive);
|
||||
if (error) {
|
||||
log(LOG_ERR,
|
||||
"vinum: Can't write config to %s, error %d\n",
|
||||
drive->devicename,
|
||||
error);
|
||||
set_drive_state(drive->driveno, drive_down, setstate_force);
|
||||
} else
|
||||
written_config = 1; /* we've written it on at least one drive */
|
||||
}
|
||||
} else /* not worth looking at, */
|
||||
unlockdrive(drive); /* just unlock it again */
|
||||
}
|
||||
}
|
||||
Free(vhdr);
|
||||
Free(config);
|
||||
}
|
||||
|
||||
/*
|
||||
* Search disks on system for vinum slices and add
|
||||
* them to the configuuration if they're not
|
||||
* there already. devicename is a blank-separate
|
||||
* list of device names. If not provided, use
|
||||
* sysctl to get a list of all disks on the
|
||||
* system.
|
||||
*
|
||||
* Return an error indication.
|
||||
*/
|
||||
int
|
||||
vinum_scandisk(char *devicename)
|
||||
{
|
||||
struct drive *volatile drive;
|
||||
volatile int driveno;
|
||||
int firstdrive; /* first drive in this list */
|
||||
volatile int gooddrives; /* number of usable drives found */
|
||||
int firsttime; /* set if we have never configured before */
|
||||
int error;
|
||||
char *config_text; /* read the config info from disk into here */
|
||||
char *volatile cptr; /* pointer into config information */
|
||||
char *eptr; /* end pointer into config information */
|
||||
char *config_line; /* copy the config line to */
|
||||
volatile int status;
|
||||
int *drivelist; /* list of drive indices */
|
||||
char *partname; /* for creating partition names */
|
||||
char *cp; /* pointer to start of disk name */
|
||||
char *ep; /* and to first char after name */
|
||||
char *np; /* name pointer in naem we build */
|
||||
size_t alloclen;
|
||||
int malloced;
|
||||
int partnamelen; /* length of partition name */
|
||||
int drives;
|
||||
int goodpart; /* good vinum drives on this disk */
|
||||
|
||||
malloced = 0; /* devicename not malloced */
|
||||
if (devicename == NULL) { /* no devices specified, */
|
||||
/* get a list of all disks in the system */
|
||||
/* Get size of disk list */
|
||||
error = kernel_sysctlbyname(&thread0, "kern.disks", NULL,
|
||||
NULL, NULL, 0, &alloclen);
|
||||
if (error) {
|
||||
log(LOG_ERR, "vinum: can't get disk list: %d\n", error);
|
||||
return EINVAL;
|
||||
}
|
||||
devicename = Malloc(alloclen);
|
||||
if (devicename == NULL) {
|
||||
printf("vinum: can't allocate memory for drive list");
|
||||
return ENOMEM;
|
||||
} else
|
||||
malloced = 1;
|
||||
/* Now get the list of disks */
|
||||
kernel_sysctlbyname(&thread0, "kern.disks", devicename,
|
||||
&alloclen, NULL, 0, NULL);
|
||||
}
|
||||
status = 0; /* success indication */
|
||||
vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */
|
||||
partname = Malloc(MAXPATHLEN); /* extract name of disk here */
|
||||
if (partname == NULL) {
|
||||
printf("vinum_scandisk: can't allocate memory for drive name");
|
||||
return ENOMEM;
|
||||
}
|
||||
gooddrives = 0; /* number of usable drives found */
|
||||
firstdrive = vinum_conf.drives_used; /* the first drive */
|
||||
firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */
|
||||
|
||||
/* allocate a drive pointer list */
|
||||
drives = 256; /* should be enough for most cases */
|
||||
drivelist = (int *) Malloc(drives * sizeof(int));
|
||||
CHECKALLOC(drivelist, "Can't allocate memory");
|
||||
error = lock_config(); /* make sure we're alone here */
|
||||
if (error)
|
||||
return error;
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error) /* longjmped out */
|
||||
return error;
|
||||
|
||||
/* Open all drives and find which was modified most recently */
|
||||
for (cp = devicename; *cp; cp = ep) {
|
||||
char part; /* UNIX partition */
|
||||
#ifdef __i386__
|
||||
int slice;
|
||||
#endif
|
||||
|
||||
while (*cp == ' ')
|
||||
cp++; /* find start of name */
|
||||
if (*cp == '\0') /* done, */
|
||||
break;
|
||||
ep = cp;
|
||||
while (*ep && (*ep != ' ')) /* find end of name */
|
||||
ep++;
|
||||
|
||||
np = partname; /* start building up a name here */
|
||||
if (*cp != '/') { /* name doesn't start with /, */
|
||||
strcpy(np, "/dev/"); /* assume /dev */
|
||||
np += strlen("/dev/");
|
||||
}
|
||||
memcpy(np, cp, ep - cp); /* put in name */
|
||||
np += ep - cp; /* and point past */
|
||||
|
||||
goodpart = 0; /* no partitions on this disk yet */
|
||||
partnamelen = MAXPATHLEN + np - partname; /* remaining length in partition name */
|
||||
#ifdef __i386__
|
||||
/* first try the partition table */
|
||||
for (slice = 1; slice < 5; slice++)
|
||||
for (part = 'a'; part < 'i'; part++) {
|
||||
if (part != 'c') { /* don't do the c partition */
|
||||
snprintf(np,
|
||||
partnamelen,
|
||||
"s%d%c",
|
||||
slice,
|
||||
part);
|
||||
drive = check_drive(partname); /* try to open it */
|
||||
if (drive) { /* got something, */
|
||||
if (drive->flags & VF_CONFIGURED) /* already read this config, */
|
||||
log(LOG_WARNING,
|
||||
"vinum: already read config from %s\n", /* say so */
|
||||
drive->label.name);
|
||||
else {
|
||||
if (gooddrives == drives) /* ran out of entries */
|
||||
EXPAND(drivelist, int, drives, drives); /* double the size */
|
||||
drivelist[gooddrives] = drive->driveno; /* keep the drive index */
|
||||
drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
|
||||
gooddrives++;
|
||||
goodpart++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* If the machine doesn't have a BIOS
|
||||
* partition table, try normal devices.
|
||||
*/
|
||||
if (goodpart == 0) { /* didn't find anything, */
|
||||
for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */
|
||||
if (part != 'c') { /* don't do the c partition */
|
||||
snprintf(np,
|
||||
partnamelen,
|
||||
"%c",
|
||||
part);
|
||||
drive = check_drive(partname); /* try to open it */
|
||||
if (drive) { /* got something, */
|
||||
if (drive->flags & VF_CONFIGURED) /* already read this config, */
|
||||
log(LOG_WARNING,
|
||||
"vinum: already read config from %s\n", /* say so */
|
||||
drive->label.name);
|
||||
else {
|
||||
if (gooddrives == drives) /* ran out of entries */
|
||||
EXPAND(drivelist, int, drives, drives); /* double the size */
|
||||
drivelist[gooddrives] = drive->driveno; /* keep the drive index */
|
||||
drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */
|
||||
gooddrives++;
|
||||
goodpart++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Free(partname);
|
||||
|
||||
if (gooddrives == 0) {
|
||||
if (firsttime)
|
||||
log(LOG_WARNING, "vinum: no drives found\n");
|
||||
else
|
||||
log(LOG_INFO, "vinum: no additional drives found\n");
|
||||
if (malloced)
|
||||
Free(devicename);
|
||||
unlock_config();
|
||||
return ENOENT;
|
||||
}
|
||||
/*
|
||||
* We now have at least one drive open. Sort
|
||||
* them in order of config time and merge the
|
||||
* config info with what we have already.
|
||||
*/
|
||||
qsort(drivelist, gooddrives, sizeof(int), drivecmp);
|
||||
config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_text, "Can't allocate memory");
|
||||
config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */
|
||||
CHECKALLOC(config_line, "Can't allocate memory");
|
||||
for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */
|
||||
drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
|
||||
|
||||
if (firsttime && (driveno == 0)) /* we've never configured before, */
|
||||
log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
|
||||
else
|
||||
log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);
|
||||
|
||||
if (drive->state == drive_up)
|
||||
/* Read in both copies of the configuration information */
|
||||
error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
|
||||
else {
|
||||
error = EIO;
|
||||
printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
|
||||
}
|
||||
|
||||
if (error != 0) {
|
||||
log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
|
||||
free_drive(drive); /* give it back */
|
||||
status = error;
|
||||
}
|
||||
/*
|
||||
* At this point, check that the two copies
|
||||
* are the same, and do something useful if
|
||||
* not. In particular, consider which is
|
||||
* newer, and what this means for the
|
||||
* integrity of the data on the drive.
|
||||
*/
|
||||
else {
|
||||
vinum_conf.drives_used++; /* another drive in use */
|
||||
/* Parse the configuration, and add it to the global configuration */
|
||||
for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */
|
||||
volatile int parse_status; /* return value from parse_config */
|
||||
|
||||
for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */
|
||||
*eptr++ = *cptr++;
|
||||
*eptr = '\0'; /* and delimit */
|
||||
if (setjmp(command_fail) == 0) { /* come back here on error and continue */
|
||||
parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */
|
||||
/*
|
||||
* parse_config recognizes referenced
|
||||
* drives and builds a drive entry for
|
||||
* them. This may expand the drive
|
||||
* table, thus invalidating the pointer.
|
||||
*/
|
||||
drive = &DRIVE[drivelist[driveno]]; /* point to the drive */
|
||||
|
||||
if (parse_status < 0) { /* error in config */
|
||||
/*
|
||||
* This config should have been parsed
|
||||
* in user space. If we run into
|
||||
* problems here, something serious is
|
||||
* afoot. Complain and let the user
|
||||
* snarf the config to see what's
|
||||
* wrong.
|
||||
*/
|
||||
log(LOG_ERR,
|
||||
"vinum: Config error on %s, aborting integration\n",
|
||||
drive->devicename);
|
||||
free_drive(drive); /* give it back */
|
||||
status = EINVAL;
|
||||
}
|
||||
}
|
||||
while (*cptr == '\n')
|
||||
cptr++; /* skip to next line */
|
||||
}
|
||||
}
|
||||
drive->flags |= VF_CONFIGURED; /* this drive's configuration is complete */
|
||||
}
|
||||
|
||||
Free(config_line);
|
||||
Free(config_text);
|
||||
Free(drivelist);
|
||||
vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */
|
||||
if (status != 0)
|
||||
printf("vinum: couldn't read configuration");
|
||||
else
|
||||
updateconfig(VF_READING_CONFIG); /* update from disk config */
|
||||
if (malloced)
|
||||
Free(devicename);
|
||||
unlock_config();
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare the modification dates of the drives, for qsort.
|
||||
* Return 1 if a < b, 0 if a == b, 01 if a > b: in other
|
||||
* words, sort backwards.
|
||||
*/
|
||||
int
|
||||
drivecmp(const void *va, const void *vb)
|
||||
{
|
||||
const struct drive *a = &DRIVE[*(const int *) va];
|
||||
const struct drive *b = &DRIVE[*(const int *) vb];
|
||||
|
||||
if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
|
||||
&& (a->label.last_update.tv_usec == b->label.last_update.tv_usec))
|
||||
return 0;
|
||||
else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec)
|
||||
|| ((a->label.last_update.tv_sec == b->label.last_update.tv_sec)
|
||||
&& (a->label.last_update.tv_usec > b->label.last_update.tv_usec)))
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,154 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumio.h,v 1.23 2003/05/04 05:25:46 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#define L 'F'                                               /* ID letter of our ioctls */

#define MAX_IOCTL_REPLY 1024                                /* size of the msg field of struct _ioctl_reply */

#ifdef VINUMDEBUG
/* Argument of the VINUM_DEBUG ioctl; only present in debug builds. */
struct debuginfo {
    int changeit;
    int param;
};

#endif
|
||||
|
||||
/* The kind of vinum object an ioctl message refers to. */
enum objecttype {
    drive_object,
    sd_object,
    plex_object,
    volume_object,
    invalid_object
};
|
||||
|
||||
/*
 * The state to set with VINUM_SETSTATE.  Each kind of object has
 * its own set of states, so these generic values need to be
 * translated later.
 */
enum objectstate {
    object_down,
    object_initializing,
    object_initialized,
    object_up
};
|
||||
|
||||
/*
|
||||
* This structure is used for modifying objects
|
||||
* (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
|
||||
* VINUM_DETACH, VINUM_REPLACE
|
||||
*/
|
||||
struct vinum_ioctl_msg {
|
||||
int index;
|
||||
enum objecttype type;
|
||||
enum objectstate state; /* state to set (VINUM_SETSTATE) */
|
||||
enum parityop op; /* for parity ops */
|
||||
int force; /* do it even if it doesn't make sense */
|
||||
int recurse; /* recurse (VINUM_REMOVE) */
|
||||
int verify; /* verify (initsd, rebuildparity) */
|
||||
int otherobject; /* superordinate object (attach),
|
||||
* replacement object (replace) */
|
||||
int rename; /* rename object (attach) */
|
||||
int64_t offset; /* offset of subdisk (for attach) */
|
||||
int blocksize; /* size of block to revive (bytes) */
|
||||
};
|
||||
|
||||
/* VINUM_CREATE returns a buffer of this kind */
|
||||
struct _ioctl_reply {
|
||||
int error;
|
||||
char msg[MAX_IOCTL_REPLY];
|
||||
};
|
||||
|
||||
struct vinum_rename_msg {
|
||||
int index;
|
||||
int recurse; /* rename subordinate objects too */
|
||||
enum objecttype type;
|
||||
char newname[MAXNAME]; /* new name to give to object */
|
||||
};
|
||||
|
||||
/*
 * ioctl requests.  All of them use the group letter L.  Request
 * numbers 81 and 97 are not used in this list.
 */
#define BUFSIZE 1024                                        /* size of buffer, including continuations */

/* Configuration */
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
#define VINUM_GETCONFIG _IOR(L, 65, struct __vinum_conf)    /* get global config */
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct _drive)       /* get drive config */
#define VINUM_SDCONFIG _IOWR(L, 67, struct _sd)             /* get subdisk config */
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct _plex)         /* get plex config */
#define VINUM_VOLCONFIG _IOWR(L, 69, struct _volume)        /* get volume config */
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct _sd)         /* get sd config for plex (plex, sdno) */
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
#define VINUM_SAVECONFIG _IOW(L, 72, int)                   /* write config to disk */
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0)                 /* trash config on disk */
#define VINUM_INIT _IOC(0, L, 74, 0)                        /* read config from disk */
#define VINUM_READCONFIG _IOC(IOC_IN | IOC_OUT, L, 75, BUFSIZE) /* read config from disk */
#ifdef VINUMDEBUG
#define VINUM_DEBUG _IOWR(L, 127, struct debuginfo)         /* call the debugger from ioctl () */
#endif

/*
 * Start an object.  Pass two integers:
 * msg [0] index in vinum_conf.<object>
 * msg [1] type of object (see below)
 *
 * Return ioctl_reply
 */
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0)               /* release locks and write config to disk */
#define VINUM_STARTCONFIG _IOW(L, 78, int)                  /* start a configuration operation */
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo)           /* get memory usage summary */
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc)            /* get specific malloc information [i] */
#define VINUM_INITSD _IOW(L, 82, int)                       /* initialize a subdisk */
#define VINUM_REMOVE _IOWR(L, 83, struct _ioctl_reply)      /* remove an object */
#define VINUM_READPOL _IOWR(L, 84, struct _ioctl_reply)     /* set read policy */
#define VINUM_SETSTATE_FORCE _IOC(IOC_IN | IOC_OUT, L, 85, MAX_IOCTL_REPLY) /* diddle object state */
#define VINUM_RESETSTATS _IOWR(L, 86, struct _ioctl_reply)  /* reset object stats */
#define VINUM_ATTACH _IOWR(L, 87, struct _ioctl_reply)      /* attach an object */
#define VINUM_DETACH _IOWR(L, 88, struct _ioctl_reply)      /* detach an object */

#define VINUM_RENAME _IOWR(L, 89, struct _ioctl_reply)      /* rename an object */
#define VINUM_REPLACE _IOWR(L, 90, struct _ioctl_reply)     /* replace an object */

#ifdef VINUMDEBUG
#define VINUM_RQINFO _IOWR(L, 91, struct rqinfo)            /* get request info [i] from trace buffer */
#endif

/* Daemon control and maintenance operations */
#define VINUM_DAEMON _IOC(0, L, 92, 0)                      /* perform the kernel part of Vinum daemon */
#define VINUM_FINDDAEMON _IOC(0, L, 93, 0)                  /* check for presence of Vinum daemon */
#define VINUM_SETDAEMON _IOW(L, 94, int)                    /* set daemon flags */
#define VINUM_GETDAEMON _IOR(L, 95, int)                    /* get daemon flags */
#define VINUM_PARITYOP _IOWR(L, 96, struct _ioctl_reply)    /* check/rebuild RAID-4/5 parity */
#define VINUM_MOVE _IOWR(L, 98, struct _ioctl_reply)        /* move an object */
|
@ -1,960 +0,0 @@
|
||||
/*
|
||||
* XXX replace all the checks on object validity with
|
||||
* calls to valid<object>
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumioctl.c,v 1.23 2003/05/23 01:02:22 grog Exp grog $
|
||||
*/
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
#include <sys/reboot.h>
|
||||
#endif
|
||||
|
||||
/*
 * Handlers for the individual super device requests; each one is
 * defined later in this file and is called from vinum_super_ioctl().
 */
void attachobject(struct vinum_ioctl_msg *);
|
||||
void detachobject(struct vinum_ioctl_msg *);
|
||||
void renameobject(struct vinum_rename_msg *);
|
||||
void replaceobject(struct vinum_ioctl_msg *);
|
||||
void moveobject(struct vinum_ioctl_msg *);
|
||||
void setreadpol(struct vinum_ioctl_msg *);
|
||||
|
||||
/*
 * Jump buffer armed with setjmp() by the VINUM_CREATE request in
 * vinum_super_ioctl().
 */
jmp_buf command_fail; /* return on a failed command */
|
||||
|
||||
/* ioctl routine */
|
||||
int
|
||||
vinumioctl(struct cdev *dev,
|
||||
u_long cmd,
|
||||
caddr_t data,
|
||||
int flag,
|
||||
struct thread *td)
|
||||
{
|
||||
unsigned int objno;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
/* First, decide what we're looking at */
|
||||
if ((minor(dev) == VINUM_SUPERDEV_MINOR)
|
||||
|| (minor(dev) == VINUM_DAEMON_MINOR))
|
||||
return vinum_super_ioctl(dev, cmd, data);
|
||||
else /* real device */
|
||||
switch (DEVTYPE(dev)) {
|
||||
case VINUM_SD_TYPE:
|
||||
case VINUM_SD2_TYPE: /* second half of sd namespace */
|
||||
objno = Sdno(dev);
|
||||
|
||||
sd = &SD[objno];
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGSECTORSIZE:
|
||||
*(u_int *) data = sd->sectorsize;
|
||||
return 0;
|
||||
|
||||
case DIOCGMEDIASIZE:
|
||||
*(u_int64_t *) data = sd->sectors * sd->sectorsize;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset.
|
||||
*/
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
|
||||
return 0; /* pretend we did it */
|
||||
|
||||
case VINUM_PLEX_TYPE:
|
||||
objno = Plexno(dev);
|
||||
|
||||
plex = &PLEX[objno];
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGSECTORSIZE:
|
||||
*(u_int64_t *) data = plex->sectorsize;
|
||||
return 0;
|
||||
|
||||
case DIOCGMEDIASIZE:
|
||||
*(u_int64_t *) data = plex->length * plex->sectorsize;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset.
|
||||
*/
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
|
||||
return 0; /* pretend we did it */
|
||||
|
||||
case VINUM_VOLUME_TYPE:
|
||||
objno = Volno(dev);
|
||||
|
||||
if ((unsigned) objno >= (unsigned) vinum_conf.volumes_allocated) /* not a valid volume */
|
||||
return ENXIO;
|
||||
vol = &VOL[objno];
|
||||
if (vol->state != volume_up) /* not up, */
|
||||
return EIO; /* I/O error */
|
||||
|
||||
switch (cmd) {
|
||||
case DIOCGSECTORSIZE:
|
||||
*(u_int *) data = vol->sectorsize;
|
||||
return 0;
|
||||
|
||||
case DIOCGMEDIASIZE:
|
||||
*(u_int64_t *) data = vol->size * vol->sectorsize;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We don't have this stuff on hardware,
|
||||
* so just pretend to do it so that
|
||||
* utilities don't get upset.
|
||||
*/
|
||||
case DIOCWDINFO: /* write partition info */
|
||||
case DIOCSDINFO: /* set partition info */
|
||||
return 0; /* not a titty */
|
||||
|
||||
default:
|
||||
return ENOTTY; /* not my kind of ioctl */
|
||||
}
|
||||
break;
|
||||
}
|
||||
return 0; /* XXX */
|
||||
}
|
||||
|
||||
/* Handle ioctls for the super device */
|
||||
int
|
||||
vinum_super_ioctl(struct cdev *dev,
|
||||
u_long cmd,
|
||||
caddr_t data)
|
||||
{
|
||||
int error = 0;
|
||||
unsigned int index; /* for transferring config info */
|
||||
unsigned int sdno; /* for transferring config info */
|
||||
int fe; /* free list element number */
|
||||
struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */
|
||||
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* save the address to reply to */
|
||||
if (error) /* bombed out */
|
||||
return 0; /* the reply will contain meaningful info */
|
||||
switch (cmd) {
|
||||
#ifdef VINUMDEBUG
|
||||
case VINUM_DEBUG:
|
||||
if (((struct debuginfo *) data)->changeit) /* change debug settings */
|
||||
debug = (((struct debuginfo *) data)->param);
|
||||
else {
|
||||
if (debug & DEBUG_REMOTEGDB)
|
||||
boothowto |= RB_GDB; /* serial debug line */
|
||||
else
|
||||
boothowto &= ~RB_GDB; /* local ddb */
|
||||
kdb_enter("vinum debug");
|
||||
}
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
case VINUM_CREATE: /* create a vinum object */
|
||||
error = lock_config(); /* get the config for us alone */
|
||||
if (error) /* can't do it, */
|
||||
return error; /* give up */
|
||||
error = setjmp(command_fail); /* come back here on error */
|
||||
if (error == 0) /* first time, */
|
||||
ioctl_reply->error = parse_user_config((char *) data, /* update the config */
|
||||
&keyword_set);
|
||||
else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
|
||||
ioctl_reply->error = EINVAL; /* note that something's up */
|
||||
ioctl_reply->msg[0] = '\0'; /* no message? */
|
||||
}
|
||||
unlock_config();
|
||||
return 0; /* must be 0 to return the real error info */
|
||||
|
||||
case VINUM_GETCONFIG: /* get the configuration information */
|
||||
bcopy(&vinum_conf, data, sizeof(vinum_conf));
|
||||
return 0;
|
||||
|
||||
/* start configuring the subsystem */
|
||||
case VINUM_STARTCONFIG:
|
||||
return start_config(*(int *) data); /* just lock it. Parameter is 'force' */
|
||||
|
||||
/*
|
||||
* Move the individual parts of the config to user space.
|
||||
*
|
||||
* Specify the index of the object in the first word of data,
|
||||
* and return the object there
|
||||
*/
|
||||
case VINUM_DRIVECONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.drives_allocated) /* can't do it */
|
||||
return ENXIO; /* bang */
|
||||
bcopy(&DRIVE[index], data, sizeof(struct _drive)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_SDCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.subdisks_allocated) /* can't do it */
|
||||
return ENXIO; /* bang */
|
||||
bcopy(&SD[index], data, sizeof(struct _sd)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.plexes_allocated) /* can't do it */
|
||||
return ENXIO; /* bang */
|
||||
bcopy(&PLEX[index], data, sizeof(struct _plex)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_VOLCONFIG:
|
||||
index = *(int *) data; /* get the index */
|
||||
if (index >= (unsigned) vinum_conf.volumes_allocated) /* can't do it */
|
||||
return ENXIO; /* bang */
|
||||
bcopy(&VOL[index], data, sizeof(struct _volume)); /* copy the config item out */
|
||||
return 0;
|
||||
|
||||
case VINUM_PLEXSDCONFIG:
|
||||
index = *(int *) data; /* get the plex index */
|
||||
sdno = ((int *) data)[1]; /* and the sd index */
|
||||
if ((index >= (unsigned) vinum_conf.plexes_allocated) /* plex doesn't exist */
|
||||
||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
|
||||
return ENXIO; /* bang */
|
||||
bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
|
||||
data,
|
||||
sizeof(struct _sd));
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We get called in two places: one from the
|
||||
* userland config routines, which call us
|
||||
* to complete the config and save it. This
|
||||
* call supplies the value 0 as a parameter.
|
||||
*
|
||||
* The other place is from the user "saveconfig"
|
||||
* routine, which can only work if we're *not*
|
||||
* configuring. In this case, supply parameter 1.
|
||||
*/
|
||||
case VINUM_SAVECONFIG:
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
if (*(int *) data == 0) /* finish config */
|
||||
finish_config(1); /* finish the configuration and update it */
|
||||
else
|
||||
return EBUSY; /* can't do it now */
|
||||
}
|
||||
save_config(); /* save configuration to disk */
|
||||
return 0;
|
||||
|
||||
case VINUM_RELEASECONFIG: /* release the config */
|
||||
if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
|
||||
finish_config(0); /* finish the configuration, don't change it */
|
||||
save_config(); /* save configuration to disk */
|
||||
} else
|
||||
error = EINVAL; /* release what config? */
|
||||
return error;
|
||||
|
||||
case VINUM_READCONFIG:
|
||||
if (((char *) data)[0] == '\0')
|
||||
ioctl_reply->error = vinum_scandisk(NULL); /* built your own list */
|
||||
else
|
||||
ioctl_reply->error = vinum_scandisk((char *) data);
|
||||
if (ioctl_reply->error == ENOENT) {
|
||||
if (vinum_conf.drives_used > 0)
|
||||
strcpy(ioctl_reply->msg, "no additional drives found");
|
||||
else
|
||||
strcpy(ioctl_reply->msg, "no drives found");
|
||||
} else if (ioctl_reply->error)
|
||||
strcpy(ioctl_reply->msg, "can't read configuration information, see log file");
|
||||
return 0; /* must be 0 to return the real error info */
|
||||
|
||||
case VINUM_INIT:
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETCONFIG:
|
||||
if (vinum_inactive(0)) { /* if the volumes are not active */
|
||||
/*
|
||||
* Note the open count. We may be called from v, so we'll be open.
|
||||
* Keep the count so we don't underflow
|
||||
*/
|
||||
free_vinum(1); /* clean up everything */
|
||||
log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n");
|
||||
ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
|
||||
ioctl_reply->error = 0;
|
||||
return 0;
|
||||
}
|
||||
return EBUSY;
|
||||
|
||||
case VINUM_SETSTATE:
|
||||
setstate((struct vinum_ioctl_msg *) data); /* set an object state */
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Set state by force, without changing
|
||||
* anything else.
|
||||
*/
|
||||
case VINUM_SETSTATE_FORCE:
|
||||
setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */
|
||||
return 0;
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
case VINUM_MEMINFO:
|
||||
vinum_meminfo(data);
|
||||
return 0;
|
||||
|
||||
case VINUM_MALLOCINFO:
|
||||
return vinum_mallocinfo(data);
|
||||
|
||||
case VINUM_RQINFO:
|
||||
return vinum_rqinfo(data);
|
||||
#endif
|
||||
|
||||
case VINUM_REMOVE:
|
||||
remove((struct vinum_ioctl_msg *) data); /* remove an object */
|
||||
return 0;
|
||||
|
||||
case VINUM_GETFREELIST: /* get a drive free list element */
|
||||
index = *(int *) data; /* get the drive index */
|
||||
fe = ((int *) data)[1]; /* and the free list element */
|
||||
if ((index >= (unsigned) vinum_conf.drives_allocated) /* plex doesn't exist */
|
||||
||(DRIVE[index].state == drive_unallocated))
|
||||
return ENODEV;
|
||||
if (fe >= DRIVE[index].freelist_entries) /* no such entry */
|
||||
return ENOENT;
|
||||
bcopy(&DRIVE[index].freelist[fe],
|
||||
data,
|
||||
sizeof(struct drive_freelist));
|
||||
return 0;
|
||||
|
||||
case VINUM_RESETSTATS:
|
||||
resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
|
||||
return 0;
|
||||
|
||||
/* attach an object to a superordinate object */
|
||||
case VINUM_ATTACH:
|
||||
attachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* detach an object from a superordinate object */
|
||||
case VINUM_DETACH:
|
||||
detachobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* rename an object */
|
||||
case VINUM_RENAME:
|
||||
renameobject((struct vinum_rename_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* replace an object */
|
||||
case VINUM_REPLACE:
|
||||
replaceobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
case VINUM_DAEMON:
|
||||
vinum_daemon(); /* perform the daemon */
|
||||
return 0;
|
||||
|
||||
case VINUM_FINDDAEMON: /* check for presence of daemon */
|
||||
return vinum_finddaemon();
|
||||
return 0;
|
||||
|
||||
case VINUM_SETDAEMON: /* set daemon flags */
|
||||
return vinum_setdaemonopts(*(int *) data);
|
||||
|
||||
case VINUM_GETDAEMON: /* get daemon flags */
|
||||
*(int *) data = daemon_options;
|
||||
return 0;
|
||||
|
||||
case VINUM_PARITYOP: /* check/rebuild RAID-4/5 parity */
|
||||
parityops((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
/* move an object */
|
||||
case VINUM_MOVE:
|
||||
moveobject((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
case VINUM_READPOL:
|
||||
setreadpol((struct vinum_ioctl_msg *) data);
|
||||
return 0;
|
||||
|
||||
default:
|
||||
/* FALLTHROUGH */
|
||||
break;
|
||||
}
|
||||
return 0; /* to keep the compiler happy */
|
||||
}
|
||||
|
||||
/*
|
||||
* The following four functions check the supplied
|
||||
* object index and return a pointer to the object
|
||||
* if it exists. Otherwise they store ENOENT and a
|
||||
* message in the reply and return NULL.
|
||||
*/
|
||||
struct drive *
|
||||
validdrive(int driveno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((driveno < vinum_conf.drives_allocated)
|
||||
&& (DRIVE[driveno].state > drive_referenced))
|
||||
return &DRIVE[driveno];
|
||||
strcpy(reply->msg, "No such drive");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct sd *
|
||||
validsd(int sdno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((sdno < vinum_conf.subdisks_allocated)
|
||||
&& (SD[sdno].state > sd_referenced))
|
||||
return &SD[sdno];
|
||||
strcpy(reply->msg, "No such subdisk");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct plex *
|
||||
validplex(int plexno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((plexno < vinum_conf.plexes_allocated)
|
||||
&& (PLEX[plexno].state > plex_referenced))
|
||||
return &PLEX[plexno];
|
||||
strcpy(reply->msg, "No such plex");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct volume *
|
||||
validvol(int volno, struct _ioctl_reply *reply)
|
||||
{
|
||||
if ((volno < vinum_conf.volumes_allocated)
|
||||
&& (VOL[volno].state > volume_uninit))
|
||||
return &VOL[volno];
|
||||
strcpy(reply->msg, "No such volume");
|
||||
reply->error = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* reset an object's stats */
|
||||
void
|
||||
resetstats(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object:
|
||||
if (msg->index < vinum_conf.drives_allocated) {
|
||||
struct drive *drive = &DRIVE[msg->index];
|
||||
if (drive->state > drive_referenced) {
|
||||
drive->reads = 0; /* number of reads on this drive */
|
||||
drive->writes = 0; /* number of writes on this drive */
|
||||
drive->bytes_read = 0; /* number of bytes read */
|
||||
drive->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case sd_object:
|
||||
if (msg->index < vinum_conf.subdisks_allocated) {
|
||||
struct sd *sd = &SD[msg->index];
|
||||
if (sd->state > sd_referenced) {
|
||||
sd->reads = 0; /* number of reads on this subdisk */
|
||||
sd->writes = 0; /* number of writes on this subdisk */
|
||||
sd->bytes_read = 0; /* number of bytes read */
|
||||
sd->bytes_written = 0; /* number of bytes written */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
if (msg->index < vinum_conf.plexes_allocated) {
|
||||
struct plex *plex = &PLEX[msg->index];
|
||||
if (plex->state > plex_referenced) {
|
||||
plex->reads = 0;
|
||||
plex->writes = 0; /* number of writes on this plex */
|
||||
plex->bytes_read = 0; /* number of bytes read */
|
||||
plex->bytes_written = 0; /* number of bytes written */
|
||||
plex->recovered_reads = 0; /* number of recovered read operations */
|
||||
plex->degraded_writes = 0; /* number of degraded writes */
|
||||
plex->parityless_writes = 0; /* number of parityless writes */
|
||||
plex->multiblock = 0; /* requests that needed more than one block */
|
||||
plex->multistripe = 0; /* requests that needed more than one stripe */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case volume_object:
|
||||
if (msg->index < vinum_conf.volumes_allocated) {
|
||||
struct volume *vol = &VOL[msg->index];
|
||||
if (vol->state > volume_uninit) {
|
||||
vol->bytes_read = 0; /* number of bytes read */
|
||||
vol->bytes_written = 0; /* number of bytes written */
|
||||
vol->reads = 0; /* number of reads on this volume */
|
||||
vol->writes = 0; /* number of writes on this volume */
|
||||
vol->recovered_reads = 0; /* reads recovered from another plex */
|
||||
reply->error = 0;
|
||||
return;
|
||||
}
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
case invalid_object: /* can't get this */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* attach an object to a superior object */
|
||||
void
|
||||
attachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
int sdno;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL) /* not a valid subdisk */
|
||||
return;
|
||||
plex = validplex(msg->otherobject, reply);
|
||||
if (plex) {
|
||||
/*
|
||||
* We should be more intelligent about this.
|
||||
* We should be able to reattach a dead
|
||||
* subdisk, but if we want to increase the total
|
||||
* number of subdisks, we have a lot of reshuffling
|
||||
* to do. XXX
|
||||
*/
|
||||
if ((plex->organization != plex_concat) /* can't attach to striped and RAID-4/5 */
|
||||
&&(!msg->force)) { /* without using force */
|
||||
reply->error = EINVAL; /* no message, the user should check */
|
||||
strcpy(reply->msg, "Can't attach to this plex organization");
|
||||
} else if (sd->plexno >= 0) { /* already belong to a plex */
|
||||
reply->error = EBUSY; /* no message, the user should check */
|
||||
sprintf(reply->msg, "%s is already attached to %s",
|
||||
sd->name,
|
||||
sd[sd->plexno].name);
|
||||
reply->msg[0] = '\0';
|
||||
} else {
|
||||
sd->plexoffset = msg->offset; /* this is where we want it */
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
|
||||
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
if (sd->state == sd_reviving)
|
||||
reply->error = EAGAIN; /* need to revive it */
|
||||
else
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
vol = validvol(msg->otherobject, reply); /* and volume information */
|
||||
if (vol) {
|
||||
if (vol->plexes == MAXPLEX) { /* we have too many already */
|
||||
reply->error = ENOSPC; /* nowhere to put it */
|
||||
strcpy(reply->msg, "Too many plexes");
|
||||
} else if (plex->volno >= 0) { /* the plex has an owner */
|
||||
reply->error = EBUSY; /* no message, the user should check */
|
||||
sprintf(reply->msg, "%s is already attached to %s",
|
||||
plex->name,
|
||||
VOL[plex->volno].name);
|
||||
} else {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
sd = &SD[plex->sdnos[sdno]];
|
||||
|
||||
if (sd->state > sd_down) /* real subdisk, vaguely accessible */
|
||||
set_sd_state(plex->sdnos[sdno], sd_stale, setstate_force); /* make it stale */
|
||||
}
|
||||
set_plex_state(plex->plexno, plex_up, setstate_none); /* update plex state */
|
||||
give_plex_to_volume(msg->otherobject, msg->index, 0); /* and give it to the volume */
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
reply->error = 0; /* all went well */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* detach an object from a superior object */
|
||||
void
|
||||
detachobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
int sdno;
|
||||
int plexno;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't detach a drive from anything */
|
||||
case volume_object: /* nor a volume */
|
||||
case invalid_object: /* "this can't happen" */
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
|
||||
return;
|
||||
|
||||
case sd_object:
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd == NULL)
|
||||
return;
|
||||
if (sd->plexno < 0) { /* doesn't belong to a plex */
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Subdisk is not attached");
|
||||
return;
|
||||
} else { /* valid plex number */
|
||||
plex = &PLEX[sd->plexno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((plex->state == plex_up) /* and the plex is up */
|
||||
||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
|
||||
reply->error = EBUSY; /* we need this sd */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd->plexno = -1; /* anonymous sd */
|
||||
if (plex->subdisks == 1) { /* this was the only subdisk */
|
||||
Free(plex->sdnos); /* free the subdisk array */
|
||||
plex->sdnos = NULL; /* and note the fact */
|
||||
plex->subdisks_allocated = 0; /* no subdisk space */
|
||||
} else {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
|
||||
break;
|
||||
}
|
||||
if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
|
||||
bcopy(&plex->sdnos[sdno + 1],
|
||||
&plex->sdnos[sdno],
|
||||
(plex->subdisks - 1 - sdno) * sizeof(int));
|
||||
}
|
||||
plex->subdisks--;
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
|
||||
/* this subdisk is named after the plex */
|
||||
{
|
||||
bcopy(sd->name,
|
||||
&sd->name[3],
|
||||
min(strlen(sd->name) + 1, MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
update_plex_config(plex->plexno, 0);
|
||||
if (isstriped(plex)) /* we've just mutilated our plex, */
|
||||
set_plex_state(plex->plexno,
|
||||
plex_down,
|
||||
setstate_force | setstate_configuring);
|
||||
if (plex->volno >= 0) /* plex attached to volume, */
|
||||
update_volume_config(plex->volno);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object:
|
||||
plex = validplex(msg->index, reply); /* get plex */
|
||||
if (plex == NULL)
|
||||
return;
|
||||
if (plex->volno >= 0) {
|
||||
int volno = plex->volno;
|
||||
|
||||
vol = &VOL[volno];
|
||||
if ((!msg->force) /* don't force things */
|
||||
&&((vol->state == volume_up) /* and the volume is up */
|
||||
&&(vol->plexes == 1))) { /* and this is the last plex */
|
||||
/*
|
||||
* XXX As elsewhere, check whether we will lose
|
||||
* mapping by removing this plex
|
||||
*/
|
||||
reply->error = EBUSY; /* we need this plex */
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex->volno = -1; /* anonymous plex */
|
||||
for (plexno = 0; plexno < vol->plexes; plexno++) {
|
||||
if (vol->plex[plexno] == msg->index) /* found our plex */
|
||||
break;
|
||||
}
|
||||
if (plexno < (vol->plexes - 1)) /* not the last one, compact */
|
||||
bcopy(&vol->plex[plexno + 1],
|
||||
&vol->plex[plexno],
|
||||
(vol->plexes - 1 - plexno) * sizeof(int));
|
||||
vol->plexes--;
|
||||
vol->last_plex_read = 0; /* don't go beyond the end */
|
||||
if (!bcmp(vol->name, plex->name, strlen(vol->name) + 1))
|
||||
/* this plex is named after the volume */
|
||||
{
|
||||
/* First, check if the subdisks are the same */
|
||||
if (msg->recurse) {
|
||||
int sdno;
|
||||
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]];
|
||||
|
||||
if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
|
||||
/* subdisk is named after the plex */
|
||||
{
|
||||
bcopy(sd->name,
|
||||
&sd->name[3],
|
||||
min(strlen(sd->name) + 1, MAXSDNAME - 3));
|
||||
bcopy("ex-", sd->name, 3);
|
||||
sd->name[MAXSDNAME - 1] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
bcopy(plex->name,
|
||||
&plex->name[3],
|
||||
min(strlen(plex->name) + 1, MAXPLEXNAME - 3));
|
||||
bcopy("ex-", plex->name, 3);
|
||||
plex->name[MAXPLEXNAME - 1] = '\0';
|
||||
}
|
||||
update_volume_config(volno);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
} else {
|
||||
reply->error = ENOENT;
|
||||
strcpy(reply->msg, "Plex is not attached");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
renameobject(struct vinum_rename_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
|
||||
switch (msg->type) {
|
||||
case drive_object: /* you can't attach a drive to anything */
|
||||
if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
drive = validdrive(msg->index, reply);
|
||||
if (drive) {
|
||||
bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case sd_object: /* you can't attach a subdisk to anything */
|
||||
if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
sd = validsd(msg->index, reply);
|
||||
if (sd) {
|
||||
bcopy(msg->newname, sd->name, MAXSDNAME);
|
||||
update_sd_config(sd->sdno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case plex_object: /* you can't attach a plex to anything */
|
||||
if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
plex = validplex(msg->index, reply);
|
||||
if (plex) {
|
||||
bcopy(msg->newname, plex->name, MAXPLEXNAME);
|
||||
update_plex_config(plex->plexno, 0);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case volume_object: /* you can't attach a volume to anything */
|
||||
if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
|
||||
reply->error = EEXIST;
|
||||
reply->msg[0] = '\0';
|
||||
return;
|
||||
}
|
||||
vol = validvol(msg->index, reply);
|
||||
if (vol) {
|
||||
bcopy(msg->newname, vol->name, MAXVOLNAME);
|
||||
update_volume_config(msg->index);
|
||||
save_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
return;
|
||||
|
||||
case invalid_object:
|
||||
reply->error = EINVAL;
|
||||
reply->msg[0] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace one object with another.
|
||||
* Currently only for drives.
|
||||
* message->index is the drive number of the old drive
|
||||
* message->otherobject is the drive number of the new drive
|
||||
*/
|
||||
void
|
||||
replaceobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
|
||||
reply->error = ENODEV; /* until I know how to do this */
|
||||
strcpy(reply->msg, "replace not implemented yet");
|
||||
/* save_config (); */
|
||||
}
|
||||
|
||||
void
|
||||
moveobject(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
|
||||
/* Check that our objects are valid (i.e. they exist) */
|
||||
drive = validdrive(msg->index, (struct _ioctl_reply *) msg);
|
||||
if (drive == NULL)
|
||||
return;
|
||||
sd = validsd(msg->otherobject, (struct _ioctl_reply *) msg);
|
||||
if (sd == NULL)
|
||||
return;
|
||||
if (sd->driveno == msg->index) /* sd already belongs to drive */
|
||||
return;
|
||||
|
||||
if (sd->state > sd_stale)
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make the subdisk stale */
|
||||
else
|
||||
sd->state = sd_empty;
|
||||
if (sd->plexno >= 0) /* part of a plex, */
|
||||
update_plex_state(sd->plexno); /* update its state */
|
||||
|
||||
/* Return the space on the old drive */
|
||||
if ((sd->driveno >= 0) /* we have a drive, */
|
||||
&&(sd->sectors > 0)) /* and some space on it */
|
||||
return_drive_space(sd->driveno, /* return the space */
|
||||
sd->driveoffset,
|
||||
sd->sectors);
|
||||
|
||||
/* Reassign the old subdisk */
|
||||
sd->driveno = msg->index;
|
||||
sd->driveoffset = -1; /* let the drive decide where to put us */
|
||||
give_sd_to_drive(sd->sdno);
|
||||
reply->error = 0;
|
||||
}
|
||||
|
||||
void
|
||||
setreadpol(struct vinum_ioctl_msg *msg)
|
||||
{
|
||||
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
|
||||
struct volume *vol;
|
||||
struct plex *plex;
|
||||
int myplexno = -1;
|
||||
|
||||
/* Check that our objects are valid (i.e. they exist) */
|
||||
vol = validvol(msg->index, reply);
|
||||
if (vol == NULL)
|
||||
return;
|
||||
|
||||
/* If a plex was specified, check that is is valid */
|
||||
if (msg->otherobject >= 0) {
|
||||
plex = validplex(msg->otherobject, reply);
|
||||
if (vol == NULL)
|
||||
return;
|
||||
|
||||
/* Is it attached to this volume? */
|
||||
myplexno = my_plex(msg->index, msg->otherobject);
|
||||
if (myplexno < 0) {
|
||||
strcpy(reply->msg, "Plex is not attached to volume");
|
||||
reply->error = ENOENT;
|
||||
return;
|
||||
}
|
||||
}
|
||||
lock_config();
|
||||
vol->preferred_plex = myplexno;
|
||||
save_config();
|
||||
unlock_config();
|
||||
reply->error = 0;
|
||||
}
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,152 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumkw.h,v 1.20 2003/05/07 03:32:09 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Command keywords that vinum knows. These include both user-level
|
||||
* and kernel-level stuff
|
||||
*/
|
||||
|
||||
/*
|
||||
* Our complete vocabulary. The names of the commands are
|
||||
* the same as the identifier without the kw_ at the beginning
|
||||
* (i.e. kw_create defines the "create" keyword). Preprocessor
|
||||
* magic in parser.c does the rest.
|
||||
*
|
||||
* To add a new word: put it in the table below and one of the
|
||||
* lists in vinumparser.c (probably keywords).
|
||||
*/
|
||||
enum keyword {
|
||||
kw_create,
|
||||
kw_modify,
|
||||
kw_list,
|
||||
kw_l = kw_list,
|
||||
kw_ld, /* list drive */
|
||||
kw_ls, /* list subdisk */
|
||||
kw_lp, /* list plex */
|
||||
kw_lv, /* list volume */
|
||||
kw_set,
|
||||
kw_rm,
|
||||
kw_mv, /* move object */
|
||||
kw_move, /* synonym for mv */
|
||||
kw_start,
|
||||
kw_stop,
|
||||
kw_makedev, /* make /dev/vinum devices */
|
||||
kw_setdaemon, /* set daemon flags */
|
||||
kw_getdaemon, /* get daemon flags */
|
||||
kw_help,
|
||||
kw_drive,
|
||||
kw_partition,
|
||||
kw_sd,
|
||||
kw_subdisk = kw_sd,
|
||||
kw_plex,
|
||||
kw_volume,
|
||||
kw_vol = kw_volume,
|
||||
kw_read,
|
||||
kw_readpol,
|
||||
kw_org,
|
||||
kw_name,
|
||||
kw_concat,
|
||||
kw_striped,
|
||||
kw_raid4,
|
||||
kw_raid5,
|
||||
kw_driveoffset,
|
||||
kw_plexoffset,
|
||||
kw_len,
|
||||
kw_length = kw_len,
|
||||
kw_size = kw_len,
|
||||
kw_state,
|
||||
kw_setupstate,
|
||||
kw_d, /* flag names */
|
||||
kw_f,
|
||||
kw_r,
|
||||
kw_s,
|
||||
kw_v,
|
||||
kw_w,
|
||||
kw_round, /* round robin */
|
||||
/*
|
||||
* The first of these is a volume attribute ("prefer plex"), and the
|
||||
* second is a plex attribute ("preferred" means that the volume
|
||||
* prefers this plex).
|
||||
*/
|
||||
kw_prefer, /* prefer plex */
|
||||
kw_preferred, /* preferred plex */
|
||||
kw_device,
|
||||
kw_init,
|
||||
kw_resetconfig,
|
||||
kw_writethrough,
|
||||
kw_writeback,
|
||||
kw_replace,
|
||||
kw_resetstats,
|
||||
kw_attach,
|
||||
kw_detach,
|
||||
kw_rename,
|
||||
kw_printconfig,
|
||||
kw_saveconfig,
|
||||
kw_hotspare,
|
||||
kw_detached,
|
||||
kw_debug, /* go into debugger */
|
||||
kw_stripe,
|
||||
kw_mirror,
|
||||
kw_info,
|
||||
kw_quit,
|
||||
kw_max,
|
||||
kw_setstate,
|
||||
kw_checkparity,
|
||||
kw_rebuildparity,
|
||||
kw_dumpconfig,
|
||||
kw_retryerrors,
|
||||
kw_invalid_keyword = -1
|
||||
};
|
||||
|
||||
struct _keywords {
|
||||
char *name;
|
||||
enum keyword keyword;
|
||||
};
|
||||
|
||||
struct keywordset {
|
||||
int size;
|
||||
struct _keywords *k;
|
||||
};
|
||||
|
||||
extern struct _keywords keywords[];
|
||||
extern struct _keywords flag_keywords[];
|
||||
|
||||
extern struct keywordset keyword_set;
|
||||
extern struct keywordset flag_set;
|
||||
|
||||
/* Parser functions */
|
||||
|
||||
enum keyword get_keyword(char *, struct keywordset *);
|
||||
int tokenize(char *, char *[], int);
|
@ -1,266 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumlock.c,v 1.19 2003/05/23 01:07:18 grog Exp $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
/* Lock a drive, wait if it's in use */
|
||||
#ifdef VINUMDEBUG
|
||||
int
|
||||
lockdrive(struct drive *drive, char *file, int line)
|
||||
#else
|
||||
int
|
||||
lockdrive(struct drive *drive)
|
||||
#endif
|
||||
{
|
||||
int error;
|
||||
|
||||
/* XXX get rid of drive->flags |= VF_LOCKING; */
|
||||
if ((drive->flags & VF_LOCKED) /* it's locked */
|
||||
&&(drive->pid == curproc->p_pid)) { /* by us! */
|
||||
#ifdef VINUMDEBUG
|
||||
log(LOG_WARNING,
|
||||
"vinum lockdrive: already locking %s from %s:%d, called from %s:%d\n",
|
||||
drive->label.name,
|
||||
drive->lockfilename,
|
||||
drive->lockline,
|
||||
basename(file),
|
||||
line);
|
||||
#else
|
||||
log(LOG_WARNING,
|
||||
"vinum lockdrive: already locking %s\n",
|
||||
drive->label.name);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
while ((drive->flags & VF_LOCKED) != 0) {
|
||||
/*
|
||||
* There are problems sleeping on a unique identifier,
|
||||
* since the drive structure can move, and the unlock
|
||||
* function can be called after killing the drive.
|
||||
* Solve this by waiting on this function; the number
|
||||
* of conflicts is negligible.
|
||||
*/
|
||||
if ((error = tsleep(&lockdrive,
|
||||
PRIBIO,
|
||||
"vindrv",
|
||||
0)) != 0)
|
||||
return error;
|
||||
}
|
||||
drive->flags |= VF_LOCKED;
|
||||
drive->pid = curproc->p_pid; /* it's a panic error if curproc is null */
|
||||
#ifdef VINUMDEBUG
|
||||
bcopy(basename(file), drive->lockfilename, 15);
|
||||
drive->lockfilename[15] = '\0'; /* truncate if necessary */
|
||||
drive->lockline = line;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock a drive and let the next one at it */
|
||||
void
|
||||
unlockdrive(struct drive *drive)
|
||||
{
|
||||
drive->flags &= ~VF_LOCKED;
|
||||
/* we don't reset pid: it's of hysterical interest */
|
||||
wakeup(&lockdrive);
|
||||
}
|
||||
|
||||
/* Lock a stripe of a plex, wait if it's in use */
|
||||
struct rangelock *
|
||||
lockrange(daddr_t stripe, struct buf *bp, struct plex *plex)
|
||||
{
|
||||
struct rangelock *lock;
|
||||
struct rangelock *pos; /* position of first free lock */
|
||||
int foundlocks; /* number of locks found */
|
||||
|
||||
/*
|
||||
* We could get by without counting the number
|
||||
* of locks we find, but we have a linear search
|
||||
* through a table which in most cases will be
|
||||
* empty. It's faster to stop when we've found
|
||||
* all the locks that are there. This is also
|
||||
* the reason why we put pos at the beginning
|
||||
* instead of the end, though it requires an
|
||||
* extra test.
|
||||
*/
|
||||
pos = NULL;
|
||||
foundlocks = 0;
|
||||
|
||||
/*
|
||||
* we can't use 0 as a valid address, so
|
||||
* increment all addresses by 1.
|
||||
*/
|
||||
stripe++;
|
||||
mtx_lock(plex->lockmtx);
|
||||
|
||||
/* Wait here if the table is full */
|
||||
while (plex->usedlocks == PLEX_LOCKS) /* all in use */
|
||||
msleep(&plex->usedlocks, plex->lockmtx, PRIBIO, "vlock", 0);
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (plex->usedlocks >= PLEX_LOCKS)
|
||||
panic("lockrange: Too many locks in use");
|
||||
#endif
|
||||
|
||||
lock = plex->lock; /* pointer in lock table */
|
||||
if (plex->usedlocks > 0) /* something locked, */
|
||||
/* Search the lock table for our stripe */
|
||||
for (; lock < &plex->lock[PLEX_LOCKS]
|
||||
&& foundlocks < plex->usedlocks;
|
||||
lock++) {
|
||||
if (lock->stripe) { /* in use */
|
||||
foundlocks++; /* found another one in use */
|
||||
if ((lock->stripe == stripe) /* it's our stripe */
|
||||
&&(lock->bp != bp)) { /* but not our request */
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_LOCKREQS) {
|
||||
struct rangelockinfo lockinfo;
|
||||
|
||||
lockinfo.stripe = stripe;
|
||||
lockinfo.bp = bp;
|
||||
lockinfo.plexno = plex->plexno;
|
||||
logrq(loginfo_lockwait, (union rqinfou) &lockinfo, bp);
|
||||
}
|
||||
#endif
|
||||
plex->lockwaits++; /* waited one more time */
|
||||
msleep(lock, plex->lockmtx, PRIBIO, "vrlock", 0);
|
||||
lock = &plex->lock[-1]; /* start again */
|
||||
foundlocks = 0;
|
||||
pos = NULL;
|
||||
}
|
||||
} else if (pos == NULL) /* still looking for somewhere? */
|
||||
pos = lock; /* a place to put this one */
|
||||
}
|
||||
/*
|
||||
* This untidy looking code ensures that we'll
|
||||
* always end up pointing to the first free lock
|
||||
* entry, thus minimizing the number of
|
||||
* iterations necessary.
|
||||
*/
|
||||
if (pos == NULL) /* didn't find one on the way, */
|
||||
pos = lock; /* use the one we're pointing to */
|
||||
|
||||
/*
|
||||
* The address range is free, and we're pointing
|
||||
* to the first unused entry. Make it ours.
|
||||
*/
|
||||
pos->stripe = stripe;
|
||||
pos->bp = bp;
|
||||
plex->usedlocks++; /* one more lock */
|
||||
mtx_unlock(plex->lockmtx);
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_LOCKREQS) {
|
||||
struct rangelockinfo lockinfo;
|
||||
|
||||
lockinfo.stripe = stripe;
|
||||
lockinfo.bp = bp;
|
||||
lockinfo.plexno = plex->plexno;
|
||||
logrq(loginfo_lock, (union rqinfou) &lockinfo, bp);
|
||||
}
|
||||
#endif
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Unlock a volume and let the next one at it */
|
||||
void
|
||||
unlockrange(int plexno, struct rangelock *lock)
|
||||
{
|
||||
struct plex *plex;
|
||||
|
||||
plex = &PLEX[plexno];
|
||||
#ifdef DIAGNOSTIC
|
||||
if (lock < &plex->lock[0] || lock >= &plex->lock[PLEX_LOCKS])
|
||||
panic("vinum: rangelock %p on plex %d invalid, not between %p and %p",
|
||||
lock,
|
||||
plexno,
|
||||
&plex->lock[0],
|
||||
&plex->lock[PLEX_LOCKS]);
|
||||
#endif
|
||||
#ifdef VINUMDEBUG
|
||||
if (debug & DEBUG_LOCKREQS) {
|
||||
struct rangelockinfo lockinfo;
|
||||
|
||||
lockinfo.stripe = lock->stripe;
|
||||
lockinfo.bp = lock->bp;
|
||||
lockinfo.plexno = plex->plexno;
|
||||
logrq(loginfo_lockwait, (union rqinfou) &lockinfo, lock->bp);
|
||||
}
|
||||
#endif
|
||||
lock->stripe = 0; /* no longer used */
|
||||
plex->usedlocks--; /* one less lock */
|
||||
if (plex->usedlocks == PLEX_LOCKS - 1) /* we were full, */
|
||||
wakeup(&plex->usedlocks); /* get a waiter if one's there */
|
||||
wakeup((void *) lock);
|
||||
}
|
||||
|
||||
/* Get a lock for the global config. Wait if it's not available. */
|
||||
int
|
||||
lock_config(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
while ((vinum_conf.flags & VF_LOCKED) != 0) {
|
||||
vinum_conf.flags |= VF_LOCKING;
|
||||
if ((error = tsleep(&vinum_conf, PRIBIO, "vincfg", 0)) != 0)
|
||||
return error;
|
||||
}
|
||||
vinum_conf.flags |= VF_LOCKED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unlock global config and wake up any waiters. */
|
||||
void
|
||||
unlock_config(void)
|
||||
{
|
||||
vinum_conf.flags &= ~VF_LOCKED;
|
||||
if ((vinum_conf.flags & VF_LOCKING) != 0) {
|
||||
vinum_conf.flags &= ~VF_LOCKING;
|
||||
wakeup(&vinum_conf);
|
||||
}
|
||||
}
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,290 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinummemory.c,v 1.31 2003/05/23 01:08:36 grog Exp $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
#include <dev/vinum/request.h>
|
||||
extern struct rqinfo rqinfo[];
|
||||
extern struct rqinfo *rqip;
|
||||
int rqinfo_size = RQINFO_SIZE; /* for debugger */
|
||||
|
||||
#undef longjmp /* this was defined as LongJmp */
|
||||
#define strrchr rindex
|
||||
#ifdef __i386__                                             /* check for validity */
/*
 * Checking wrapper for longjmp: panic if the saved stack pointer,
 * frame pointer or return address in buf lies below 0xc0000000,
 * otherwise perform the jump.
 */
void
LongJmp(jmp_buf buf, int retval)
{
    /*
     * longjmp is not documented, not even jmp_buf.
     * This is what's in i386/i386/support.s:
     * ENTRY(longjmp)
     *    movl 4(%esp),%eax
     *    movl (%eax),%ebx      restore ebx
     *    movl 4(%eax),%esp     restore esp
     *    movl 8(%eax),%ebp     restore ebp
     *    movl 12(%eax),%esi    restore esi
     *    movl 16(%eax),%edi    restore edi
     *    movl 20(%eax),%edx    get rta
     *    movl %edx,(%esp)      put in return frame
     *    xorl %eax,%eax        return(1);
     *    incl %eax
     *    ret
     *
     * from which we deduce the structure of jmp_buf:
     */
    struct JmpBuf {
        int jb_ebx;
        int jb_esp;
        int jb_ebp;
        int jb_esi;
        int jb_edi;
        int jb_eip;
    };

    /* presumably the lowest kernel address on i386 -- TODO confirm */
    const unsigned int lowest = 0xc0000000;
    struct JmpBuf *jb = (struct JmpBuf *) buf;

    if (!((jb->jb_esp >= lowest)
            && (jb->jb_ebp >= lowest)
            && (jb->jb_eip >= lowest)))
        panic("Invalid longjmp");
    longjmp(buf, retval);
}

#else                                                       /* not i386 */
#define LongJmp longjmp                                     /* just use the kernel function */
#endif                                                      /* i386 */
|
||||
|
||||
/*
 * Return a pointer to the last component of a path name: the text
 * after the final '/', or the whole string if it contains no '/'.
 * The result points into file; nothing is copied or modified.
 */
char *
basename(char *file)
{
    char *slash = strrchr(file, '/');                       /* last separator, if any */

    return (slash != NULL) ? slash + 1 : file;
}
|
||||
#endif /* VINUMDEBUG */
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
void
|
||||
expand_table(void **table, int oldsize, int newsize, char *file, int line)
|
||||
#else
|
||||
void
|
||||
expand_table(void **table, int oldsize, int newsize)
|
||||
#endif
|
||||
{
|
||||
if (newsize > oldsize) {
|
||||
int *temp;
|
||||
int s;
|
||||
|
||||
s = splhigh();
|
||||
#ifdef VINUMDEBUG
|
||||
temp = (int *) MMalloc(newsize, file, line); /* allocate a new table */
|
||||
#else
|
||||
temp = (int *) Malloc(newsize); /* allocate a new table */
|
||||
#endif
|
||||
CHECKALLOC(temp, "vinum: Can't expand table\n");
|
||||
bzero((char *) temp, newsize); /* clean it all out */
|
||||
if (*table != NULL) { /* already something there, */
|
||||
bcopy((char *) *table, (char *) temp, oldsize); /* copy it to the old table */
|
||||
#ifdef VINUMDEBUG
|
||||
FFree(*table, file, line);
|
||||
#else
|
||||
Free(*table);
|
||||
#endif
|
||||
}
|
||||
*table = temp;
|
||||
splx(s);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
#define MALLOCENTRIES 16384
|
||||
int malloccount = 0;
|
||||
int highwater = 0; /* highest index ever allocated */
|
||||
struct mc malloced[MALLOCENTRIES];
|
||||
|
||||
#define FREECOUNT 64
|
||||
int freecount = FREECOUNT; /* for debugger */
|
||||
int lastfree = 0;
|
||||
struct mc freeinfo[FREECOUNT];
|
||||
|
||||
int total_malloced;
|
||||
static int mallocseq = 0;
|
||||
|
||||
caddr_t
|
||||
MMalloc(int size, char *file, int line)
|
||||
{
|
||||
int s;
|
||||
caddr_t result;
|
||||
int i;
|
||||
|
||||
if (malloccount >= MALLOCENTRIES) { /* too many */
|
||||
log(LOG_ERR, "vinum: can't allocate table space to trace memory allocation");
|
||||
return 0; /* can't continue */
|
||||
}
|
||||
/* Wait for malloc if we can */
|
||||
result = malloc(size,
|
||||
M_DEVBUF,
|
||||
curthread->td_intr_nesting_level == 0 ? M_WAITOK : M_NOWAIT);
|
||||
if (result == NULL)
|
||||
log(LOG_ERR, "vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
|
||||
else {
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if (((result + size) > malloced[i].address)
|
||||
&& (result < malloced[i].address + malloced[i].size)) /* overlap */
|
||||
kdb_enter("Malloc overlap");
|
||||
}
|
||||
if (result) {
|
||||
char *f = basename(file);
|
||||
|
||||
i = malloccount++;
|
||||
total_malloced += size;
|
||||
microtime(&malloced[i].time);
|
||||
malloced[i].seq = mallocseq++;
|
||||
malloced[i].size = size;
|
||||
malloced[i].line = line;
|
||||
malloced[i].address = result;
|
||||
strlcpy(malloced[i].file, f, MCFILENAMELEN);
|
||||
}
|
||||
if (malloccount > highwater)
|
||||
highwater = malloccount;
|
||||
splx(s);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
FFree(void *mem, char *file, int line)
|
||||
{
|
||||
int s;
|
||||
int i;
|
||||
|
||||
s = splhigh();
|
||||
for (i = 0; i < malloccount; i++) {
|
||||
if ((caddr_t) mem == malloced[i].address) { /* found it */
|
||||
bzero(mem, malloced[i].size); /* XXX */
|
||||
free(mem, M_DEVBUF);
|
||||
malloccount--;
|
||||
total_malloced -= malloced[i].size;
|
||||
if (debug & DEBUG_MEMFREE) { /* keep track of recent frees */
|
||||
char *f = strrchr(file, '/'); /* chop off dirname if present */
|
||||
|
||||
if (f == NULL)
|
||||
f = file;
|
||||
else
|
||||
f++; /* skip the / */
|
||||
|
||||
microtime(&freeinfo[lastfree].time);
|
||||
freeinfo[lastfree].seq = malloced[i].seq;
|
||||
freeinfo[lastfree].size = malloced[i].size;
|
||||
freeinfo[lastfree].line = line;
|
||||
freeinfo[lastfree].address = mem;
|
||||
bcopy(f, freeinfo[lastfree].file, MCFILENAMELEN);
|
||||
if (++lastfree == FREECOUNT)
|
||||
lastfree = 0;
|
||||
}
|
||||
if (i < malloccount) /* more coming after */
|
||||
bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
}
|
||||
splx(s);
|
||||
log(LOG_ERR,
|
||||
"Freeing unallocated data at 0x%p from %s, line %d\n",
|
||||
mem,
|
||||
file,
|
||||
line);
|
||||
kdb_enter("Free");
|
||||
}
|
||||
|
||||
void
|
||||
vinum_meminfo(caddr_t data)
|
||||
{
|
||||
struct meminfo *m = (struct meminfo *) data;
|
||||
|
||||
m->mallocs = malloccount;
|
||||
m->total_malloced = total_malloced;
|
||||
m->malloced = malloced;
|
||||
m->highwater = highwater;
|
||||
}
|
||||
|
||||
int
|
||||
vinum_mallocinfo(caddr_t data)
|
||||
{
|
||||
struct mc *m = (struct mc *) data;
|
||||
unsigned int ent = m->seq; /* index of entry to return */
|
||||
|
||||
if (ent >= malloccount)
|
||||
return ENOENT;
|
||||
m->address = malloced[ent].address;
|
||||
m->size = malloced[ent].size;
|
||||
m->line = malloced[ent].line;
|
||||
m->seq = malloced[ent].seq;
|
||||
strlcpy(m->file, malloced[ent].file, MCFILENAMELEN);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* return the nth request trace buffer entry. This
|
||||
* is indexed back from the current entry (which
|
||||
* has index 0)
|
||||
*/
|
||||
int
|
||||
vinum_rqinfo(caddr_t data)
|
||||
{
|
||||
struct rqinfo *rq = (struct rqinfo *) data;
|
||||
int ent = *(int *) data; /* 1st word is index */
|
||||
int lastent = rqip - rqinfo; /* entry number of current entry */
|
||||
|
||||
if (ent >= RQINFO_SIZE) /* out of the table */
|
||||
return ENOENT;
|
||||
if ((ent = lastent - ent - 1) < 0)
|
||||
ent += RQINFO_SIZE; /* roll over backwards */
|
||||
bcopy(&rqinfo[ent], rq, sizeof(struct rqinfo));
|
||||
return 0;
|
||||
}
|
||||
#endif
|
@ -1,321 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumobj.h,v 1.7 2003/05/23 01:08:58 grog Exp $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Definitions of Vinum objects: drive, subdisk, plex and volume.
|
||||
* This file is included both by userland programs and by kernel code.
|
||||
* The userland structures are a subset of the kernel structures, and
|
||||
* all userland fields are at the beginning, so that a simple copy in
|
||||
* the length of the userland structure will be sufficient. In order
|
||||
* to perform this copy, vinumioctl must know both structures, so it
|
||||
* includes this file again with _KERNEL reset.
|
||||
*/
|
||||
|
||||
#ifndef _KERNEL
|
||||
/*
|
||||
* Flags for all objects. Most of them only apply
|
||||
* to specific objects, but we currently have
|
||||
* space for all in any 32 bit flags word.
|
||||
*/
|
||||
enum objflags {
|
||||
VF_LOCKED = 1, /* somebody has locked access to this object */
|
||||
VF_LOCKING = 2, /* we want access to this object */
|
||||
VF_OPEN = 4, /* object has openers */
|
||||
VF_WRITETHROUGH = 8, /* volume: write through */
|
||||
VF_INITED = 0x10, /* unit has been initialized */
|
||||
VF_WLABEL = 0x20, /* label area is writable */
|
||||
VF_LABELLING = 0x40, /* unit is currently being labelled */
|
||||
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
|
||||
VF_RAW = 0x100, /* raw volume (no file system) */
|
||||
VF_LOADED = 0x200, /* module is loaded */
|
||||
VF_CONFIGURING = 0x400, /* somebody is changing the config */
|
||||
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
|
||||
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
|
||||
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
|
||||
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
|
||||
VF_FORCECONFIG = 0x8000, /* configure drives even with different names */
|
||||
VF_NEWBORN = 0x10000, /* for objects: we've just created it */
|
||||
VF_CONFIGURED = 0x20000, /* for drives: we read the config */
|
||||
VF_STOPPING = 0x40000, /* for vinum_conf: stop on last close */
|
||||
VF_DAEMONOPEN = 0x80000, /* the daemon has us open (only superdev) */
|
||||
VF_CREATED = 0x100000, /* for volumes: freshly created, more than new */
|
||||
VF_HOTSPARE = 0x200000, /* for drives: use as hot spare */
|
||||
VF_RETRYERRORS = 0x400000, /* don't down subdisks on I/O errors */
|
||||
VF_HASDEBUG = 0x800000, /* set if we support debug */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* Global configuration information for the vinum subsystem */
|
||||
#ifdef _KERNEL
|
||||
struct _vinum_conf
|
||||
#else
|
||||
struct __vinum_conf
|
||||
#endif
|
||||
{
|
||||
int version; /* version of structures */
|
||||
#ifdef _KERNEL
|
||||
/* Pointers to vinum structures */
|
||||
struct drive *drive;
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *volume;
|
||||
#else
|
||||
/* Pointers to vinum structures */
|
||||
struct _drive *drive;
|
||||
struct _sd *sd;
|
||||
struct _plex *plex;
|
||||
struct _volume *volume;
|
||||
#endif
|
||||
|
||||
/* the number allocated of each object */
|
||||
int drives_allocated;
|
||||
int subdisks_allocated;
|
||||
int plexes_allocated;
|
||||
int volumes_allocated;
|
||||
|
||||
/* and the number currently in use */
|
||||
/*
|
||||
* Note that drives_used is not valid during drive recognition
|
||||
* (vinum_scandisk and friends). Many invalid drives are added and
|
||||
* later removed; the count isn't correct until we leave
|
||||
* vinum_scandisk.
|
||||
*/
|
||||
int drives_used;
|
||||
int subdisks_used;
|
||||
int plexes_used;
|
||||
int volumes_used;
|
||||
|
||||
int flags; /* see above */
|
||||
|
||||
#define VINUM_MAXACTIVE 30000 /* maximum number of active requests */
|
||||
int active; /* current number of requests outstanding */
|
||||
int maxactive; /* maximum number of requests ever outstanding */
|
||||
#ifdef _KERNEL
|
||||
#ifdef VINUMDEBUG
|
||||
struct request *lastrq;
|
||||
struct buf *lastbuf;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Use these defines to simplify code */
|
||||
#define DRIVE vinum_conf.drive
|
||||
#define SD vinum_conf.sd
|
||||
#define PLEX vinum_conf.plex
|
||||
#define VOL vinum_conf.volume
|
||||
#define VFLAGS vinum_conf.flags
|
||||
|
||||
/*
|
||||
* A drive corresponds to a disk slice. We use a different term to show
|
||||
* the difference in usage: it doesn't have to be a slice, and could
|
||||
* theoretically be a complete, unpartitioned disk
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct drive
|
||||
#else
|
||||
struct _drive
|
||||
#endif
|
||||
{
|
||||
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
|
||||
struct vinum_label label; /* and the label information */
|
||||
enum drivestate state; /* current state */
|
||||
int flags; /* flags */
|
||||
int subdisks_allocated; /* number of entries in sd */
|
||||
int subdisks_used; /* and the number used */
|
||||
int blocksize; /* size of fs blocks */
|
||||
int pid; /* of locker */
|
||||
u_int64_t sectors_available; /* number of sectors still available */
|
||||
int secsperblock;
|
||||
int lasterror; /* last error on drive */
|
||||
int driveno; /* index of drive in vinum_conf */
|
||||
int opencount; /* number of up subdisks */
|
||||
u_int64_t reads; /* number of reads on this drive */
|
||||
u_int64_t writes; /* number of writes on this drive */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
#define DRIVE_MAXACTIVE 30000 /* maximum number of active requests */
|
||||
int active; /* current number of requests outstanding */
|
||||
int maxactive; /* maximum number of requests ever outstanding */
|
||||
int freelist_size; /* number of entries alloced in free list */
|
||||
int freelist_entries; /* number of entries used in free list */
|
||||
struct drive_freelist *freelist; /* sorted list of free space on drive */
|
||||
#ifdef _KERNEL
|
||||
u_int sectorsize;
|
||||
off_t mediasize;
|
||||
struct cdev *dev; /* device information */
|
||||
#ifdef VINUMDEBUG
|
||||
char lockfilename[16]; /* name of file from which we were locked */
|
||||
int lockline; /* and the line number */
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct sd
|
||||
#else
|
||||
struct _sd
|
||||
#endif
|
||||
{
|
||||
char name[MAXSDNAME]; /* name of subdisk */
|
||||
enum sdstate state; /* state */
|
||||
int flags;
|
||||
int lasterror; /* last error occurred */
|
||||
/* offsets in blocks */
|
||||
int64_t driveoffset; /* offset on drive */
|
||||
/*
|
||||
* plexoffset is the offset from the beginning
|
||||
* of the plex to the very first part of the
|
||||
* subdisk, in sectors. For striped, RAID-4 and
|
||||
* RAID-5 plexes, only the first stripe is
|
||||
* located at this offset
|
||||
*/
|
||||
int64_t plexoffset; /* offset in plex */
|
||||
u_int64_t sectors; /* and length in sectors */
|
||||
int sectorsize; /* sector size for DIOCGSECTORSIZE */
|
||||
int plexno; /* index of plex, if it belongs */
|
||||
int driveno; /* index of the drive on which it is located */
|
||||
int sdno; /* our index in vinum_conf */
|
||||
int plexsdno; /* and our number in our plex */
|
||||
/* (undefined if no plex) */
|
||||
u_int64_t reads; /* number of reads on this subdisk */
|
||||
u_int64_t writes; /* number of writes on this subdisk */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
/* revive parameters */
|
||||
u_int64_t revived; /* block number of current revive request */
|
||||
int revive_blocksize; /* revive block size (bytes) */
|
||||
int revive_interval; /* and time to wait between transfers */
|
||||
pid_t reviver; /* PID of reviving process */
|
||||
/* init parameters */
|
||||
u_int64_t initialized; /* block number of current init request */
|
||||
int init_blocksize; /* init block size (bytes) */
|
||||
int init_interval; /* and time to wait between transfers */
|
||||
#ifdef _KERNEL
|
||||
struct request *waitlist; /* list of requests waiting on revive op */
|
||||
struct cdev *dev; /* associated device */
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct plex
|
||||
#else
|
||||
struct _plex
|
||||
#endif
|
||||
{
|
||||
enum plexorg organization; /* Plex organization */
|
||||
enum plexstate state; /* and current state */
|
||||
u_int64_t length; /* total length of plex (sectors) */
|
||||
int flags;
|
||||
int stripesize; /* size of stripe or raid band, in sectors */
|
||||
int sectorsize; /* sector size for DIOCGSECTORSIZE */
|
||||
int subdisks; /* number of associated subdisks */
|
||||
int subdisks_allocated; /* number of subdisks allocated space for */
|
||||
int *sdnos; /* list of component subdisks */
|
||||
int plexno; /* index of plex in vinum_conf */
|
||||
int volno; /* index of volume */
|
||||
int volplexno; /* number of plex in volume */
|
||||
/* Statistics */
|
||||
u_int64_t reads; /* number of reads on this plex */
|
||||
u_int64_t writes; /* number of writes on this plex */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t recovered_reads; /* number of recovered read operations */
|
||||
u_int64_t degraded_writes; /* number of degraded writes */
|
||||
u_int64_t parityless_writes; /* number of parityless writes */
|
||||
u_int64_t multiblock; /* requests that needed more than one block */
|
||||
u_int64_t multistripe; /* requests that needed more than one stripe */
|
||||
int sddowncount; /* number of subdisks down */
|
||||
/* Lock information */
|
||||
int usedlocks; /* number currently in use */
|
||||
int lockwaits; /* and number of waits for locks */
|
||||
off_t checkblock; /* block number for parity op */
|
||||
char name[MAXPLEXNAME]; /* name of plex */
|
||||
#ifdef _KERNEL
|
||||
struct rangelock *lock; /* ranges of locked addresses */
|
||||
struct mtx *lockmtx; /* lock mutex, one of plexmutex [] */
|
||||
daddr_t last_addr; /* last address read from this plex */
|
||||
struct cdev *dev; /* associated device */
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef _KERNEL
|
||||
struct volume
|
||||
#else
|
||||
struct _volume
|
||||
#endif
|
||||
{
|
||||
char name[MAXVOLNAME]; /* name of volume */
|
||||
enum volumestate state; /* current state */
|
||||
int plexes; /* number of plexes */
|
||||
int preferred_plex; /* index of plex to read from,
|
||||
* -1 for round-robin */
|
||||
/*
|
||||
* index of plex used for last read, for
|
||||
* round-robin.
|
||||
*/
|
||||
int last_plex_read;
|
||||
int volno; /* volume number */
|
||||
int flags; /* status and configuration flags */
|
||||
int openflags; /* flags supplied to last open(2) */
|
||||
u_int64_t size; /* size of volume */
|
||||
int blocksize; /* logical block size */
|
||||
int sectorsize; /* sector size for DIOCGSECTORSIZE */
|
||||
int active; /* number of outstanding requests active */
|
||||
int subops; /* and the number of suboperations */
|
||||
/* Statistics */
|
||||
u_int64_t bytes_read; /* number of bytes read */
|
||||
u_int64_t bytes_written; /* number of bytes written */
|
||||
u_int64_t reads; /* number of reads on this volume */
|
||||
u_int64_t writes; /* number of writes on this volume */
|
||||
u_int64_t recovered_reads; /* reads recovered from another plex */
|
||||
/*
|
||||
* Unlike subdisks in the plex, space for the
|
||||
* plex pointers is static.
|
||||
*/
|
||||
int plex[MAXPLEX]; /* index of plexes */
|
||||
#ifdef _KERNEL
|
||||
struct cdev *dev; /* associated device */
|
||||
#endif
|
||||
};
|
@ -1,236 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumparser.c,v 1.25 2003/05/07 03:33:28 grog Exp grog $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*
|
||||
* This file contains the parser for the configuration routines. It's used
|
||||
* both in the kernel and in the user interface program, thus the separate file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Go through a text and split up into text tokens. These are either non-blank
|
||||
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
|
||||
* " characters may be escaped by \, which otherwise has no special meaning.
|
||||
*
|
||||
* Delimit by following with a \0, and return pointers to the starts at token [].
|
||||
* Return the number of tokens found as the return value.
|
||||
*
|
||||
* This method has the restriction that a closing " or ' must be followed by
|
||||
* grey space.
|
||||
*
|
||||
* Error conditions are end of line before end of quote, or no space after
|
||||
* a closing quote. In this case, tokenize() returns -1.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <dev/vinum/vinumkw.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/systm.h>
|
||||
#include <sys/conf.h>
|
||||
#include <machine/setjmp.h>
|
||||
/* All this mess for a single struct definition */
|
||||
#include <sys/uio.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/mount.h>
|
||||
|
||||
#include <dev/vinum/vinumvar.h>
|
||||
#include <dev/vinum/vinumio.h>
|
||||
#include <dev/vinum/vinumext.h>
|
||||
#define iswhite(c) ((c == ' ') || (c == '\t')) /* check for white space */
|
||||
#else /* userland */
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#define iswhite isspace /* use the ctype macro */
|
||||
#endif
|
||||
|
||||
/* enum keyword is defined in vinumvar.h */
|
||||
|
||||
/*
 * Table-building helpers.
 *
 * keypair(foo)     expands to the initializer { "foo", kw_foo }
 * flagkeypair(foo) expands to the initializer { "-foo", kw_foo }
 * KEYWORDSET(tbl)  expands to the initializer { <number of entries>, tbl };
 *                  tbl must be a true array, not a pointer.
 */
#define keypair(x)	{ #x, kw_##x }
#define flagkeypair(x)	{ "-" #x, kw_##x }
#define KEYWORDSET(x)	{ sizeof (x) / sizeof ((x)[0]), x }
|
||||
|
||||
/* Normal keywords. These are all the words that vinum knows. */
|
||||
struct _keywords keywords[] =
|
||||
{keypair(drive),
|
||||
keypair(partition),
|
||||
keypair(sd),
|
||||
keypair(subdisk),
|
||||
keypair(plex),
|
||||
keypair(volume),
|
||||
keypair(vol),
|
||||
keypair(setupstate),
|
||||
keypair(readpol),
|
||||
keypair(org),
|
||||
keypair(name),
|
||||
keypair(writethrough),
|
||||
keypair(writeback),
|
||||
keypair(device),
|
||||
keypair(concat),
|
||||
keypair(raid4),
|
||||
keypair(raid5),
|
||||
keypair(striped),
|
||||
keypair(plexoffset),
|
||||
keypair(driveoffset),
|
||||
keypair(length),
|
||||
keypair(len),
|
||||
keypair(size),
|
||||
keypair(state),
|
||||
keypair(round),
|
||||
keypair(prefer),
|
||||
keypair(preferred),
|
||||
keypair(rename),
|
||||
keypair(detached),
|
||||
#ifndef _KERNEL /* for vinum(8) only */
|
||||
keypair(debug),
|
||||
keypair(stripe),
|
||||
keypair(mirror),
|
||||
#endif
|
||||
keypair(attach),
|
||||
keypair(detach),
|
||||
keypair(printconfig),
|
||||
keypair(saveconfig),
|
||||
keypair(replace),
|
||||
keypair(create),
|
||||
keypair(read),
|
||||
keypair(modify),
|
||||
keypair(list),
|
||||
keypair(l),
|
||||
keypair(ld),
|
||||
keypair(ls),
|
||||
keypair(lp),
|
||||
keypair(lv),
|
||||
keypair(info),
|
||||
keypair(set),
|
||||
keypair(rm),
|
||||
keypair(mv),
|
||||
keypair(move),
|
||||
keypair(init),
|
||||
keypair(resetconfig),
|
||||
keypair(start),
|
||||
keypair(stop),
|
||||
keypair(makedev),
|
||||
keypair(help),
|
||||
keypair(quit),
|
||||
keypair(setdaemon),
|
||||
keypair(getdaemon),
|
||||
keypair(max),
|
||||
keypair(replace),
|
||||
keypair(readpol),
|
||||
keypair(resetstats),
|
||||
keypair(setstate),
|
||||
keypair(checkparity),
|
||||
keypair(rebuildparity),
|
||||
keypair(dumpconfig),
|
||||
keypair(retryerrors)
|
||||
};
|
||||
struct keywordset keyword_set = KEYWORDSET(keywords);
|
||||
|
||||
#ifndef _KERNEL
|
||||
struct _keywords flag_keywords[] =
|
||||
{flagkeypair(f),
|
||||
flagkeypair(d),
|
||||
flagkeypair(v),
|
||||
flagkeypair(s),
|
||||
flagkeypair(r),
|
||||
flagkeypair(w)
|
||||
};
|
||||
struct keywordset flag_set = KEYWORDSET(flag_keywords);
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Take a blank separated list of tokens and turn it into a list of
 * individual nul-delimited strings.  The text at cptr is modified in
 * place: the character following each token is overwritten with \0.
 *
 * cptr      the text to split; scanning stops at \0, \n or a #
 *           that starts a token.
 * token     receives a pointer to the start of each token; must have
 *           room for maxtoken entries.
 * maxtoken  the maximum number of tokens to store.
 *
 * A token that starts with ' or " extends to the matching quote that
 * is not preceded by \.  The quote characters themselves remain part
 * of the token.  The closing quote must be followed by white space.
 *
 * If maxtoken tokens are found, the function returns as soon as the
 * last one has been recorded, without delimiting it, so the final
 * token then runs to the end of the text.
 *
 * Return the number of tokens, or -1 on error (end of line inside a
 * quoted token, or no white space after a closing quote).
 */
int
tokenize(char *cptr, char *token[], int maxtoken)
{
    char delim;						    /* delimiter for searching for the partner */
    int tokennr;					    /* index of this token */

    for (tokennr = 0; tokennr < maxtoken;) {
	/*
	 * iswhite may be isspace(), which requires a value
	 * representable as unsigned char, hence the casts.
	 */
	while (iswhite((unsigned char) *cptr))
	    cptr++;					    /* skip initial white space */
	if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
	    return tokennr;				    /* return number of tokens found */
	delim = *cptr;
	token[tokennr] = cptr;				    /* point to it */
	tokennr++;					    /* one more */
	if (tokennr == maxtoken)			    /* run off the end? */
	    return tokennr;
	if ((delim == '\'') || (delim == '"')) {	    /* quoted */
	    for (;;) {
		cptr++;
		if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
		    cptr++;				    /* move on past */
		    if (!iswhite((unsigned char) *cptr))    /* error, no space after closing quote */
			return -1;
		    *cptr++ = '\0';			    /* delimit */
		    break;				    /* this token is complete, look for the next */
		} else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
		    return -1;
	    }
	} else {					    /* not quoted */
	    while ((*cptr != '\0') && (!iswhite((unsigned char) *cptr)) && (*cptr != '\n'))
		cptr++;
	    if (*cptr != '\0')				    /* not end of the line, */
		*cptr++ = '\0';				    /* delimit and move to the next */
	}
    }
    return maxtoken;					    /* only reached if maxtoken <= 0 */
}
|
||||
|
||||
/* Find a keyword and return an index */
|
||||
enum keyword
|
||||
get_keyword(char *name, struct keywordset *keywordset)
|
||||
{
|
||||
int i;
|
||||
struct _keywords *keywords = keywordset->k; /* point to the keywords */
|
||||
if (name != NULL) { /* parameter exists */
|
||||
for (i = 0; i < keywordset->size; i++)
|
||||
if (!strcmp(name, keywords[i].name))
|
||||
return (enum keyword) keywords[i].keyword;
|
||||
}
|
||||
return kw_invalid_keyword;
|
||||
}
|
@ -1,700 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Cybernet Corporation and Nan Yang Computer Services Limited.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software was developed as part of the NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Cybernet Corporation
|
||||
* and Nan Yang Computer Services Limited
|
||||
* 4. Neither the name of the Companies nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumraid5.c,v 1.23 2003/02/08 03:32:45 grog Exp $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
#include <sys/resourcevar.h>
|
||||
|
||||
/*
|
||||
* Parameters which describe the current transfer.
|
||||
* These are only used for calculation, but they
|
||||
* need to be passed to other functions, so it's
|
||||
* tidier to put them in a struct
|
||||
*/
|
||||
struct metrics {
|
||||
daddr_t stripebase; /* base address of stripe (1st subdisk) */
|
||||
int stripeoffset; /* offset in stripe */
|
||||
int stripesectors; /* total sectors to transfer in this stripe */
|
||||
daddr_t sdbase; /* offset in subdisk of stripe base */
|
||||
int sdcount; /* number of disks involved in this transfer */
|
||||
daddr_t diskstart; /* remember where this transfer starts */
|
||||
int psdno; /* number of parity subdisk */
|
||||
int badsdno; /* number of down subdisk, if there is one */
|
||||
int firstsdno; /* first data subdisk number */
|
||||
/* These correspond to the fields in rqelement, sort of */
|
||||
int useroffset;
|
||||
/*
|
||||
* Initial offset and length values for the first
|
||||
* data block
|
||||
*/
|
||||
int initoffset; /* start address of block to transfer */
|
||||
short initlen; /* length in sectors of data transfer */
|
||||
/* Define a normal operation */
|
||||
int dataoffset; /* start address of block to transfer */
|
||||
int datalen; /* length in sectors of data transfer */
|
||||
/* Define a group operation */
|
||||
int groupoffset; /* subdisk offset of group operation */
|
||||
int grouplen; /* length in sectors of group operation */
|
||||
/* Define a normal write operation */
|
||||
int writeoffset; /* subdisk offset of normal write */
|
||||
int writelen; /* length in sectors of write operation */
|
||||
enum xferinfo flags; /* to check what we're doing */
|
||||
int rqcount; /* number of elements in request */
|
||||
};
|
||||
|
||||
enum requeststatus bre5(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskstart,
|
||||
daddr_t diskend);
|
||||
void complete_raid5_write(struct rqelement *);
|
||||
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
|
||||
void setrqebounds(struct rqelement *rqe, struct metrics *mp);
|
||||
|
||||
/*
|
||||
* define the low-level requests needed to perform
|
||||
* a high-level I/O operation for a specific plex
|
||||
* 'plexno'.
|
||||
*
|
||||
* Return 0 if all subdisks involved in the
|
||||
* request are up, 1 if some subdisks are not up,
|
||||
* and -1 if the request is at least partially
|
||||
* outside the bounds of the subdisks.
|
||||
*
|
||||
* Modify the pointer *diskstart to point to the
|
||||
* end address. On read, return on the first bad
|
||||
* subdisk, so that the caller
|
||||
* (build_read_request) can try alternatives.
|
||||
*
|
||||
* On entry to this routine, the prq structures
|
||||
* are not assigned. The assignment is performed
|
||||
* by expandrq(). Strictly speaking, the elements
|
||||
* rqe->sdno of all entries should be set to -1,
|
||||
* since 0 (from bzero) is a valid subdisk number.
|
||||
* We avoid this problem by initializing the ones
|
||||
* we use, and not looking at the others (index >=
|
||||
* prq->requests).
|
||||
*/
|
||||
enum requeststatus
|
||||
bre5(struct request *rq,
|
||||
int plexno,
|
||||
daddr_t * diskaddr,
|
||||
daddr_t diskend)
|
||||
{
|
||||
struct metrics m; /* most of the information */
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct buf *bp; /* user's bp */
|
||||
struct rqgroup *rqg; /* the request group that we will create */
|
||||
struct rqelement *rqe; /* point to this request information */
|
||||
int rsectors; /* sectors remaining in this stripe */
|
||||
int mysdno; /* another sd index in loops */
|
||||
int rqno; /* request number */
|
||||
|
||||
rqg = NULL; /* shut up, damn compiler */
|
||||
m.diskstart = *diskaddr; /* start of transfer */
|
||||
bp = rq->bp; /* buffer pointer */
|
||||
plex = &PLEX[plexno]; /* point to the plex */
|
||||
|
||||
|
||||
while (*diskaddr < diskend) { /* until we get it all sorted out */
|
||||
if (*diskaddr >= plex->length) /* beyond the end of the plex */
|
||||
return REQUEST_EOF; /* can't continue */
|
||||
|
||||
m.badsdno = -1; /* no bad subdisk yet */
|
||||
|
||||
/* Part A: Define the request */
|
||||
/*
|
||||
* First, calculate some sizes:
|
||||
* The offset of the start address from
|
||||
* the start of the stripe.
|
||||
*/
|
||||
m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));
|
||||
|
||||
/*
|
||||
* The plex-relative address of the
|
||||
* start of the stripe.
|
||||
*/
|
||||
m.stripebase = *diskaddr - m.stripeoffset;
|
||||
|
||||
/* subdisk containing the parity stripe */
|
||||
if (plex->organization == plex_raid5)
|
||||
m.psdno = plex->subdisks - 1
|
||||
- (*diskaddr / (plex->stripesize * (plex->subdisks - 1)))
|
||||
% plex->subdisks;
|
||||
else /* RAID-4 */
|
||||
m.psdno = plex->subdisks - 1;
|
||||
|
||||
/*
|
||||
* The number of the subdisk in which
|
||||
* the start is located.
|
||||
*/
|
||||
m.firstsdno = m.stripeoffset / plex->stripesize;
|
||||
if (m.firstsdno >= m.psdno) /* at or past parity sd */
|
||||
m.firstsdno++; /* increment it */
|
||||
|
||||
/*
|
||||
* The offset from the beginning of
|
||||
* the stripe on this subdisk.
|
||||
*/
|
||||
m.initoffset = m.stripeoffset % plex->stripesize;
|
||||
|
||||
/* The offset of the stripe start relative to this subdisk */
|
||||
m.sdbase = m.stripebase / (plex->subdisks - 1);
|
||||
|
||||
m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */
|
||||
|
||||
/*
|
||||
* The number of sectors to transfer in the
|
||||
* current (first) subdisk.
|
||||
*/
|
||||
m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */
|
||||
plex->stripesize - m.initoffset); /* and the amount left in this block */
|
||||
|
||||
/*
|
||||
* The number of sectors to transfer in this stripe
|
||||
* is the minimum of the amount remaining to transfer
|
||||
* and the amount left in this stripe.
|
||||
*/
|
||||
m.stripesectors = min(diskend - *diskaddr,
|
||||
plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);
|
||||
|
||||
/* The number of data subdisks involved in this request */
|
||||
m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;
|
||||
|
||||
/* Part B: decide what kind of transfer this will be.
|
||||
|
||||
* start and end addresses of the transfer in
|
||||
* the current block.
|
||||
*
|
||||
* There are a number of different kinds of
|
||||
* transfer, each of which relates to a
|
||||
* specific subdisk:
|
||||
*
|
||||
* 1. Normal read. All participating subdisks
|
||||
* are up, and the transfer can be made
|
||||
* directly to the user buffer. The bounds
|
||||
* of the transfer are described by
|
||||
* m.dataoffset and m.datalen. We have
|
||||
* already calculated m.initoffset and
|
||||
* m.initlen, which define the parameters
|
||||
* for the first data block.
|
||||
*
|
||||
* 2. Recovery read. One participating
|
||||
* subdisk is down. To recover data, all
|
||||
* the other subdisks, including the parity
|
||||
* subdisk, must be read. The data is
|
||||
* recovered by exclusive-oring all the
|
||||
* other blocks. The bounds of the
|
||||
* transfer are described by m.groupoffset
|
||||
* and m.grouplen.
|
||||
*
|
||||
* 3. A read request may request reading both
|
||||
* available data (normal read) and
|
||||
* non-available data (recovery read).
|
||||
* This can be a problem if the address
|
||||
* ranges of the two reads do not coincide:
|
||||
* in this case, the normal read needs to
|
||||
* be extended to cover the address range
|
||||
* of the recovery read, and must thus be
|
||||
* performed out of malloced memory.
|
||||
*
|
||||
* 4. Normal write. All the participating
|
||||
* subdisks are up. The bounds of the
|
||||
* transfer are described by m.dataoffset
|
||||
* and m.datalen. Since these values
|
||||
* differ for each block, we calculate the
|
||||
* bounds for the parity block
|
||||
* independently as the maximum of the
|
||||
* individual blocks and store these values
|
||||
* in m.writeoffset and m.writelen. This
|
||||
* write proceeds in four phases:
|
||||
*
|
||||
* i. Read the old contents of each block
|
||||
* and the parity block.
|
||||
* ii. ``Remove'' the old contents from
|
||||
* the parity block with exclusive or.
|
||||
* iii. ``Insert'' the new contents of the
|
||||
* block in the parity block, again
|
||||
* with exclusive or.
|
||||
*
|
||||
* iv. Write the new contents of the data
|
||||
* blocks and the parity block. The data
|
||||
* block transfers can be made directly from
|
||||
* the user buffer.
|
||||
*
|
||||
* 5. Degraded write where the data block is
|
||||
* not available. The bounds of the
|
||||
* transfer are described by m.groupoffset
|
||||
* and m.grouplen. This requires the
|
||||
* following steps:
|
||||
*
|
||||
* i. Read in all the other data blocks,
|
||||
* excluding the parity block.
|
||||
*
|
||||
* ii. Recreate the parity block from the
|
||||
* other data blocks and the data to be
|
||||
* written.
|
||||
*
|
||||
* iii. Write the parity block.
|
||||
*
|
||||
* 6. Parityless write, a write where the
|
||||
* parity block is not available. This is
|
||||
* in fact the simplest: just write the
|
||||
* data blocks. This can proceed directly
|
||||
* from the user buffer. The bounds of the
|
||||
* transfer are described by m.dataoffset
|
||||
* and m.datalen.
|
||||
*
|
||||
* 7. Combination of degraded data block write
|
||||
* and normal write. In this case the
|
||||
* address ranges of the reads may also
|
||||
* need to be extended to cover all
|
||||
* participating blocks.
|
||||
*
|
||||
* All requests in a group transfer transfer
|
||||
* the same address range relative to their
|
||||
* subdisk. The individual transfers may
|
||||
* vary, but since our group of requests is
|
||||
* all in a single slice, we can define a
|
||||
* range in which they all fall.
|
||||
*
|
||||
* In the following code section, we determine
|
||||
* which kind of transfer we will perform. If
|
||||
* there is a group transfer, we also decide
|
||||
* its bounds relative to the subdisks. At
|
||||
* the end, we have the following values:
|
||||
*
|
||||
* m.flags indicates the kinds of transfers
|
||||
* we will perform.
|
||||
* m.initoffset indicates the offset of the
|
||||
* beginning of any data operation relative
|
||||
* to the beginning of the stripe base.
|
||||
* m.initlen specifies the length of any data
|
||||
* operation.
|
||||
* m.dataoffset contains the same value as
|
||||
* m.initoffset.
|
||||
* m.datalen contains the same value as
|
||||
* m.initlen. Initially dataoffset and
|
||||
* datalen describe the parameters for the
|
||||
* first data block; while building the data
|
||||
* block requests, they are updated for each
|
||||
* block.
|
||||
* m.groupoffset indicates the offset of any
|
||||
* group operation relative to the beginning
|
||||
* of the stripe base.
|
||||
* m.grouplen specifies the length of any
|
||||
* group operation.
|
||||
* m.writeoffset indicates the offset of a
|
||||
* normal write relative to the beginning of
|
||||
* the stripe base. This value differs from
|
||||
* m.dataoffset in that it applies to the
|
||||
* entire operation, and not just the first
|
||||
* block.
|
||||
* m.writelen specifies the total span of a
|
||||
* normal write operation. writeoffset and
|
||||
* writelen are used to define the parity
|
||||
* block.
|
||||
*/
|
||||
m.groupoffset = 0; /* assume no group... */
|
||||
m.grouplen = 0; /* until we know we have one */
|
||||
m.writeoffset = m.initoffset; /* start offset of transfer */
|
||||
m.writelen = 0; /* nothing to write yet */
|
||||
m.flags = 0; /* no flags yet */
|
||||
rsectors = m.stripesectors; /* remaining sectors to examine */
|
||||
m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
|
||||
m.datalen = m.initlen;
|
||||
|
||||
if (m.sdcount > 1) {
|
||||
plex->multiblock++; /* more than one block for the request */
|
||||
/*
|
||||
* If we have two transfers that don't overlap,
|
||||
* (one at the end of the first block, the other
|
||||
* at the beginning of the second block),
|
||||
* it's cheaper to split them.
|
||||
*/
|
||||
if (rsectors < plex->stripesize) {
|
||||
m.sdcount = 1; /* just one subdisk */
|
||||
m.stripesectors = m.initlen; /* and just this many sectors */
|
||||
rsectors = m.initlen; /* and in the loop counter */
|
||||
}
|
||||
}
|
||||
if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */
|
||||
m.badsdno = m.psdno; /* note that it's down */
|
||||
if (bp->b_iocmd == BIO_READ) { /* read operation */
|
||||
for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
|
||||
if (mysdno == m.psdno) /* ignore parity on read */
|
||||
mysdno++;
|
||||
if (mysdno == plex->subdisks) /* wraparound */
|
||||
mysdno = 0;
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
|
||||
if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
|
||||
if (m.badsdno >= 0) /* we had one already, */
|
||||
return REQUEST_DOWN; /* we can't take a second */
|
||||
m.badsdno = mysdno; /* got the first */
|
||||
m.groupoffset = m.dataoffset; /* define the bounds */
|
||||
m.grouplen = m.datalen;
|
||||
m.flags |= XFR_RECOVERY_READ; /* we need recovery */
|
||||
plex->recovered_reads++; /* count another one */
|
||||
} else
|
||||
m.flags |= XFR_NORMAL_READ; /* normal read */
|
||||
|
||||
/* Update the pointers for the next block */
|
||||
m.dataoffset = 0; /* back to the start of the stripe */
|
||||
rsectors -= m.datalen; /* remaining sectors to examine */
|
||||
m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
|
||||
}
|
||||
} else { /* write operation */
|
||||
for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
|
||||
if (mysdno == m.psdno) /* parity stripe, we've dealt with that */
|
||||
mysdno++;
|
||||
if (mysdno == plex->subdisks) /* wraparound */
|
||||
mysdno = 0;
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
|
||||
sd = &SD[plex->sdnos[mysdno]];
|
||||
if (sd->state != sd_up) {
|
||||
enum requeststatus s;
|
||||
|
||||
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
|
||||
if (s && (m.badsdno >= 0)) { /* second bad disk, */
|
||||
int sdno;
|
||||
/*
|
||||
* If the parity disk is down, there's
|
||||
* no recovery. We make all involved
|
||||
* subdisks stale. Otherwise, we
|
||||
* should be able to recover, but it's
|
||||
* like pulling teeth. Fix it later.
|
||||
*/
|
||||
for (sdno = 0; sdno < m.sdcount; sdno++) {
|
||||
struct sd *sd = &SD[plex->sdnos[sdno]];
|
||||
if (sd->state >= sd_reborn) /* sort of up, */
|
||||
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
|
||||
}
|
||||
return s; /* and crap out */
|
||||
}
|
||||
m.badsdno = mysdno; /* note which one is bad */
|
||||
m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */
|
||||
plex->degraded_writes++; /* count another one */
|
||||
m.groupoffset = m.dataoffset; /* define the bounds */
|
||||
m.grouplen = m.datalen;
|
||||
} else {
|
||||
m.flags |= XFR_NORMAL_WRITE; /* normal write operation */
|
||||
if (m.writeoffset > m.dataoffset) { /* move write operation lower */
|
||||
m.writelen = max(m.writeoffset + m.writelen,
|
||||
m.dataoffset + m.datalen)
|
||||
- m.dataoffset;
|
||||
m.writeoffset = m.dataoffset;
|
||||
} else
|
||||
m.writelen = max(m.writeoffset + m.writelen,
|
||||
m.dataoffset + m.datalen)
|
||||
- m.writeoffset;
|
||||
}
|
||||
|
||||
/* Update the pointers for the next block */
|
||||
m.dataoffset = 0; /* back to the start of the stripe */
|
||||
rsectors -= m.datalen; /* remaining sectors to examine */
|
||||
m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
|
||||
}
|
||||
if (m.badsdno == m.psdno) { /* got a bad parity block, */
|
||||
struct sd *psd = &SD[plex->sdnos[m.psdno]];
|
||||
|
||||
if (psd->state == sd_down)
|
||||
set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
|
||||
else if (psd->state == sd_crashed)
|
||||
set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
|
||||
m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */
|
||||
m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */
|
||||
plex->parityless_writes++; /* count another one */
|
||||
}
|
||||
}
|
||||
|
||||
/* reset the initial transfer values */
|
||||
m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
|
||||
m.datalen = m.initlen;
|
||||
|
||||
/* decide how many requests we need */
|
||||
if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))
|
||||
/* doing a recovery read or degraded write, */
|
||||
m.rqcount = plex->subdisks; /* all subdisks */
|
||||
else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */
|
||||
m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */
|
||||
else /* parityless write or normal read */
|
||||
m.rqcount = m.sdcount; /* just the data blocks */
|
||||
|
||||
/* Part C: build the requests */
|
||||
rqg = allocrqg(rq, m.rqcount); /* get a request group */
|
||||
if (rqg == NULL) { /* malloc failed */
|
||||
bp->b_error = ENOMEM;
|
||||
bp->b_ioflags |= BIO_ERROR;
|
||||
return REQUEST_ENOMEM;
|
||||
}
|
||||
rqg->plexno = plexno;
|
||||
rqg->flags = m.flags;
|
||||
rqno = 0; /* index in the request group */
|
||||
|
||||
/* 1: PARITY BLOCK */
|
||||
/*
|
||||
* Are we performing an operation which requires parity? In that case,
|
||||
* work out the parameters and define the parity block.
|
||||
* XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
|
||||
*/
|
||||
if (m.flags & XFR_PARITYOP) { /* need parity */
|
||||
rqe = &rqg->rqe[rqno]; /* point to element */
|
||||
sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */
|
||||
rqe->rqg = rqg; /* point back to group */
|
||||
rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
|
||||
&~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuf */
|
||||
setrqebounds(rqe, &m); /* set up the bounds of the transfer */
|
||||
rqe->sdno = sd->sdno; /* subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
if (build_rq_buffer(rqe, plex)) /* build the buffer */
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
rqe->b.b_iocmd = BIO_READ; /* we must read first */
|
||||
m.sdcount++; /* adjust the subdisk count */
|
||||
rqno++; /* and point to the next request */
|
||||
}
|
||||
/*
|
||||
* 2: DATA BLOCKS
|
||||
* Now build up requests for the blocks required
|
||||
* for individual transfers
|
||||
*/
|
||||
for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
if (mysdno == plex->subdisks) /* got to the end, */
|
||||
mysdno = 0; /* wrap around */
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
|
||||
rqe = &rqg->rqe[rqno]; /* point to element */
|
||||
sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
|
||||
rqe->rqg = rqg; /* point to group */
|
||||
if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */
|
||||
rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
|
||||
else
|
||||
rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */
|
||||
if (mysdno == m.badsdno) { /* this is the bad subdisk */
|
||||
rqg->badsdno = rqno; /* note which one */
|
||||
rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */
|
||||
/*
|
||||
* we can't read or write from/to it,
|
||||
* but we don't need to malloc
|
||||
*/
|
||||
rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
|
||||
}
|
||||
setrqebounds(rqe, &m); /* set up the bounds of the transfer */
|
||||
rqe->useroffset = m.useroffset; /* offset in user buffer */
|
||||
rqe->sdno = sd->sdno; /* subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
if (build_rq_buffer(rqe, plex)) /* build the buffer */
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
if ((m.flags & XFR_PARITYOP) /* parity operation, */
|
||||
&&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */
|
||||
rqe->b.b_iocmd = BIO_READ; /* we must read first */
|
||||
|
||||
/* Now update pointers for the next block */
|
||||
*diskaddr += m.datalen; /* skip past what we've done */
|
||||
m.stripesectors -= m.datalen; /* deduct from what's left */
|
||||
m.useroffset += m.datalen; /* and move on in the user buffer */
|
||||
m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */
|
||||
m.dataoffset = 0; /* start at the beginning of next block */
|
||||
}
|
||||
|
||||
/*
|
||||
* 3: REMAINING BLOCKS FOR RECOVERY
|
||||
* Finally, if we have a recovery operation, build
|
||||
* up transfers for the other subdisks. Follow the
|
||||
* subdisks around until we get to where we started.
|
||||
* These requests use only the group parameters.
|
||||
*/
|
||||
if ((rqno < m.rqcount) /* haven't done them all already */
|
||||
&&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
|
||||
for (; rqno < m.rqcount; rqno++, mysdno++) {
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
if (mysdno == plex->subdisks) /* got to the end, */
|
||||
mysdno = 0; /* wrap around */
|
||||
if (mysdno == m.psdno) /* parity, */
|
||||
mysdno++; /* we've given already */
|
||||
|
||||
rqe = &rqg->rqe[rqno]; /* point to element */
|
||||
sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
|
||||
rqe->rqg = rqg; /* point to group */
|
||||
|
||||
rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* for tidiness' sake */
|
||||
rqe->groupoffset = 0; /* group starts at the beginining */
|
||||
rqe->datalen = 0;
|
||||
rqe->grouplen = m.grouplen;
|
||||
rqe->buflen = m.grouplen;
|
||||
rqe->flags = (m.flags | XFR_MALLOCED) /* transfer flags without data op stuf */
|
||||
&~XFR_DATAOP;
|
||||
rqe->sdno = sd->sdno; /* subdisk number */
|
||||
rqe->driveno = sd->driveno;
|
||||
if (build_rq_buffer(rqe, plex)) /* build the buffer */
|
||||
return REQUEST_ENOMEM; /* can't do it */
|
||||
rqe->b.b_iocmd = BIO_READ; /* we must read first */
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We need to lock the address range before
|
||||
* doing anything. We don't have to be
|
||||
* performing a recovery operation: somebody
|
||||
* else could be doing so, and the results could
|
||||
* influence us. Note the fact here, we'll perform
|
||||
* the lock in launch_requests.
|
||||
*/
|
||||
rqg->lockbase = m.stripebase;
|
||||
if (*diskaddr < diskend) /* didn't finish the request on this stripe */
|
||||
plex->multistripe++; /* count another one */
|
||||
}
|
||||
return REQUEST_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for rqe5: adjust the bounds of
|
||||
* the transfers to minimize the buffer
|
||||
* allocation.
|
||||
*
|
||||
* Each request can handle two of three different
|
||||
* data ranges:
|
||||
*
|
||||
* 1. The range described by the parameters
|
||||
* dataoffset and datalen, for normal read or
|
||||
* parityless write.
|
||||
* 2. The range described by the parameters
|
||||
* groupoffset and grouplen, for recovery read
|
||||
* and degraded write.
|
||||
* 3. For normal write, the range depends on the
|
||||
* kind of block. For data blocks, the range
|
||||
* is defined by dataoffset and datalen. For
|
||||
* parity blocks, it is defined by writeoffset
|
||||
* and writelen.
|
||||
*
|
||||
* In order not to allocate more memory than
|
||||
* necessary, this function adjusts the bounds
|
||||
* parameter for each request to cover just the
|
||||
* minimum necessary for the function it performs.
|
||||
* This will normally vary from one request to the
|
||||
* next.
|
||||
*
|
||||
* Things are slightly different for the parity
|
||||
* block. In this case, the bounds defined by
|
||||
* mp->writeoffset and mp->writelen also play a
|
||||
* rôle.  This case applies when rqe->flags has
|
||||
* both XFR_NORMAL_WRITE and XFR_PARITY_BLOCK set.
|
||||
*/
|
||||
void
|
||||
setrqebounds(struct rqelement *rqe, struct metrics *mp)
|
||||
{
|
||||
/* parity block of a normal write */
|
||||
if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK))
|
||||
== (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) { /* case 3 */
|
||||
if (rqe->flags & XFR_DEGRADED_WRITE) { /* also degraded write */
|
||||
/*
|
||||
* With a combined normal and degraded write, we
|
||||
* will zero out the area of the degraded write
|
||||
* in the second phase, so we don't need to read
|
||||
* it in. Unfortunately, we need a way to tell
|
||||
* build_request_buffer the size of the buffer,
|
||||
* and currently that's the length of the read.
|
||||
* As a result, we read everything, even the stuff
|
||||
* that we're going to nuke.
|
||||
* FIXME XXX
|
||||
*/
|
||||
if (mp->groupoffset < mp->writeoffset) { /* group operation starts lower */
|
||||
rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
|
||||
rqe->dataoffset = mp->writeoffset - mp->groupoffset; /* data starts here */
|
||||
rqe->groupoffset = 0; /* and the group at the beginning */
|
||||
} else { /* individual data starts first */
|
||||
rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* individual data starts at the beginning */
|
||||
rqe->groupoffset = mp->groupoffset - mp->writeoffset; /* group starts here */
|
||||
}
|
||||
rqe->datalen = mp->writelen;
|
||||
rqe->grouplen = mp->grouplen;
|
||||
} else { /* just normal write (case 3) */
|
||||
rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* degradation starts at the beginning */
|
||||
rqe->groupoffset = 0; /* for tidiness' sake */
|
||||
rqe->datalen = mp->writelen;
|
||||
rqe->grouplen = 0;
|
||||
}
|
||||
} else if (rqe->flags & XFR_DATAOP) { /* data operation (case 1 or 3) */
|
||||
if (rqe->flags & XFR_GROUPOP) { /* also a group operation (case 2) */
|
||||
if (mp->groupoffset < mp->dataoffset) { /* group operation starts lower */
|
||||
rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
|
||||
rqe->dataoffset = mp->dataoffset - mp->groupoffset; /* data starts here */
|
||||
rqe->groupoffset = 0; /* and the group at the beginning */
|
||||
} else { /* individual data starts first */
|
||||
rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* individual data starts at the beginning */
|
||||
rqe->groupoffset = mp->groupoffset - mp->dataoffset; /* group starts here */
|
||||
}
|
||||
rqe->datalen = mp->datalen;
|
||||
rqe->grouplen = mp->grouplen;
|
||||
} else { /* just data operation (case 1) */
|
||||
rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* degradation starts at the beginning */
|
||||
rqe->groupoffset = 0; /* for tidiness' sake */
|
||||
rqe->datalen = mp->datalen;
|
||||
rqe->grouplen = 0;
|
||||
}
|
||||
} else { /* just group operations (case 2) */
|
||||
rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
|
||||
rqe->dataoffset = 0; /* for tidiness' sake */
|
||||
rqe->groupoffset = 0; /* group starts at the beginining */
|
||||
rqe->datalen = 0;
|
||||
rqe->grouplen = mp->grouplen;
|
||||
}
|
||||
rqe->buflen = max(rqe->dataoffset + rqe->datalen, /* total buffer length */
|
||||
rqe->groupoffset + rqe->grouplen);
|
||||
}
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
File diff suppressed because it is too large
Load Diff
@ -1,620 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumrevive.c,v 1.19 2003/05/08 04:34:47 grog Exp grog $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
__FBSDID("$FreeBSD$");
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/request.h>
|
||||
|
||||
/*
|
||||
* Revive a block of a subdisk. Return an error
|
||||
* indication. EAGAIN means successful copy, but
|
||||
* that more blocks remain to be copied. EINVAL
|
||||
* means that the subdisk isn't associated with a
|
||||
* plex (which means a programming error if we get
|
||||
* here at all; FIXME).
|
||||
*/
|
||||
|
||||
int
|
||||
revive_block(int sdno)
|
||||
{
|
||||
int s; /* priority level */
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
struct buf *bp;
|
||||
int error = EAGAIN;
|
||||
int size; /* size of revive block, bytes */
|
||||
daddr_t plexblkno; /* lblkno in plex */
|
||||
int psd; /* parity subdisk number */
|
||||
u_int64_t stripe; /* stripe number */
|
||||
int paritysd = 0; /* set if this is the parity stripe */
|
||||
struct rangelock *lock; /* for locking */
|
||||
daddr_t stripeoffset; /* offset in stripe */
|
||||
|
||||
plexblkno = 0; /* to keep the compiler happy */
|
||||
sd = &SD[sdno];
|
||||
lock = NULL;
|
||||
if (sd->plexno < 0) /* no plex? */
|
||||
return EINVAL;
|
||||
plex = &PLEX[sd->plexno]; /* point to plex */
|
||||
if (plex->volno >= 0)
|
||||
vol = &VOL[plex->volno];
|
||||
else
|
||||
vol = NULL;
|
||||
|
||||
if ((sd->revive_blocksize == 0) /* no block size */
|
||||
||(sd->revive_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */
|
||||
sd->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
|
||||
else if (sd->revive_blocksize > MAX_REVIVE_BLOCKSIZE)
|
||||
sd->revive_blocksize = MAX_REVIVE_BLOCKSIZE;
|
||||
size = min(sd->revive_blocksize >> DEV_BSHIFT, sd->sectors - sd->revived) << DEV_BSHIFT;
|
||||
sd->reviver = curproc->p_pid; /* note who last had a bash at it */
|
||||
|
||||
/* Now decide where to read from */
|
||||
switch (plex->organization) {
|
||||
case plex_concat:
|
||||
plexblkno = sd->revived + sd->plexoffset; /* corresponding address in plex */
|
||||
break;
|
||||
|
||||
case plex_striped:
|
||||
stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
|
||||
if (stripeoffset + (size >> DEV_BSHIFT) > plex->stripesize)
|
||||
size = (plex->stripesize - stripeoffset) << DEV_BSHIFT;
|
||||
plexblkno = sd->plexoffset /* base */
|
||||
+ (sd->revived - stripeoffset) * plex->subdisks /* offset to beginning of stripe */
|
||||
+ stripeoffset; /* offset from beginning of stripe */
|
||||
break;
|
||||
|
||||
case plex_raid4:
|
||||
case plex_raid5:
|
||||
stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
|
||||
plexblkno = sd->plexoffset /* base */
|
||||
+ (sd->revived - stripeoffset) * (plex->subdisks - 1) /* offset to beginning of stripe */
|
||||
+stripeoffset; /* offset from beginning of stripe */
|
||||
stripe = (sd->revived / plex->stripesize); /* stripe number */
|
||||
|
||||
/* Make sure we don't go beyond the end of the band. */
|
||||
size = min(size, (plex->stripesize - stripeoffset) << DEV_BSHIFT);
|
||||
if (plex->organization == plex_raid4)
|
||||
psd = plex->subdisks - 1; /* parity subdisk for this stripe */
|
||||
else
|
||||
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
|
||||
paritysd = plex->sdnos[psd] == sdno; /* note if it's the parity subdisk */
|
||||
|
||||
/*
|
||||
* Now adjust for the strangenesses
|
||||
* in RAID-4 and RAID-5 striping.
|
||||
*/
|
||||
if (sd->plexsdno > psd) /* beyond the parity stripe, */
|
||||
plexblkno -= plex->stripesize; /* one stripe less */
|
||||
else if (paritysd)
|
||||
plexblkno -= plex->stripesize * sd->plexsdno; /* go back to the beginning of the band */
|
||||
break;
|
||||
|
||||
case plex_disorg: /* to keep the compiler happy */
|
||||
break; /* to keep the pedants happy */
|
||||
}
|
||||
|
||||
if (paritysd) { /* we're reviving a parity block, */
|
||||
bp = parityrebuild(plex, sd->revived, size, rebuildparity, &lock, NULL); /* do the grunt work */
|
||||
if (bp == NULL) /* no buffer space */
|
||||
return ENOMEM; /* chicken out */
|
||||
} else { /* data block */
|
||||
s = splbio();
|
||||
bp = geteblk(size); /* Get a buffer */
|
||||
splx(s);
|
||||
if (bp == NULL)
|
||||
return ENOMEM;
|
||||
|
||||
/*
|
||||
* Amount to transfer: block size, unless it
|
||||
* would overlap the end.
|
||||
*/
|
||||
bp->b_bcount = size;
|
||||
bp->b_resid = bp->b_bcount;
|
||||
bp->b_blkno = plexblkno; /* start here */
|
||||
if (isstriped(plex)) /* we need to lock striped plexes */
|
||||
lock = lockrange(plexblkno << DEV_BSHIFT, bp, plex); /* lock it */
|
||||
if (vol != NULL) /* it's part of a volume, */
|
||||
/*
|
||||
* First, read the data from the volume. We
|
||||
* don't care which plex, that's bre's job.
|
||||
*/
|
||||
bp->b_dev = VOL[plex->volno].dev; /* create the device number */
|
||||
else /* it's an unattached plex */
|
||||
bp->b_dev = PLEX[sd->plexno].dev; /* create the device number */
|
||||
|
||||
bp->b_iocmd = BIO_READ; /* either way, read it */
|
||||
bp->b_flags = 0;
|
||||
vinumstart(bp, 1);
|
||||
bufwait(bp);
|
||||
}
|
||||
|
||||
if (bp->b_ioflags & BIO_ERROR) {
|
||||
error = bp->b_error;
|
||||
if (lock) /* we took a lock, */
|
||||
unlockrange(sd->plexno, lock); /* give it back */
|
||||
} else
|
||||
/* Now write to the subdisk */
|
||||
{
|
||||
bp->b_dev = SD[sdno].dev; /* create the device number */
|
||||
bp->b_flags &= ~B_DONE; /* no longer done */
|
||||
bp->b_ioflags = 0;
|
||||
bp->b_iocmd = BIO_WRITE;
|
||||
bp->b_resid = bp->b_bcount;
|
||||
bp->b_blkno = sd->revived; /* write it to here */
|
||||
sdio(bp); /* perform the I/O */
|
||||
bufwait(bp);
|
||||
if (bp->b_ioflags & BIO_ERROR)
|
||||
error = bp->b_error;
|
||||
else {
|
||||
sd->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
|
||||
if (sd->revived >= sd->sectors) { /* finished */
|
||||
sd->revived = 0;
|
||||
set_sd_state(sdno, sd_up, setstate_force); /* bring the sd up */
|
||||
log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
|
||||
save_config(); /* and save the updated configuration */
|
||||
error = 0; /* we're done */
|
||||
}
|
||||
}
|
||||
if (lock) /* we took a lock, */
|
||||
unlockrange(sd->plexno, lock); /* give it back */
|
||||
while (sd->waitlist) { /* we have waiting requests */
|
||||
#ifdef VINUMDEBUG
|
||||
struct request *rq = sd->waitlist;
|
||||
|
||||
if (debug & DEBUG_REVIVECONFLICT)
|
||||
log(LOG_DEBUG,
|
||||
"Relaunch revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n",
|
||||
rq->sdno,
|
||||
rq,
|
||||
rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
|
||||
major(rq->bp->b_dev),
|
||||
minor(rq->bp->b_dev),
|
||||
(intmax_t) rq->bp->b_blkno,
|
||||
rq->bp->b_bcount);
|
||||
#endif
|
||||
launch_requests(sd->waitlist, 1); /* do them now */
|
||||
sd->waitlist = sd->waitlist->next; /* and move on to the next */
|
||||
}
|
||||
}
|
||||
if (bp->b_qindex == 0) { /* not on a queue, */
|
||||
bp->b_flags |= B_INVAL;
|
||||
bp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(bp); /* is this kosher? */
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check or rebuild the parity blocks of a RAID-4
|
||||
* or RAID-5 plex.
|
||||
*
|
||||
* The variables plex->checkblock and
|
||||
* plex->rebuildblock represent the
|
||||
* subdisk-relative address of the stripe we're
|
||||
* looking at, not the plex-relative address. We
|
||||
* store it in the plex and not as a local
|
||||
* variable because this function could be
|
||||
* stopped, and we don't want to repeat the part
|
||||
* we've already done. This is also the reason
|
||||
* why we don't initialize it here except at the
|
||||
* end. It gets initialized with the plex on
|
||||
* creation.
|
||||
*
|
||||
* Each call to this function processes at most
|
||||
* one stripe. We can't loop in this function,
|
||||
* because we're unstoppable, so we have to be
|
||||
* called repeatedly from userland.
|
||||
*/
|
||||
void
|
||||
parityops(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int plexno;
|
||||
struct plex *plex;
|
||||
int size; /* I/O transfer size, bytes */
|
||||
int stripe; /* stripe number in plex */
|
||||
int psd; /* parity subdisk number */
|
||||
struct rangelock *lock; /* lock on stripe */
|
||||
struct _ioctl_reply *reply;
|
||||
off_t pstripe; /* pointer to our stripe counter */
|
||||
struct buf *pbp;
|
||||
off_t errorloc; /* offset of parity error */
|
||||
enum parityop op; /* operation to perform */
|
||||
|
||||
plexno = data->index;
|
||||
op = data->op;
|
||||
pbp = NULL;
|
||||
reply = (struct _ioctl_reply *) data;
|
||||
reply->error = EAGAIN; /* expect to repeat this call */
|
||||
plex = &PLEX[plexno];
|
||||
if (!isparity(plex)) { /* not RAID-4 or RAID-5 */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
} else if (plex->state < plex_flaky) {
|
||||
reply->error = EIO;
|
||||
strcpy(reply->msg, "Plex is not completely accessible\n");
|
||||
return;
|
||||
}
|
||||
pstripe = data->offset;
|
||||
stripe = pstripe / plex->stripesize; /* stripe number */
|
||||
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
|
||||
size = min(DEFAULT_REVIVE_BLOCKSIZE, /* one block at a time */
|
||||
plex->stripesize << DEV_BSHIFT);
|
||||
|
||||
pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */
|
||||
if (pbp == NULL) { /* no buffer space */
|
||||
reply->error = ENOMEM;
|
||||
return; /* chicken out */
|
||||
}
|
||||
/*
|
||||
* Now we have a result in the data buffer of
|
||||
* the parity buffer header, which we have kept.
|
||||
* Decide what to do with it.
|
||||
*/
|
||||
reply->msg[0] = '\0'; /* until shown otherwise */
|
||||
if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */
|
||||
if ((op == rebuildparity)
|
||||
|| (op == rebuildandcheckparity)) {
|
||||
pbp->b_iocmd = BIO_WRITE;
|
||||
pbp->b_resid = pbp->b_bcount;
|
||||
sdio(pbp); /* write the parity block */
|
||||
bufwait(pbp);
|
||||
}
|
||||
if (((op == checkparity)
|
||||
|| (op == rebuildandcheckparity))
|
||||
&& (errorloc != -1)) {
|
||||
if (op == checkparity)
|
||||
reply->error = EIO;
|
||||
sprintf(reply->msg,
|
||||
"Parity incorrect at offset 0x%jx\n",
|
||||
(intmax_t) errorloc);
|
||||
}
|
||||
if (reply->error == EAGAIN) { /* still OK, */
|
||||
plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
|
||||
if (plex->checkblock >= SD[plex->sdnos[0]].sectors) { /* finished */
|
||||
plex->checkblock = 0;
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pbp->b_ioflags & BIO_ERROR)
|
||||
reply->error = pbp->b_error;
|
||||
pbp->b_flags |= B_INVAL;
|
||||
pbp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(pbp);
|
||||
unlockrange(plexno, lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebuild a parity stripe. Return pointer to
|
||||
* parity bp. On return,
|
||||
*
|
||||
* 1. The band is locked. The caller must unlock
|
||||
* the band and release the buffer header.
|
||||
*
|
||||
* 2. All buffer headers except pbp have been
|
||||
* released. The caller must release pbp.
|
||||
*
|
||||
* 3. For checkparity and rebuildandcheckparity,
|
||||
* the parity is compared with the current
|
||||
* parity block. If it's different, the
|
||||
* offset of the error is returned to
|
||||
* errorloc. The caller can set the value of
|
||||
* the pointer to NULL if this is called for
|
||||
* rebuilding parity.
|
||||
*
|
||||
* pstripe is the subdisk-relative base address of
|
||||
* the data to be reconstructed, size is the size
|
||||
* of the transfer in bytes.
|
||||
*/
|
||||
struct buf *
|
||||
parityrebuild(struct plex *plex,
|
||||
u_int64_t pstripe,
|
||||
int size,
|
||||
enum parityop op,
|
||||
struct rangelock **lockp,
|
||||
off_t * errorloc)
|
||||
{
|
||||
int error;
|
||||
int s;
|
||||
int sdno;
|
||||
u_int64_t stripe; /* stripe number */
|
||||
int *parity_buf; /* buffer address for current parity block */
|
||||
int *newparity_buf; /* and for new parity block */
|
||||
int mysize; /* I/O transfer size for this transfer */
|
||||
int isize; /* mysize in ints */
|
||||
int i;
|
||||
int psd; /* parity subdisk number */
|
||||
int newpsd; /* and "subdisk number" of new parity */
|
||||
struct buf **bpp; /* pointers to our bps */
|
||||
struct buf *pbp; /* buffer header for parity stripe */
|
||||
int *sbuf;
|
||||
int bufcount; /* number of buffers we need */
|
||||
|
||||
stripe = pstripe / plex->stripesize; /* stripe number */
|
||||
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
|
||||
parity_buf = NULL; /* to keep the compiler happy */
|
||||
error = 0;
|
||||
|
||||
/*
|
||||
* It's possible that the default transfer size
|
||||
* we chose is not a factor of the stripe size.
|
||||
* We *must* limit this operation to a single
|
||||
* stripe, at least for RAID-5 rebuild, since
|
||||
* the parity subdisk changes between stripes,
|
||||
* so in this case we need to perform a short
|
||||
* transfer. Set variable mysize to reflect
|
||||
* this.
|
||||
*/
|
||||
mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT);
|
||||
isize = mysize / (sizeof(int)); /* number of ints in the buffer */
|
||||
bufcount = plex->subdisks + 1; /* sd buffers plus result buffer */
|
||||
newpsd = plex->subdisks;
|
||||
bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */
|
||||
|
||||
/* First, build requests for all subdisks */
|
||||
for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
/* Get a buffer header and initialize it. */
|
||||
s = splbio();
|
||||
bpp[sdno] = geteblk(mysize); /* Get a buffer */
|
||||
if (bpp[sdno] == NULL) {
|
||||
while (sdno-- > 0) { /* release the ones we got */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
}
|
||||
splx(s);
|
||||
printf("vinum: can't allocate buffer space for parity op.\n");
|
||||
return NULL; /* no bpps */
|
||||
}
|
||||
splx(s);
|
||||
if (sdno == psd)
|
||||
parity_buf = (int *) bpp[sdno]->b_data;
|
||||
if (sdno == newpsd) /* the new one? */
|
||||
bpp[sdno]->b_dev = SD[plex->sdnos[psd]].dev; /* write back to the parity SD */
|
||||
else
|
||||
bpp[sdno]->b_dev = SD[plex->sdnos[sdno]].dev; /* device number */
|
||||
bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */
|
||||
bpp[sdno]->b_flags = 0;
|
||||
bpp[sdno]->b_bcount = mysize;
|
||||
bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
|
||||
bpp[sdno]->b_blkno = pstripe; /* transfer from here */
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize result buffer */
|
||||
pbp = bpp[newpsd];
|
||||
newparity_buf = (int *) bpp[newpsd]->b_data;
|
||||
bzero(newparity_buf, mysize);
|
||||
|
||||
/*
|
||||
* Now lock the stripe with the first non-parity
|
||||
* bp as locking bp.
|
||||
*/
|
||||
*lockp = lockrange(pstripe * plex->stripesize * (plex->subdisks - 1),
|
||||
bpp[psd ? 0 : 1],
|
||||
plex);
|
||||
|
||||
/*
|
||||
* Then issue requests for all subdisks in
|
||||
* parallel. Don't transfer the parity stripe
|
||||
* if we're rebuilding parity, unless we also
|
||||
* want to check it.
|
||||
*/
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
sdio(bpp[sdno]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Next, wait for the requests to complete.
|
||||
* We wait in the order in which they were
|
||||
* issued, which isn't necessarily the order in
|
||||
* which they complete, but we don't have a
|
||||
* convenient way of doing the latter, and the
|
||||
* delay is minimal.
|
||||
*/
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
bufwait(bpp[sdno]);
|
||||
if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */
|
||||
error = bpp[sdno]->b_error;
|
||||
else if (sdno != psd) { /* update parity */
|
||||
sbuf = (int *) bpp[sdno]->b_data;
|
||||
for (i = 0; i < isize; i++)
|
||||
((int *) newparity_buf)[i] ^= sbuf[i]; /* xor in the buffer */
|
||||
}
|
||||
}
|
||||
if (sdno != psd) { /* release all bps except parity */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're checking, compare the calculated
|
||||
* and the read parity block. If they're
|
||||
* different, return the plex-relative offset;
|
||||
* otherwise return -1.
|
||||
*/
|
||||
if ((op == checkparity)
|
||||
|| (op == rebuildandcheckparity)) {
|
||||
*errorloc = -1; /* no error yet */
|
||||
for (i = 0; i < isize; i++) {
|
||||
if (parity_buf[i] != newparity_buf[i]) {
|
||||
*errorloc = (off_t) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1)
|
||||
+ i * sizeof(int);
|
||||
break;
|
||||
}
|
||||
}
|
||||
bpp[psd]->b_flags |= B_INVAL;
|
||||
brelse(bpp[psd]); /* give back our resources */
|
||||
}
|
||||
/* release our resources */
|
||||
Free(bpp);
|
||||
if (error) {
|
||||
pbp->b_ioflags |= BIO_ERROR;
|
||||
pbp->b_error = error;
|
||||
}
|
||||
return pbp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize a subdisk by writing zeroes to the
|
||||
* complete address space. If verify is set,
|
||||
* check each transfer for correctness.
|
||||
*
|
||||
* Each call to this function writes (and maybe
|
||||
* checks) a single block.
|
||||
*/
|
||||
int
|
||||
initsd(int sdno, int verify)
|
||||
{
|
||||
int s; /* priority level */
|
||||
struct sd *sd;
|
||||
struct plex *plex;
|
||||
struct volume *vol;
|
||||
struct buf *bp;
|
||||
int error;
|
||||
int size; /* size of init block, bytes */
|
||||
daddr_t plexblkno; /* lblkno in plex */
|
||||
int verified; /* set when we're happy with what we wrote */
|
||||
|
||||
error = 0;
|
||||
plexblkno = 0; /* to keep the compiler happy */
|
||||
sd = &SD[sdno];
|
||||
if (sd->plexno < 0) /* no plex? */
|
||||
return EINVAL;
|
||||
plex = &PLEX[sd->plexno]; /* point to plex */
|
||||
if (plex->volno >= 0)
|
||||
vol = &VOL[plex->volno];
|
||||
else
|
||||
vol = NULL;
|
||||
|
||||
if (sd->init_blocksize == 0) {
|
||||
sd->init_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
|
||||
} else if (sd->init_blocksize > MAX_REVIVE_BLOCKSIZE)
|
||||
sd->init_blocksize = MAX_REVIVE_BLOCKSIZE;
|
||||
|
||||
size = min(sd->init_blocksize >> DEV_BSHIFT, sd->sectors - sd->initialized) << DEV_BSHIFT;
|
||||
|
||||
verified = 0;
|
||||
while (!verified) { /* until we're happy with it, */
|
||||
s = splbio();
|
||||
bp = geteblk(size); /* Get a buffer */
|
||||
splx(s);
|
||||
if (bp == NULL)
|
||||
return ENOMEM;
|
||||
|
||||
bp->b_bcount = size;
|
||||
bp->b_resid = bp->b_bcount;
|
||||
bp->b_blkno = sd->initialized; /* write it to here */
|
||||
bzero(bp->b_data, bp->b_bcount);
|
||||
bp->b_dev = SD[sdno].dev; /* create the device number */
|
||||
bp->b_iocmd = BIO_WRITE;
|
||||
sdio(bp); /* perform the I/O */
|
||||
bufwait(bp);
|
||||
if (bp->b_ioflags & BIO_ERROR)
|
||||
error = bp->b_error;
|
||||
if (bp->b_qindex == 0) { /* not on a queue, */
|
||||
bp->b_flags |= B_INVAL;
|
||||
bp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(bp); /* is this kosher? */
|
||||
}
|
||||
if ((error == 0) && verify) { /* check that it got there */
|
||||
s = splbio();
|
||||
bp = geteblk(size); /* get a buffer */
|
||||
if (bp == NULL) {
|
||||
splx(s);
|
||||
error = ENOMEM;
|
||||
} else {
|
||||
bp->b_bcount = size;
|
||||
bp->b_resid = bp->b_bcount;
|
||||
bp->b_blkno = sd->initialized; /* read from here */
|
||||
bp->b_dev = SD[sdno].dev; /* create the device number */
|
||||
bp->b_iocmd = BIO_READ; /* read it back */
|
||||
splx(s);
|
||||
sdio(bp);
|
||||
bufwait(bp);
|
||||
/*
|
||||
* XXX Bug fix code. This is hopefully no
|
||||
* longer needed (21 February 2000).
|
||||
*/
|
||||
if (bp->b_ioflags & BIO_ERROR)
|
||||
error = bp->b_error;
|
||||
else if ((*bp->b_data != 0) /* first word spammed */
|
||||
||(bcmp(bp->b_data, &bp->b_data[1], bp->b_bcount - 1))) { /* or one of the others */
|
||||
printf("vinum: init error on %s, offset 0x%llx sectors\n",
|
||||
sd->name,
|
||||
(long long) sd->initialized);
|
||||
verified = 0;
|
||||
} else
|
||||
verified = 1;
|
||||
if (bp->b_qindex == 0) { /* not on a queue, */
|
||||
bp->b_flags |= B_INVAL;
|
||||
bp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(bp); /* is this kosher? */
|
||||
}
|
||||
}
|
||||
} else
|
||||
verified = 1;
|
||||
}
|
||||
if (error == 0) { /* did it, */
|
||||
sd->initialized += size >> DEV_BSHIFT; /* moved this much further down */
|
||||
if (sd->initialized >= sd->sectors) { /* finished */
|
||||
sd->initialized = 0;
|
||||
set_sd_state(sdno, sd_initialized, setstate_force); /* bring the sd up */
|
||||
log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
|
||||
save_config(); /* and save the updated configuration */
|
||||
} else /* more to go, */
|
||||
error = EAGAIN; /* ya'll come back, see? */
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
File diff suppressed because it is too large
Load Diff
@ -1,257 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file gets read by makestatetext to create text files
|
||||
* with the names of the states, so don't change the file
|
||||
* format
|
||||
*/
|
||||
|
||||
enum volumestate {
    volume_unallocated,
    /* present but unused.  Must be 0 */

    volume_uninit,
    /* mentioned elsewhere but not known to the configuration */

    volume_down,

    /* The volume is up and functional, but not all plexes may be available */
    volume_up,
    volume_laststate = volume_up        /* last value, for table dimensions */
};
|
||||
|
||||
enum plexstate {
    /* An empty entry, not a plex at all.   */
    plex_unallocated,

    /* The plex has been referenced by a volume */
    plex_referenced,
    /*
     * The plex has been allocated, but its configuration
     * is not complete
     */
    plex_init,

    /*
     * A plex which has gone completely down because of
     * I/O errors.
     */
    plex_faulty,

    /*
     * A plex which has been taken down by the
     * administrator.
     */
    plex_down,

    /* A plex which is being initialized */
    plex_initializing,

    /*
     * *** The remaining states represent plexes which are
     * at least partially up.  Keep these separate so that
     * they can be checked more easily.
     */

    /*
     * A plex entry which is at least partially up.  Not
     * all subdisks are available, and an inconsistency
     * has occurred.  If no other plex is uncorrupted,
     * the volume is no longer consistent.
     */
    plex_corrupt,

    plex_firstup = plex_corrupt,        /* first "up" state */

    /*
     * A RAID-5 plex entry which is accessible, but one
     * subdisk is down, requiring recovery for many
     * I/O requests.
     */
    plex_degraded,

    /*
     * A plex which is really up, but which has a reborn
     * subdisk which we don't completely trust, and
     * which we don't want to read if we can avoid it
     */
    plex_flaky,

    /*
     * A plex entry which is completely up.  All subdisks
     * are up.
     */
    plex_up,

    plex_laststate = plex_up            /* last value, for table dimensions */
};
|
||||
|
||||
/* subdisk states */
|
||||
enum sdstate {
    /* An empty entry, not a subdisk at all. */
    sd_unallocated,

    /*
     * A subdisk entry which has not been created
     * completely.  Some fields may be empty.
     */
    sd_uninit,

    /* The subdisk has been referenced by a plex */
    sd_referenced,

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, but the disk hasn't
     * been updated.
     */
    sd_init,

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, and the disk has been
     * updated, but there is no data on the disk.
     */
    sd_empty,

    /*
     * A subdisk entry which has been created completely and
     * which is currently being initialized
     */
    sd_initializing,

    /*
     * A subdisk entry which has been initialized,
     * but which can't come up because it would
     * cause inconsistencies.
     */
    sd_initialized,

    /* *** The following states represent invalid data */
    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, the config on disk has been
     * updated, and the data was valid, but since then the
     * drive has been taken down, and as a result updates
     * have been missed.
     */
    sd_obsolete,

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has been crashed and updates have been lost.
     */
    sd_stale,

    /* *** The following states represent valid, inaccessible data */

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down.  No attempt has been made to write
     * to the subdisk since the crash, so the data is valid.
     */
    sd_crashed,

    /*
     * A subdisk entry which was up, which contained
     * valid data, and which was taken down by the
     * administrator.  The data is valid.
     */
    sd_down,

    /*
     * *** This is invalid data (the subdisk previously had
     * a numerically lower state), but it is currently in the
     * process of being revived.  We can write but not read.
     */
    sd_reviving,

    /*
     * *** The following states represent accessible subdisks
     * with valid data
     */

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data was valid, but since then the drive
     * has gone down and up again.  No updates were lost,
     * but it is possible that the subdisk has been
     * damaged.  We won't read from this subdisk if we
     * have a choice.  If this is the only subdisk which
     * covers this address space in the plex, we set its
     * state to sd_up under these circumstances, so this
     * status implies that there is another subdisk to
     * fulfil the request.
     */
    sd_reborn,

    /*
     * A subdisk entry which has been created completely.
     * All fields are correct, the disk has been updated,
     * and the data is valid.
     */
    sd_up,

    sd_laststate = sd_up                /* last value, for table dimensions */
};
|
||||
|
||||
enum drivestate {
    drive_unallocated,
    /* present but unused.  Must be 0 */

    drive_referenced,
    /* just mentioned in some other config entry */

    drive_down,
    /* not accessible */

    drive_up,
    /* up and running */

    drive_laststate = drive_up          /* last value, for table dimensions */
};
|
||||
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
@ -1,311 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumutil.c,v 1.17 2003/04/28 02:54:43 grog Exp $
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/* This file contains utility routines used both in kernel and user context */
|
||||
|
||||
#include <dev/vinum/vinumhdr.h>
|
||||
#include <dev/vinum/statetexts.h>
|
||||
#ifndef _KERNEL
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
extern jmp_buf command_fail; /* return on a failed command */
|
||||
#endif
|
||||
|
||||
/*
 * Temporary buffer for ASCII conversions of
 * values which have no name in the state text
 * tables.  It is shared by all the conversion
 * functions below, so each conversion overwrites
 * the result of the previous one.
 */
static char numeric_state[32];

/*
 * Number of entries in the table <x>statetext.
 * The element size is taken from the table
 * itself, so the count stays right whatever the
 * element type is.
 */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (x##statetext[0]))
|
||||
/* Return drive state as a string */
|
||||
char *
|
||||
drive_state(enum drivestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(drive)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return drivestatetext[state];
|
||||
}
|
||||
|
||||
/* Return volume state as a string */
|
||||
char *
|
||||
volume_state(enum volumestate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(vol)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return volstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex state as a string */
|
||||
char *
|
||||
plex_state(enum plexstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(plex)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return plexstatetext[state];
|
||||
}
|
||||
|
||||
/* Return plex organization as a string */
|
||||
char *
|
||||
plex_org(enum plexorg org)
|
||||
{
|
||||
switch (org) {
|
||||
case plex_disorg: /* disorganized */
|
||||
return "disorg";
|
||||
break;
|
||||
|
||||
case plex_concat: /* concatenated plex */
|
||||
return "concat";
|
||||
break;
|
||||
|
||||
case plex_striped: /* striped plex */
|
||||
return "striped";
|
||||
break;
|
||||
|
||||
case plex_raid4: /* RAID-4 plex */
|
||||
return "raid4";
|
||||
|
||||
case plex_raid5: /* RAID-5 plex */
|
||||
return "raid5";
|
||||
break;
|
||||
|
||||
default:
|
||||
sprintf(numeric_state, "Invalid org %d", (int) org);
|
||||
return numeric_state;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return sd state as a string */
|
||||
char *
|
||||
sd_state(enum sdstate state)
|
||||
{
|
||||
if (((unsigned) state) >= STATECOUNT(sd)) {
|
||||
sprintf(numeric_state, "Invalid state %d", (int) state);
|
||||
return numeric_state;
|
||||
} else
|
||||
return sdstatetext[state];
|
||||
}
|
||||
|
||||
/* Now convert in the other direction */
|
||||
/*
|
||||
* These are currently used only internally,
|
||||
* so we don't do too much error checking
|
||||
*/
|
||||
enum drivestate
|
||||
DriveState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(drive); i++)
|
||||
if (strcmp(text, drivestatetext[i]) == 0) /* found it */
|
||||
return (enum drivestate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum sdstate
|
||||
SdState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(sd); i++)
|
||||
if (strcmp(text, sdstatetext[i]) == 0) /* found it */
|
||||
return (enum sdstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum plexstate
|
||||
PlexState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(plex); i++)
|
||||
if (strcmp(text, plexstatetext[i]) == 0) /* found it */
|
||||
return (enum plexstate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
enum volumestate
|
||||
VolState(char *text)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < STATECOUNT(vol); i++)
|
||||
if (strcmp(text, volstatetext[i]) == 0) /* found it */
|
||||
return (enum volumestate) i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take a number with an optional scale factor and convert
|
||||
* it to a number of bytes.
|
||||
*
|
||||
* The scale factors are:
|
||||
*
|
||||
* s sectors (of 512 bytes)
|
||||
* b blocks (of 512 bytes). This unit is deprecated,
|
||||
* because it's confusing, but maintained to avoid
|
||||
* confusing Veritas users.
|
||||
* k kilobytes (1024 bytes)
|
||||
* m megabytes (of 1024 * 1024 bytes)
|
||||
* g gigabytes (of 1024 * 1024 * 1024 bytes)
|
||||
*/
|
||||
u_int64_t
|
||||
sizespec(char *spec)
|
||||
{
|
||||
u_int64_t size;
|
||||
char *s;
|
||||
int sign = 1; /* -1 if negative */
|
||||
|
||||
size = 0;
|
||||
if (spec != NULL) { /* we have a parameter */
|
||||
s = spec;
|
||||
if (*s == '-') { /* negative, */
|
||||
sign = -1;
|
||||
s++; /* skip */
|
||||
}
|
||||
if ((*s >= '0') && (*s <= '9')) { /* it's numeric */
|
||||
while ((*s >= '0') && (*s <= '9')) /* it's numeric */
|
||||
size = size * 10 + *s++ - '0'; /* convert it */
|
||||
switch (*s) {
|
||||
case '\0':
|
||||
return size * sign;
|
||||
|
||||
case 'B':
|
||||
case 'b':
|
||||
case 'S':
|
||||
case 's':
|
||||
return size * sign * 512;
|
||||
|
||||
case 'K':
|
||||
case 'k':
|
||||
return size * sign * 1024;
|
||||
|
||||
case 'M':
|
||||
case 'm':
|
||||
return size * sign * 1024 * 1024;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return size * sign * 1024 * 1024 * 1024;
|
||||
}
|
||||
}
|
||||
#ifdef _KERNEL
|
||||
throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
|
||||
#else
|
||||
fprintf(stderr, "Invalid length specification: %s", spec);
|
||||
longjmp(command_fail, 1);
|
||||
#endif
|
||||
}
|
||||
#ifdef _KERNEL
|
||||
throw_rude_remark(EINVAL, "Missing length specification");
|
||||
#else
|
||||
fprintf(stderr, "Missing length specification");
|
||||
longjmp(command_fail, 1);
|
||||
#endif
|
||||
/* NOTREACHED */
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Type of the device argument of Volno(), Plexno() and
 * Sdno(): a struct cdev pointer in the kernel, a dev_t in
 * userland.
 */
#ifdef _KERNEL
#define FOOTYPE struct cdev *
#else
#define FOOTYPE dev_t
#endif
|
||||
/*
|
||||
* Extract the volume number from a device number. Check that it's
|
||||
* the correct type, and that it isn't one of the superdevs.
|
||||
*/
|
||||
int
|
||||
Volno(FOOTYPE dev)
|
||||
{
|
||||
int volno = minor(dev);
|
||||
|
||||
if (OBJTYPE(dev) != VINUM_VOLUME_TYPE)
|
||||
return -1;
|
||||
else
|
||||
volno = ((volno & 0x3fff0000) >> 8) | (volno & 0xff);
|
||||
if ((volno == VINUM_SUPERDEV_VOL)
|
||||
|| (volno == VINUM_DAEMON_VOL))
|
||||
return -1;
|
||||
else
|
||||
return volno;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract a plex number from a device number.
|
||||
* Don't check the major number, but check the
|
||||
* type. Return -1 for invalid types.
|
||||
*/
|
||||
int
|
||||
Plexno(FOOTYPE dev)
|
||||
{
|
||||
int plexno = minor(dev);
|
||||
|
||||
if (OBJTYPE(dev) != VINUM_PLEX_TYPE)
|
||||
return -1;
|
||||
else
|
||||
return ((plexno & 0x3fff0000) >> 8) | (plexno & 0xff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract a subdisk number from a device number.
|
||||
* Don't check the major number, but check the
|
||||
* type. Return -1 for invalid types.
|
||||
*/
|
||||
int
|
||||
Sdno(FOOTYPE dev)
|
||||
{
|
||||
int sdno = minor(dev);
|
||||
|
||||
/*
|
||||
* Care: VINUM_SD_TYPE is 2 or 3, which is why we use < instead of
|
||||
* !=. It's not clear that this makes any sense abstracting it to
|
||||
* this level.
|
||||
*/
|
||||
if (OBJTYPE(dev) < VINUM_SD_TYPE)
|
||||
return -1;
|
||||
else
|
||||
/*
|
||||
* Note that the number we return includes the low-order bit of the
|
||||
* type field. This gives us twice as many potential subdisks as
|
||||
* plexes or volumes.
|
||||
*/
|
||||
return ((sdno & 0x7fff0000) >> 8) | (sdno & 0xff);
|
||||
}
|
@ -1,54 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumutil.h,v 1.1 2001/05/22 04:07:22 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
/*
 * Functions defined in vinumutil.c, which is used both in userland
 * and in the kernel.
 */

/* Convert a state or organization to its name */
char *drive_state(enum drivestate);
char *volume_state(enum volumestate);
char *plex_state(enum plexstate);
char *plex_org(enum plexorg);
char *sd_state(enum sdstate);

/* Convert a name to the corresponding state */
enum drivestate DriveState(char *text);
enum sdstate SdState(char *text);
enum plexstate PlexState(char *text);
enum volumestate VolState(char *text);
|
@ -1,395 +0,0 @@
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999
|
||||
* Nan Yang Computer Services Limited. All rights reserved.
|
||||
*
|
||||
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
|
||||
*
|
||||
* Written by Greg Lehey
|
||||
*
|
||||
* This software is distributed under the so-called ``Berkeley
|
||||
* License'':
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by Nan Yang Computer
|
||||
* Services Limited.
|
||||
* 4. Neither the name of the Company nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* This software is provided ``as is'', and any express or implied
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose are disclaimed.
|
||||
* In no event shall the company or contributors be liable for any
|
||||
* direct, indirect, incidental, special, exemplary, or consequential
|
||||
* damages (including, but not limited to, procurement of substitute
|
||||
* goods or services; loss of use, data, or profits; or business
|
||||
* interruption) however caused and on any theory of liability, whether
|
||||
* in contract, strict liability, or tort (including negligence or
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumvar.h,v 1.33 2003/05/23 01:09:23 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <dev/vinum/vinumstate.h>
|
||||
#include <sys/mutex.h>
|
||||
|
||||
/* Directory for device nodes. */
|
||||
#define VINUM_DIR "/dev/vinum"
|
||||
|
||||
/*
|
||||
* Some configuration maxima. They're an enum because
|
||||
* we can't define global constants. Sorry about that.
|
||||
*
|
||||
* These aren't as bad as they look: most of them are soft limits.
|
||||
*/
|
||||
|
||||
#define VINUMROOT
|
||||
enum constants {
|
||||
/*
|
||||
* Current version of the data structures. This
|
||||
* is used to ensure synchronization between
|
||||
* kernel module and userland vinum(8).
|
||||
*/
|
||||
VINUMVERSION = 1,
|
||||
VINUM_HEADER = 512, /* size of header on disk */
|
||||
MAXCONFIGLINE = 1024, /* maximum size of a single config line */
|
||||
MINVINUMSLICE = 1048576, /* minimum size of a slice */
|
||||
|
||||
ROUND_ROBIN_READPOL = -1, /* round robin read policy */
|
||||
|
||||
/*
|
||||
* Type field in high-order two bits of minor
|
||||
* number. Subdisks are in fact both type 2 and
|
||||
* type 3, giving twice the number of subdisks.
|
||||
* This causes some ugliness in the code.
|
||||
*/
|
||||
VINUM_VOLUME_TYPE = 0,
|
||||
VINUM_PLEX_TYPE = 1,
|
||||
VINUM_SD_TYPE = 2,
|
||||
VINUM_SD2_TYPE = 3,
|
||||
|
||||
|
||||
/*
|
||||
* Define a minor device number.
|
||||
* This is not used directly; instead, it's
|
||||
* called by the other macros.
|
||||
*/
|
||||
#define VINUMMINOR(o,t) ((o & 0xff) | ((o & 0x3fff00) << 8) | (t << VINUM_TYPE_SHIFT))
|
||||
|
||||
VINUM_TYPE_SHIFT = 30,
|
||||
VINUM_MAXVOL = 0x3ffffd, /* highest numbered volume */
|
||||
|
||||
/*
|
||||
* The super device and the daemon device are
|
||||
* magic: they're the two highest-numbered
|
||||
* volumes.
|
||||
*/
|
||||
VINUM_SUPERDEV_VOL = 0x3ffffe,
|
||||
VINUM_DAEMON_VOL = 0x3fffff,
|
||||
VINUM_MAXPLEX = 0x3fffff,
|
||||
VINUM_MAXSD = 0x7fffff,
|
||||
|
||||
#define VINUM_SUPERDEV_MINOR VINUMMINOR (VINUM_SUPERDEV_VOL, VINUM_VOLUME_TYPE)
|
||||
#define VINUM_DAEMON_MINOR VINUMMINOR (VINUM_DAEMON_VOL, VINUM_VOLUME_TYPE)
|
||||
|
||||
/*
|
||||
* Mask for the number part of each object.
|
||||
* Plexes and volumes are the same, subdisks use
|
||||
* the low-order bit of the type field and thus
|
||||
* have twice the number.
|
||||
*/
|
||||
|
||||
MAJORDEV_SHIFT = 8,
|
||||
|
||||
MAXPLEX = 8, /* maximum number of plexes in a volume */
|
||||
MAXSD = 256, /* maximum number of subdisks in a plex */
|
||||
MAXDRIVENAME = 32, /* maximum length of a device name */
|
||||
MAXSDNAME = 64, /* maximum length of a subdisk name */
|
||||
MAXPLEXNAME = 64, /* maximum length of a plex name */
|
||||
MAXVOLNAME = 64, /* maximum length of a volume name */
|
||||
MAXNAME = 64, /* maximum length of any name */
|
||||
|
||||
|
||||
#define OBJTYPE(x) ((minor(x) >> VINUM_TYPE_SHIFT) & 3)
|
||||
|
||||
/* extract device type */
|
||||
#define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 3)
|
||||
|
||||
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control" /* normal super device */
|
||||
#define VINUM_DAEMON_DEV_NAME VINUM_DIR"/controld" /* super device for daemon only */
|
||||
|
||||
/*
|
||||
* the number of object entries to cater for initially, and also the
|
||||
* value by which they are incremented. It doesn't take long
|
||||
* to extend them, so theoretically we could start with 1 of each, but
|
||||
* it's untidy to allocate such small areas. These values are
|
||||
* probably too small.
|
||||
*/
|
||||
|
||||
INITIAL_DRIVES = 4,
|
||||
INITIAL_VOLUMES = 4,
|
||||
INITIAL_PLEXES = 8,
|
||||
INITIAL_SUBDISKS = 16,
|
||||
INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
|
||||
INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
|
||||
INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
|
||||
PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
|
||||
PLEX_LOCKS = 256, /* number of locks to allocate to a plex */
|
||||
PLEXMUTEXES = 32,
|
||||
MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
|
||||
DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */
|
||||
VINUMHOSTNAMELEN = 32, /* host name field in label */
|
||||
};
|
||||
|
||||
/*
|
||||
* Slice header
|
||||
*
|
||||
* Vinum drives start with this structure:
|
||||
*
|
||||
*\ Sector
|
||||
* |--------------------------------------|
|
||||
* | PDP-11 memorial boot block | 0
|
||||
* |--------------------------------------|
|
||||
* | Disk label, maybe | 1
|
||||
* |--------------------------------------|
|
||||
* | Slice definition (vinum_hdr) | 8
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, first copy | 9
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
* | |
|
||||
* | Configuration info, second copy | 9 + size of config
|
||||
* | |
|
||||
* |--------------------------------------|
|
||||
*/
|
||||
|
||||
/*
 * Sizes and offsets of our information.  The
 * offsets and sizes are in bytes; DATASTART is
 * in units of DEV_BSIZE.
 */
enum {
    VINUM_LABEL_OFFSET = 4096,          /* offset of vinum label */
    VINUMHEADERLEN = 512,               /* size of vinum label */
    VINUM_CONFIG_OFFSET = 4608,         /* offset of first config copy */
    MAXCONFIG = 65536,                  /* and size of config copy */
    DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE       /* this is where the data starts */
};
|
||||
|
||||
/*
|
||||
* hostname is 256 bytes long, but we don't need to shlep
|
||||
* multiple copies in vinum. We use the host name just
|
||||
* to identify this system, and 32 bytes should be ample
|
||||
* for that purpose
|
||||
*/
|
||||
|
||||
struct vinum_label {
|
||||
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
|
||||
char name[MAXDRIVENAME]; /* our name of the drive */
|
||||
struct timeval date_of_birth; /* the time it was created */
|
||||
struct timeval last_update; /* and the time of last update */
|
||||
/*
|
||||
* total size in bytes of the drive. This value
|
||||
* includes the headers.
|
||||
*/
|
||||
off_t drive_size;
|
||||
};
|
||||
|
||||
struct vinum_hdr {
|
||||
uint64_t magic; /* we're long on magic numbers */
|
||||
#define VINUM_MAGIC 22322600044678729LL /* should be this */
|
||||
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
|
||||
/*
|
||||
* Size in bytes of each copy of the
|
||||
* configuration info. This must be a multiple
|
||||
* of the sector size.
|
||||
*/
|
||||
int config_length;
|
||||
struct vinum_label label; /* unique label */
|
||||
};
|
||||
|
||||
/* Information returned from read_drive_label */
|
||||
enum drive_label_info {
    DL_CANT_OPEN = 0,     /* the partition is invalid */
    DL_NOT_OURS = 1,      /* partition is valid but carries no vinum label */
    DL_DELETED_LABEL = 2, /* partition is valid and holds a deleted label */
    DL_WRONG_DRIVE = 3,   /* the drive name does not match */
    DL_OURS = 4           /* partition is valid and a label was found */
};
|
||||
|
||||
/* kinds of plex organization */
|
||||
enum plexorg {
    /*
     * The order matters: isstriped() and isparity() below are
     * range tests on these values.
     */
    plex_disorg,  /* disorganized */
    plex_concat,  /* concatenated plex */
    plex_striped, /* striped plex */
    plex_raid4,   /* RAID4 plex */
    plex_raid5    /* RAID5 plex */
};

/*
 * Recognize plex organizations.  p is a pointer to an object with
 * an `organization' member holding an enum plexorg value.  The
 * argument is parenthesized so that expressions such as &plex or
 * cond ? a : b can be passed safely.
 */
#define isstriped(p) ((p)->organization >= plex_striped) /* striped, RAID 4 or 5 */
#define isparity(p) ((p)->organization >= plex_raid4) /* RAID 4 or 5 */
|
||||
|
||||
/* Address range definitions, for locking volumes */
|
||||
struct rangelock {
|
||||
daddr_t stripe; /* address + 1 of the range being locked */
|
||||
struct buf *bp; /* user's buffer pointer */
|
||||
};
|
||||
|
||||
/*
 * One entry in the sorted list of free space on a drive.  The
 * fields use the C99 uint64_t spelling, matching struct vinum_hdr,
 * rather than the BSD-only u_int64_t.
 */
struct drive_freelist {
    uint64_t offset;  /* offset of entry */
    uint64_t sectors; /* and length in sectors */
};
|
||||
|
||||
/*
|
||||
* Include the structure definitions shared
|
||||
* between userland and kernel.
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <dev/vinum/vinumobj.h>
|
||||
#undef _KERNEL
|
||||
#include <dev/vinum/vinumobj.h>
|
||||
#define _KERNEL
|
||||
#else
|
||||
#include <dev/vinum/vinumobj.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Table expansion. Expand table, which contains oldcount
|
||||
* entries of type element, by increment entries, and change
|
||||
* oldcount accordingly
|
||||
*/
|
||||
#ifdef VINUMDEBUG
/*
 * EXPAND(table, element, oldcount, increment)
 *
 * Hand `table' to expand_table() with the old size
 * (oldcount elements) and the new size (oldcount + increment
 * elements) in bytes, then add increment to oldcount.  The
 * debug variant also passes the calling file and line.
 *
 * table and oldcount must be lvalues; element is a type name.
 * oldcount and increment are evaluated more than once, so they
 * must not have side effects.  The body is wrapped in
 * do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define EXPAND(table, element, oldcount, increment)             \
    do {                                                        \
        expand_table((void **) &(table),                        \
            (oldcount) * sizeof (element),                      \
            ((oldcount) + (increment)) * sizeof (element),      \
            __FILE__,                                           \
            __LINE__);                                          \
        (oldcount) += (increment);                              \
    } while (0)
#else
/*
 * EXPAND(table, element, oldcount, increment)
 *
 * Hand `table' to expand_table() with the old size
 * (oldcount elements) and the new size (oldcount + increment
 * elements) in bytes, then add increment to oldcount.
 *
 * table and oldcount must be lvalues; element is a type name.
 * oldcount and increment are evaluated more than once, so they
 * must not have side effects.  The body is wrapped in
 * do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); invoke it with a trailing
 * semicolon.
 */
#define EXPAND(table, element, oldcount, increment)             \
    do {                                                        \
        expand_table((void **) &(table),                        \
            (oldcount) * sizeof (element),                      \
            ((oldcount) + (increment)) * sizeof (element));     \
        (oldcount) += (increment);                              \
    } while (0)
#endif
|
||||
|
||||
/* Information on vinum's memory usage */
|
||||
struct meminfo {
    /* How many blocks are malloced. */
    int mallocs;
    /* Total amount malloced. */
    int total_malloced;
    /* Maximum number of mallocs seen. */
    int highwater;
    /* Points to the kernel's table of struct mc entries. */
    struct mc *malloced;
};
|
||||
|
||||
#define MCFILENAMELEN 16
|
||||
struct mc {
|
||||
struct timeval time;
|
||||
int seq;
|
||||
int size;
|
||||
short line;
|
||||
caddr_t address;
|
||||
char file[MCFILENAMELEN];
|
||||
};
|
||||
|
||||
/*
|
||||
* These enums are used by the state transition
|
||||
* routines. They're in bit map format:
|
||||
*
|
||||
* Bit 0: Other plexes in the volume are down
|
||||
* Bit 1: Other plexes in the volume are up
|
||||
* Bit 2: The current plex is up
|
||||
* Maybe they should be local to
|
||||
* state.c
|
||||
*/
|
||||
enum volplexstate {
    volplex_onlyusdown = 0,  /* we are the only plex, and we are down */
    volplex_alldown = 1,     /* another plex is down, and so are we */
    volplex_otherup = 2,     /* another plex is up */
    volplex_otherupdown = 3, /* other plexes are up and down */
    volplex_onlyus = 4,      /* we are up and alone */
    volplex_onlyusup = 5,    /* only we are up, the others are down */
    volplex_allup = 6,       /* all plexes are up */
    volplex_someup = 7       /* some plexes are up, including us */
};
|
||||
|
||||
/* state map for plex */
|
||||
enum sdstates {
    /* One bit per subdisk state, so that states can be combined in a map. */
    sd_emptystate = 1 << 0,
    sd_downstate = 1 << 1,        /* SD is down */
    sd_crashedstate = 1 << 2,     /* SD is crashed */
    sd_obsoletestate = 1 << 3,    /* SD is obsolete */
    sd_stalestate = 1 << 4,       /* SD is stale */
    sd_rebornstate = 1 << 5,      /* SD is reborn */
    sd_upstate = 1 << 6,          /* SD is up */
    sd_initstate = 1 << 7,        /* SD is initializing */
    sd_initializedstate = 1 << 8, /* SD is initialized */
    sd_otherstate = 1 << 9,       /* SD is in some other state */
};
|
||||
|
||||
/*
|
||||
* This is really just a parameter to pass to
|
||||
* set_<foo>_state, but since it needs to be known
|
||||
* in the external definitions, we need to define
|
||||
* it here
|
||||
*/
|
||||
enum setstateflags {
    setstate_none = 0,             /* no flags set */
    setstate_force = 1 << 0,       /* force the state change */
    setstate_configuring = 1 << 1, /* configuration in progress: don't save */
};
|
||||
|
||||
/* Operations for parityops to perform. */
|
||||
enum parityop {
    checkparity = 0,
    rebuildparity = 1,
    rebuildandcheckparity = 2, /* rebuildparity with the -v option */
};
|
||||
|
||||
/*
 * Threshold for round-robin reads from a multi-plex volume:
 * switch to the next plex when the next sector to be read
 * differs from the last sector read by this many sectors.
 */
#define ROUNDROBIN_SWITCH 128 /* 64k, i.e. 128 sectors of 512 bytes */
|
||||
|
||||
#ifdef VINUMDEBUG
|
||||
/* Debugging stuff */
|
||||
enum debugflags {
    /* Each flag occupies its own bit. */
    DEBUG_ADDRESSES = 1 << 0,      /* show buffer information during requests */
    DEBUG_NUMOUTPUT = 1 << 1,      /* show the value of vp->v_numoutput */
    DEBUG_RESID = 1 << 2,          /* go into debugger in complete_rqe */
    DEBUG_LASTREQS = 1 << 3,       /* keep a circular buffer of last requests */
    DEBUG_REVIVECONFLICT = 1 << 4, /* print info about revive conflicts */
    DEBUG_EOFINFO = 1 << 5,        /* print info about EOF detection */
    DEBUG_MEMFREE = 1 << 6,        /* keep info about Frees */
    DEBUG_BIGDRIVE = 1 << 7,       /* pretend our drives are 100 times the size */
    DEBUG_REMOTEGDB = 1 << 8,      /* go into remote gdb */
    DEBUG_WARNINGS = 1 << 9,       /* log various relatively harmless warnings */
    DEBUG_LOCKREQS = 1 << 10,      /* log locking requests */
};
|
||||
|
||||
/* Only in i386 kernel builds: route longjmp to LongJmp to test our longjmps. */
#if defined(_KERNEL) && defined(__i386__)
#define longjmp LongJmp
#endif
|
||||
#endif
|
||||
/* Local Variables: */
|
||||
/* fill-column: 50 */
|
||||
/* End: */
|
Loading…
Reference in New Issue
Block a user