
Merge libzfs_core branch:

includes MFV 238590, 238592, 247580

MFV 238590, 238592:
  In the first phase of the zfs ioctl restructuring, the libzfs_core
  library was introduced. It is a new thin library that wraps the kernel
  ioctls. The idea is to provide a forward-compatible way of dealing with
  new features: arguments are passed in nvlists rather than in ad-hoc
  zfs_cmd fields, and new-style ioctls are logged to pool history using a
  new history-logging mechanism.

  http://blog.delphix.com/matt/2012/01/17/the-future-of-libzfs/
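
  As a minimal sketch (not part of this commit; the pool and snapshot
  names are made up and error handling is trimmed), a libzfs_core
  consumer builds an nvlist of snapshot names and hands it to a single
  lzc_*() call instead of filling zfs_cmd fields:

    #include <libnvpair.h>
    #include <libzfs_core.h>

    static int
    snapshot_two_filesystems(void)
    {
        nvlist_t *snaps = fnvlist_alloc();
        nvlist_t *errlist = NULL;
        int error;

        /* Both snapshots are created by one atomic ioctl. */
        fnvlist_add_boolean(snaps, "tank/fs1@backup");
        fnvlist_add_boolean(snaps, "tank/fs2@backup");

        (void) libzfs_core_init();
        error = lzc_snapshot(snaps, NULL, &errlist);
        libzfs_core_fini();

        fnvlist_free(snaps);
        nvlist_free(errlist);
        return (error);
    }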

MFV 247580 [1]:
  To address several deadlocks and race conditions, the locking code
  around dsl_dataset was rewritten and the synctask interface was
  changed.
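
  For reference, a rough sketch of the new synctask style as it is used
  elsewhere in this diff (the callback names and the "tank" pool name are
  illustrative only):

    #include <sys/dmu_tx.h>
    #include <sys/dsl_pool.h>
    #include <sys/dsl_synctask.h>
    #include <sys/spa.h>

    static int
    example_check(void *arg, dmu_tx_t *tx)
    {
        /* Open-context validation; return nonzero to abort the task. */
        return (0);
    }

    static void
    example_sync(void *arg, dmu_tx_t *tx)
    {
        /* The pool is derived from the tx instead of being passed in. */
        spa_t *spa = dmu_tx_pool(tx)->dp_spa;

        spa_history_log_internal(spa, "example", tx, "arg=%p", arg);
    }

    static int
    example_run(void *arg)
    {
        /* Callers now name the pool; the synctask code looks it up. */
        return (dsl_sync_task("tank", example_check, example_sync, arg, 5));
    }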

User-Visible Changes:
  "zfs snapshot" can create more arbitrary snapshots at once (atomically)
  "zfs destroy" destroys multiple snapshots at once
  "zfs recv" has improved performance

Backward Compatibility:
  I have extended the compatibility layer to support full backward
  compatibility by remapping or rewriting the responsible ioctl arguments.
  Old utilities are fully supported by the new kernel module.

Forward Compatibility:
  New utilities work with old kernels with the following restrictions:
    - creating, destroying, holding, and releasing multiple snapshots
      at once is not supported; this includes recursive (-r) commands

Illumos ZFS issues:
  2882 implement libzfs_core
  2900 "zfs snapshot" should be able to create multiple,
       arbitrary snapshots at once
  3464 zfs synctask code needs restructuring

References:
  https://www.illumos.org/issues/2882
  https://www.illumos.org/issues/2900
  https://www.illumos.org/issues/3464 [1]

MFC after:	1 month
Sponsored by:	Hybrid Logic Inc. [1]
Martin Matuska 2013-03-21 08:38:03 +00:00
commit 05f49d92ef
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=248571
105 changed files with 9747 additions and 7134 deletions


@ -1389,6 +1389,7 @@ _prebuild_libs= ${_kerberos5_lib_libasn1} \
lib/libopie lib/libpam ${_lib_libthr} \
lib/libradius lib/libsbuf lib/libtacplus \
${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
${_cddl_lib_libzfs_core} \
lib/libutil ${_lib_libypclnt} lib/libz lib/msun \
${_secure_lib_libcrypto} ${_secure_lib_libssh} \
${_secure_lib_libssl}
@ -1417,7 +1418,9 @@ lib/libopie__L lib/libtacplus__L: lib/libmd__L
.if ${MK_CDDL} != "no"
_cddl_lib_libumem= cddl/lib/libumem
_cddl_lib_libnvpair= cddl/lib/libnvpair
_cddl_lib_libzfs_core= cddl/lib/libzfs_core
_cddl_lib= cddl/lib
cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
.endif
.if ${MK_CRYPT} != "no"


@ -57,6 +57,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <zfs_comutil.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@ -206,6 +207,27 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
nvlist_free(nv);
}
/* ARGSUSED */
static void
dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
{
spa_history_phys_t *shp = data;
if (shp == NULL)
return;
(void) printf("\t\tpool_create_len = %llu\n",
(u_longlong_t)shp->sh_pool_create_len);
(void) printf("\t\tphys_max_off = %llu\n",
(u_longlong_t)shp->sh_phys_max_off);
(void) printf("\t\tbof = %llu\n",
(u_longlong_t)shp->sh_bof);
(void) printf("\t\teof = %llu\n",
(u_longlong_t)shp->sh_eof);
(void) printf("\t\trecords_lost = %llu\n",
(u_longlong_t)shp->sh_records_lost);
}
static void
zdb_nicenum(uint64_t num, char *buf)
{
@ -857,21 +879,22 @@ dump_history(spa_t *spa)
for (int i = 0; i < num; i++) {
uint64_t time, txg, ievent;
char *cmd, *intstr;
boolean_t printed = B_FALSE;
if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
&time) != 0)
continue;
goto next;
if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
&cmd) != 0) {
if (nvlist_lookup_uint64(events[i],
ZPOOL_HIST_INT_EVENT, &ievent) != 0)
continue;
goto next;
verify(nvlist_lookup_uint64(events[i],
ZPOOL_HIST_TXG, &txg) == 0);
verify(nvlist_lookup_string(events[i],
ZPOOL_HIST_INT_STR, &intstr) == 0);
if (ievent >= LOG_END)
continue;
if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
goto next;
(void) snprintf(internalstr,
sizeof (internalstr),
@ -884,6 +907,14 @@ dump_history(spa_t *spa)
(void) localtime_r(&tsec, &t);
(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
(void) printf("%s %s\n", tbuf, cmd);
printed = B_TRUE;
next:
if (dump_opt['h'] > 1) {
if (!printed)
(void) printf("unrecognized record:\n");
dump_nvlist(events[i], 2);
}
}
}
@ -1496,7 +1527,7 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_zap, /* other ZAP */
dump_zap, /* persistent error log */
dump_uint8, /* SPA history */
dump_uint64, /* SPA history offsets */
dump_history_offsets, /* SPA history offsets */
dump_zap, /* Pool properties */
dump_zap, /* DSL permissions */
dump_acl, /* ZFS ACL */
@ -1661,7 +1692,9 @@ dump_dir(objset_t *os)
int print_header = 1;
int i, error;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (dds.dds_type < DMU_OST_NUMTYPES)
type = objset_types[dds.dds_type];
@ -2070,7 +2103,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
free(data);
if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;


@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd March 1, 2013
.Dd March 21, 2013
.Dt ZFS 8
.Os
.Sh NAME
@ -65,6 +65,7 @@
.Op Fl r
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Ar filesystem@snapname Ns | Ns Ar volume@snapname
.Ar filesystem@snapname Ns | Ns Ar volume@snapname Ns ...
.Nm
.Cm rollback
.Op Fl rRf
@ -1617,7 +1618,11 @@ multiple snapshots.
Destroy (or mark for deferred deletion) all snapshots with this name in
descendent file systems.
.It Fl R
Recursively destroy all dependents.
Recursively destroy all clones of these snapshots, including the clones,
snapshots, and children.
If this flag is specified, the
.Fl d
flag will have no effect.
.It Fl n
Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in
conjunction with the
@ -1645,17 +1650,18 @@ behavior for mounted file systems in use.
.Op Fl r
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Ar filesystem@snapname Ns | Ns volume@snapname
.Ar filesystem@snapname Ns | Ns volume@snapname Ns ...
.Xc
.Pp
Creates a snapshot with the given name. All previous modifications by
successful system calls to the file system are part of the snapshot. See the
Creates snapshots with the given names. All previous modifications by
successful system calls to the file system are part of the snapshots.
Snapshots are taken atomically, so that all snapshots correspond to the same
moment in time. See the
.Qq Sx Snapshots
section for details.
.Bl -tag -width indent
.It Fl r
Recursively create snapshots of all descendent datasets. Snapshots are taken
atomically, so that all recursive snapshots correspond to the same moment in
time.
Recursively create snapshots of all descendent datasets
.It Fl o Ar property Ns = Ns Ar value
Sets the specified property; see
.Qq Nm Cm create


@ -58,6 +58,7 @@
#include <time.h>
#include <libzfs.h>
#include <libzfs_core.h>
#include <zfs_prop.h>
#include <zfs_deleg.h>
#include <libuutil.h>
@ -74,6 +75,7 @@ libzfs_handle_t *g_zfs;
static FILE *mnttab_file;
static char history_str[HIS_MAX_RECORD_LEN];
static boolean_t log_history = B_TRUE;
static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
@ -276,7 +278,7 @@ get_usage(zfs_help_t idx)
return (gettext("\tshare <-a | filesystem>\n"));
case HELP_SNAPSHOT:
return (gettext("\tsnapshot [-r] [-o property=value] ... "
"<filesystem@snapname|volume@snapname>\n"));
"<filesystem@snapname|volume@snapname> ...\n"));
case HELP_UNMOUNT:
return (gettext("\tunmount [-f] "
"<-a | filesystem|mountpoint>\n"));
@ -914,11 +916,12 @@ typedef struct destroy_cbdata {
boolean_t cb_parsable;
boolean_t cb_dryrun;
nvlist_t *cb_nvl;
nvlist_t *cb_batchedsnaps;
/* first snap in contiguous run */
zfs_handle_t *cb_firstsnap;
char *cb_firstsnap;
/* previous snap in contiguous run */
zfs_handle_t *cb_prevsnap;
char *cb_prevsnap;
int64_t cb_snapused;
char *cb_snapspec;
} destroy_cbdata_t;
@ -1010,9 +1013,27 @@ destroy_callback(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
if (cb->cb_dryrun) {
zfs_close(zhp);
return (0);
}
if (!cb->cb_dryrun) {
if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
/*
* We batch up all contiguous snapshots (even of different
* filesystems) and destroy them with one ioctl. We can't
* simply do all snap deletions and then all fs deletions,
* because we must delete a clone before its origin.
*/
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
fnvlist_add_boolean(cb->cb_batchedsnaps, name);
} else {
int error = zfs_destroy_snaps_nvl(g_zfs,
cb->cb_batchedsnaps, B_FALSE);
fnvlist_free(cb->cb_batchedsnaps);
cb->cb_batchedsnaps = fnvlist_alloc();
if (error != 0 ||
zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
zfs_close(zhp);
return (-1);
@ -1032,11 +1053,13 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg)
if (nvlist_exists(cb->cb_nvl, name)) {
if (cb->cb_firstsnap == NULL)
cb->cb_firstsnap = zfs_handle_dup(zhp);
cb->cb_firstsnap = strdup(name);
if (cb->cb_prevsnap != NULL)
zfs_close(cb->cb_prevsnap);
free(cb->cb_prevsnap);
/* this snap continues the current range */
cb->cb_prevsnap = zfs_handle_dup(zhp);
cb->cb_prevsnap = strdup(name);
if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL)
nomem();
if (cb->cb_verbose) {
if (cb->cb_parsable) {
(void) printf("destroy\t%s\n", name);
@ -1051,12 +1074,12 @@ destroy_print_cb(zfs_handle_t *zhp, void *arg)
} else if (cb->cb_firstsnap != NULL) {
/* end of this range */
uint64_t used = 0;
err = zfs_get_snapused_int(cb->cb_firstsnap,
err = lzc_snaprange_space(cb->cb_firstsnap,
cb->cb_prevsnap, &used);
cb->cb_snapused += used;
zfs_close(cb->cb_firstsnap);
free(cb->cb_firstsnap);
cb->cb_firstsnap = NULL;
zfs_close(cb->cb_prevsnap);
free(cb->cb_prevsnap);
cb->cb_prevsnap = NULL;
}
zfs_close(zhp);
@ -1073,13 +1096,13 @@ destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb)
if (cb->cb_firstsnap != NULL) {
uint64_t used = 0;
if (err == 0) {
err = zfs_get_snapused_int(cb->cb_firstsnap,
err = lzc_snaprange_space(cb->cb_firstsnap,
cb->cb_prevsnap, &used);
}
cb->cb_snapused += used;
zfs_close(cb->cb_firstsnap);
free(cb->cb_firstsnap);
cb->cb_firstsnap = NULL;
zfs_close(cb->cb_prevsnap);
free(cb->cb_prevsnap);
cb->cb_prevsnap = NULL;
}
return (err);
@ -1166,8 +1189,10 @@ static int
zfs_do_destroy(int argc, char **argv)
{
destroy_cbdata_t cb = { 0 };
int rv = 0;
int err = 0;
int c;
zfs_handle_t *zhp;
zfs_handle_t *zhp = NULL;
char *at;
zfs_type_t type = ZFS_TYPE_DATASET;
@ -1221,11 +1246,9 @@ zfs_do_destroy(int argc, char **argv)
at = strchr(argv[0], '@');
if (at != NULL) {
int err = 0;
/* Build the list of snaps to destroy in cb_nvl. */
if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0)
nomem();
cb.cb_nvl = fnvlist_alloc();
*at = '\0';
zhp = zfs_open(g_zfs, argv[0],
@ -1236,17 +1259,15 @@ zfs_do_destroy(int argc, char **argv)
cb.cb_snapspec = at + 1;
if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 ||
cb.cb_error) {
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
return (1);
rv = 1;
goto out;
}
if (nvlist_empty(cb.cb_nvl)) {
(void) fprintf(stderr, gettext("could not find any "
"snapshots to destroy; check snapshot names.\n"));
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
return (1);
rv = 1;
goto out;
}
if (cb.cb_verbose) {
@ -1265,18 +1286,26 @@ zfs_do_destroy(int argc, char **argv)
}
if (!cb.cb_dryrun) {
if (cb.cb_doclones)
if (cb.cb_doclones) {
cb.cb_batchedsnaps = fnvlist_alloc();
err = destroy_clones(&cb);
if (err == 0) {
err = zfs_destroy_snaps_nvl(g_zfs,
cb.cb_batchedsnaps, B_FALSE);
}
if (err != 0) {
rv = 1;
goto out;
}
}
if (err == 0) {
err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl,
err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl,
cb.cb_defer_destroy);
}
}
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
if (err != 0)
return (1);
rv = 1;
} else {
/* Open the given dataset */
if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
@ -1297,8 +1326,8 @@ zfs_do_destroy(int argc, char **argv)
zfs_get_name(zhp));
(void) fprintf(stderr, gettext("use 'zpool destroy %s' "
"to destroy the pool itself\n"), zfs_get_name(zhp));
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
/*
@ -1308,30 +1337,42 @@ zfs_do_destroy(int argc, char **argv)
if (!cb.cb_doclones &&
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
&cb) != 0) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
if (cb.cb_error) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
cb.cb_batchedsnaps = fnvlist_alloc();
if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback,
&cb) != 0) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
/*
* Do the real thing. The callback will close the
* handle regardless of whether it succeeds or not.
*/
if (destroy_callback(zhp, &cb) != 0)
return (1);
err = destroy_callback(zhp, &cb);
zhp = NULL;
if (err == 0) {
err = zfs_destroy_snaps_nvl(g_zfs,
cb.cb_batchedsnaps, cb.cb_defer_destroy);
}
if (err != 0)
rv = 1;
}
return (0);
out:
fnvlist_free(cb.cb_batchedsnaps);
fnvlist_free(cb.cb_nvl);
if (zhp != NULL)
zfs_close(zhp);
return (rv);
}
static boolean_t
@ -1932,9 +1973,11 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
/*
* If they did "zfs upgrade -a", then we could
* be doing ioctls to different pools. We need
* to log this history once to each pool.
* to log this history once to each pool, and bypass
* the normal history logging that happens in main().
*/
verify(zpool_stage_history(g_zfs, history_str) == 0);
(void) zpool_log_history(g_zfs, history_str);
log_history = B_FALSE;
}
if (zfs_prop_set(zhp, "version", verstr) == 0)
cb->cb_numupgraded++;
@ -3472,6 +3515,32 @@ zfs_do_set(int argc, char **argv)
return (ret);
}
typedef struct snap_cbdata {
nvlist_t *sd_nvl;
boolean_t sd_recursive;
const char *sd_snapname;
} snap_cbdata_t;
static int
zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
{
snap_cbdata_t *sd = arg;
char *name;
int rv = 0;
int error;
error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname);
if (error == -1)
nomem();
fnvlist_add_boolean(sd->sd_nvl, name);
free(name);
if (sd->sd_recursive)
rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd);
zfs_close(zhp);
return (rv);
}
/*
* zfs snapshot [-r] [-o prop=value] ... <fs@snap>
*
@ -3481,13 +3550,16 @@ zfs_do_set(int argc, char **argv)
static int
zfs_do_snapshot(int argc, char **argv)
{
boolean_t recursive = B_FALSE;
int ret = 0;
char c;
nvlist_t *props;
snap_cbdata_t sd = { 0 };
boolean_t multiple_snaps = B_FALSE;
if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
nomem();
if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0)
nomem();
/* check options */
while ((c = getopt(argc, argv, "ro:")) != -1) {
@ -3497,7 +3569,8 @@ zfs_do_snapshot(int argc, char **argv)
return (1);
break;
case 'r':
recursive = B_TRUE;
sd.sd_recursive = B_TRUE;
multiple_snaps = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@ -3514,18 +3587,35 @@ zfs_do_snapshot(int argc, char **argv)
(void) fprintf(stderr, gettext("missing snapshot argument\n"));
goto usage;
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
goto usage;
if (argc > 1)
multiple_snaps = B_TRUE;
for (; argc > 0; argc--, argv++) {
char *atp;
zfs_handle_t *zhp;
atp = strchr(argv[0], '@');
if (atp == NULL)
goto usage;
*atp = '\0';
sd.sd_snapname = atp + 1;
zhp = zfs_open(g_zfs, argv[0],
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
if (zhp == NULL)
goto usage;
if (zfs_snapshot_cb(zhp, &sd) != 0)
goto usage;
}
ret = zfs_snapshot(g_zfs, argv[0], recursive, props);
ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props);
nvlist_free(sd.sd_nvl);
nvlist_free(props);
if (ret && recursive)
if (ret != 0 && multiple_snaps)
(void) fprintf(stderr, gettext("no snapshots were created\n"));
return (ret != 0);
usage:
nvlist_free(sd.sd_nvl);
nvlist_free(props);
usage(B_FALSE);
return (-1);
@ -5068,28 +5158,12 @@ zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un)
return (error);
}
/*
* zfs allow [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
* -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/
static int
zfs_do_allow(int argc, char **argv)
{
return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
}
/*
* zfs unallow [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
* -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/
static int
zfs_do_unallow(int argc, char **argv)
{
@ -5103,7 +5177,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
int i;
const char *tag;
boolean_t recursive = B_FALSE;
boolean_t temphold = B_FALSE;
const char *opts = holding ? "rt" : "r";
int c;
@ -5113,9 +5186,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
case 'r':
recursive = B_TRUE;
break;
case 't':
temphold = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@ -5164,7 +5234,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
}
if (holding) {
if (zfs_hold(zhp, delim+1, tag, recursive,
temphold, B_FALSE, -1, 0, 0) != 0)
B_FALSE, -1) != 0)
++errors;
} else {
if (zfs_release(zhp, delim+1, tag, recursive) != 0)
@ -5180,7 +5250,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
* zfs hold [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
* -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/
@ -6602,8 +6671,7 @@ main(int argc, char **argv)
return (1);
}
zpool_set_history_str("zfs", argc, argv, history_str);
verify(zpool_stage_history(g_zfs, history_str) == 0);
zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
libzfs_print_on_error(g_zfs, B_TRUE);
@ -6672,6 +6740,9 @@ main(int argc, char **argv)
(void) fclose(mnttab_file);
if (ret == 0 && log_history)
(void) zpool_log_history(g_zfs, history_str);
libzfs_fini(g_zfs);
/*


@ -46,6 +46,7 @@
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfeature.h>
#include <sys/dmu_tx.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@ -273,12 +274,15 @@ zhack_do_feature_stat(int argc, char **argv)
}
static void
feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_enable_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_enable(spa, feature, tx);
spa_history_log_internal(spa, "zhack enable feature", tx,
"name=%s can_readonly=%u",
feature->fi_guid, feature->fi_can_readonly);
}
static void
@ -341,8 +345,8 @@ zhack_do_feature_enable(int argc, char **argv)
if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
fatal("feature already enabled: %s", feature.fi_guid);
VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
feature_enable_sync, spa, &feature, 5));
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
feature_enable_sync, &feature, 5));
spa_close(spa, FTAG);
@ -350,21 +354,25 @@ zhack_do_feature_enable(int argc, char **argv)
}
static void
feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_incr_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_incr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
"name=%s", feature->fi_guid);
}
static void
feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_decr_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_decr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
"name=%s", feature->fi_guid);
}
static void
@ -435,8 +443,8 @@ zhack_do_feature_ref(int argc, char **argv)
if (decr && !spa_feature_is_active(spa, &feature))
fatal("feature refcount already 0: %s", feature.fi_guid);
VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
decr ? feature_decr_sync : feature_incr_sync, &feature, 5));
spa_close(spa, FTAG);
}


@ -192,9 +192,9 @@ static zpool_command_t command_table[] = {
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
zpool_command_t *current_command;
static zpool_command_t *current_command;
static char history_str[HIS_MAX_RECORD_LEN];
static boolean_t log_history = B_TRUE;
static uint_t timestamp_fmt = NODATE;
static const char *
@ -1093,7 +1093,10 @@ zpool_do_destroy(int argc, char **argv)
return (1);
}
ret = (zpool_destroy(zhp) != 0);
/* The history must be logged as part of the export */
log_history = B_FALSE;
ret = (zpool_destroy(zhp, history_str) != 0);
zpool_close(zhp);
@ -1157,10 +1160,13 @@ zpool_do_export(int argc, char **argv)
continue;
}
/* The history must be logged as part of the export */
log_history = B_FALSE;
if (hardforce) {
if (zpool_export_force(zhp) != 0)
if (zpool_export_force(zhp, history_str) != 0)
ret = 1;
} else if (zpool_export(zhp, force) != 0) {
} else if (zpool_export(zhp, force, history_str) != 0) {
ret = 1;
}
@ -4563,6 +4569,14 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
if (count > 0) {
cbp->cb_first = B_FALSE;
printnl = B_TRUE;
/*
* If they did "zpool upgrade -a", then we could
* be doing ioctls to different pools. We need
* to log this history once to each pool, and bypass
* the normal history logging that happens in main().
*/
(void) zpool_log_history(g_zfs, history_str);
log_history = B_FALSE;
}
}
@ -4924,8 +4938,8 @@ zpool_do_upgrade(int argc, char **argv)
typedef struct hist_cbdata {
boolean_t first;
int longfmt;
int internal;
boolean_t longfmt;
boolean_t internal;
} hist_cbdata_t;
/*
@ -4937,21 +4951,8 @@ get_history_one(zpool_handle_t *zhp, void *data)
nvlist_t *nvhis;
nvlist_t **records;
uint_t numrecords;
char *cmdstr;
char *pathstr;
uint64_t dst_time;
time_t tsec;
struct tm t;
char tbuf[30];
int ret, i;
uint64_t who;
struct passwd *pwd;
char *hostname;
char *zonename;
char internalstr[MAXPATHLEN];
hist_cbdata_t *cb = (hist_cbdata_t *)data;
uint64_t txg;
uint64_t ievent;
cb->first = B_FALSE;
@ -4963,64 +4964,94 @@ get_history_one(zpool_handle_t *zhp, void *data)
verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
&records, &numrecords) == 0);
for (i = 0; i < numrecords; i++) {
if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME,
&dst_time) != 0)
continue;
nvlist_t *rec = records[i];
char tbuf[30] = "";
/* is it an internal event or a standard event? */
if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD,
&cmdstr) != 0) {
if (cb->internal == 0)
continue;
if (nvlist_exists(rec, ZPOOL_HIST_TIME)) {
time_t tsec;
struct tm t;
if (nvlist_lookup_uint64(records[i],
ZPOOL_HIST_INT_EVENT, &ievent) != 0)
continue;
verify(nvlist_lookup_uint64(records[i],
ZPOOL_HIST_TXG, &txg) == 0);
verify(nvlist_lookup_string(records[i],
ZPOOL_HIST_INT_STR, &pathstr) == 0);
if (ievent >= LOG_END)
continue;
(void) snprintf(internalstr,
sizeof (internalstr),
"[internal %s txg:%lld] %s",
zfs_history_event_names[ievent], txg,
pathstr);
cmdstr = internalstr;
tsec = fnvlist_lookup_uint64(records[i],
ZPOOL_HIST_TIME);
(void) localtime_r(&tsec, &t);
(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
}
if (nvlist_exists(rec, ZPOOL_HIST_CMD)) {
(void) printf("%s %s", tbuf,
fnvlist_lookup_string(rec, ZPOOL_HIST_CMD));
} else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) {
int ievent =
fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT);
if (!cb->internal)
continue;
if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) {
(void) printf("%s unrecognized record:\n",
tbuf);
dump_nvlist(rec, 4);
continue;
}
(void) printf("%s [internal %s txg:%lld] %s", tbuf,
zfs_history_event_names[ievent],
fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG),
fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR));
} else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) {
if (!cb->internal)
continue;
(void) printf("%s [txg:%lld] %s", tbuf,
fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG),
fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME));
if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) {
(void) printf(" %s (%llu)",
fnvlist_lookup_string(rec,
ZPOOL_HIST_DSNAME),
fnvlist_lookup_uint64(rec,
ZPOOL_HIST_DSID));
}
(void) printf(" %s", fnvlist_lookup_string(rec,
ZPOOL_HIST_INT_STR));
} else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) {
if (!cb->internal)
continue;
(void) printf("%s ioctl %s\n", tbuf,
fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL));
if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) {
(void) printf(" input:\n");
dump_nvlist(fnvlist_lookup_nvlist(rec,
ZPOOL_HIST_INPUT_NVL), 8);
}
if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) {
(void) printf(" output:\n");
dump_nvlist(fnvlist_lookup_nvlist(rec,
ZPOOL_HIST_OUTPUT_NVL), 8);
}
} else {
if (!cb->internal)
continue;
(void) printf("%s unrecognized record:\n", tbuf);
dump_nvlist(rec, 4);
}
tsec = dst_time;
(void) localtime_r(&tsec, &t);
(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
(void) printf("%s %s", tbuf, cmdstr);
if (!cb->longfmt) {
(void) printf("\n");
continue;
}
(void) printf(" [");
if (nvlist_lookup_uint64(records[i],
ZPOOL_HIST_WHO, &who) == 0) {
pwd = getpwuid((uid_t)who);
if (pwd)
(void) printf("user %s on",
pwd->pw_name);
else
(void) printf("user %d on",
(int)who);
} else {
(void) printf(gettext("no info]\n"));
continue;
if (nvlist_exists(rec, ZPOOL_HIST_WHO)) {
uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO);
struct passwd *pwd = getpwuid(who);
(void) printf("user %d ", (int)who);
if (pwd != NULL)
(void) printf("(%s) ", pwd->pw_name);
}
if (nvlist_lookup_string(records[i],
ZPOOL_HIST_HOST, &hostname) == 0) {
(void) printf(" %s", hostname);
if (nvlist_exists(rec, ZPOOL_HIST_HOST)) {
(void) printf("on %s",
fnvlist_lookup_string(rec, ZPOOL_HIST_HOST));
}
if (nvlist_lookup_string(records[i],
ZPOOL_HIST_ZONE, &zonename) == 0) {
(void) printf(":%s", zonename);
if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) {
(void) printf(":%s",
fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE));
}
(void) printf("]");
(void) printf("\n");
}
@ -5035,8 +5066,6 @@ get_history_one(zpool_handle_t *zhp, void *data)
*
* Displays the history of commands that modified pools.
*/
int
zpool_do_history(int argc, char **argv)
{
@ -5049,10 +5078,10 @@ zpool_do_history(int argc, char **argv)
while ((c = getopt(argc, argv, "li")) != -1) {
switch (c) {
case 'l':
cbdata.longfmt = 1;
cbdata.longfmt = B_TRUE;
break;
case 'i':
cbdata.internal = 1;
cbdata.internal = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@ -5277,8 +5306,7 @@ main(int argc, char **argv)
if (strcmp(cmdname, "-?") == 0)
usage(B_TRUE);
zpool_set_history_str("zpool", argc, argv, history_str);
verify(zpool_stage_history(g_zfs, history_str) == 0);
zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
/*
* Run the appropriate command.
@ -5305,6 +5333,9 @@ main(int argc, char **argv)
usage(B_FALSE);
}
if (ret == 0 && log_history)
(void) zpool_log_history(g_zfs, history_str);
libzfs_fini(g_zfs);
/*


@ -104,10 +104,12 @@
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@ -367,7 +369,7 @@ ztest_info_t ztest_info[] = {
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_sometimes },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1,
&ztest_opts.zo_vdevtime },
@ -1008,9 +1010,8 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
uint64_t curval;
int error;
error = dsl_prop_set(osname, propname,
(inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
sizeof (value), 1, &value);
error = dsl_prop_set_int(osname, propname,
(inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -1018,8 +1019,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
}
ASSERT0(error);
VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
1, &curval, setpoint), ==, 0);
VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
if (ztest_opts.zo_verbose >= 6) {
VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
@ -2332,7 +2332,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
spa_create("ztest_bad_file", nvroot, NULL, NULL));
nvlist_free(nvroot);
/*
@ -2340,7 +2340,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
nvlist_free(nvroot);
/*
@ -2349,7 +2349,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
(void) rw_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
@ -2407,7 +2407,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
props = fnvlist_alloc();
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
VERIFY0(spa_create(name, nvroot, props, NULL, NULL));
VERIFY0(spa_create(name, nvroot, props, NULL));
fnvlist_free(nvroot);
fnvlist_free(props);
@ -2481,8 +2481,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
int error;
VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
leaves =
MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@ -3182,7 +3181,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/*
* Verify that the dataset contains a directory object.
*/
VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
@ -3190,12 +3189,16 @@ ztest_objset_destroy_cb(const char *name, void *arg)
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
/*
* Destroy the dataset.
*/
VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
if (strchr(name, '@') != NULL) {
VERIFY0(dsl_destroy_snapshot(name, B_FALSE));
} else {
VERIFY0(dsl_destroy_head(name));
}
return (0);
}
@ -3205,17 +3208,17 @@ ztest_snapshot_create(char *osname, uint64_t id)
char snapname[MAXNAMELEN];
int error;
(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
(u_longlong_t)id);
(void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1,
NULL, NULL, B_FALSE, B_FALSE, -1);
error = dmu_objset_snapshot_one(osname, snapname);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
return (B_FALSE);
}
if (error != 0 && error != EEXIST)
fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
if (error != 0 && error != EEXIST) {
fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname,
snapname, error);
}
return (B_TRUE);
}
@ -3228,7 +3231,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id)
(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
(u_longlong_t)id);
error = dmu_objset_destroy(snapname, B_FALSE);
error = dsl_destroy_snapshot(snapname, B_FALSE);
if (error != 0 && error != ENOENT)
fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
return (B_TRUE);
@ -3274,7 +3277,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
FTAG, &os));
/*
* Verify that we can create a new dataset.
@ -3289,8 +3293,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
VERIFY3U(0, ==,
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
ztest_zd_init(&zdtmp, NULL, os);
@ -3366,21 +3369,21 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
error = dmu_objset_destroy(clone2name, B_FALSE);
error = dsl_destroy_head(clone2name);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
error = dmu_objset_destroy(snap3name, B_FALSE);
fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error);
error = dsl_destroy_snapshot(snap3name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
error = dmu_objset_destroy(snap2name, B_FALSE);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error);
error = dsl_destroy_snapshot(snap2name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
error = dmu_objset_destroy(clone1name, B_FALSE);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error);
error = dsl_destroy_head(clone1name);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
error = dmu_objset_destroy(snap1name, B_FALSE);
fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error);
error = dsl_destroy_snapshot(snap1name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
}
/*
@ -3389,8 +3392,7 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
void
ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
{
objset_t *clone;
dsl_dataset_t *ds;
objset_t *os;
char snap1name[MAXNAMELEN];
char clone1name[MAXNAMELEN];
char snap2name[MAXNAMELEN];
@ -3409,8 +3411,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
NULL, NULL, B_FALSE, B_FALSE, -1);
error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3419,12 +3420,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
}
error = dmu_objset_hold(snap1name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
dmu_objset_rele(clone, FTAG);
error = dmu_objset_clone(clone1name, snap1name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3433,8 +3429,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
}
error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
NULL, NULL, B_FALSE, B_FALSE, -1);
error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3443,8 +3438,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);
}
error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
NULL, NULL, B_FALSE, B_FALSE, -1);
error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1);
if (error && error != EEXIST) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3453,12 +3447,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
}
error = dmu_objset_hold(snap3name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
dmu_objset_rele(clone, FTAG);
error = dmu_objset_clone(clone2name, snap3name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3467,14 +3456,14 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error)
fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
dsl_dataset_disown(ds, FTAG);
dmu_objset_disown(os, FTAG);
out:
ztest_dsl_dataset_cleanup(osname, id);
@ -4286,7 +4275,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
}
count = -1ULL;
VERIFY(zap_count(os, object, &count) == 0);
VERIFY0(zap_count(os, object, &count));
ASSERT(count != -1ULL);
/*
@ -4597,6 +4586,22 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
(void) rw_unlock(&ztest_name_lock);
}
static int
user_release_one(const char *snapname, const char *holdname)
{
nvlist_t *snaps, *holds;
int error;
snaps = fnvlist_alloc();
holds = fnvlist_alloc();
fnvlist_add_boolean(holds, holdname);
fnvlist_add_nvlist(snaps, snapname, holds);
fnvlist_free(holds);
error = dsl_dataset_user_release(snaps, NULL);
fnvlist_free(snaps);
return (error);
}
/*
* Test snapshot hold/release and deferred destroy.
*/
@ -4611,29 +4616,36 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
char clonename[100];
char tag[100];
char osname[MAXNAMELEN];
nvlist_t *holds;
(void) rw_rdlock(&ztest_name_lock);
dmu_objset_name(os, osname);
(void) snprintf(snapname, 100, "sh1_%llu", id);
(void) snprintf(fullname, 100, "%s@%s", osname, snapname);
(void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id);
(void) snprintf(tag, 100, "%tag_%llu", id);
(void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
(void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
(void) snprintf(clonename, sizeof (clonename),
"%s/ch1_%llu", osname, id);
(void) snprintf(tag, sizeof (tag), "tag_%llu", id);
/*
* Clean up from any previous run.
*/
(void) dmu_objset_destroy(clonename, B_FALSE);
(void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
(void) dmu_objset_destroy(fullname, B_FALSE);
error = dsl_destroy_head(clonename);
if (error != ENOENT)
ASSERT0(error);
error = user_release_one(fullname, tag);
if (error != ESRCH && error != ENOENT)
ASSERT0(error);
error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != ENOENT)
ASSERT0(error);
/*
* Create snapshot, clone it, mark snap for deferred destroy,
* destroy clone, verify snap was also destroyed.
*/
error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
FALSE, -1);
error = dmu_objset_snapshot_one(osname, snapname);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_snapshot");
@ -4642,12 +4654,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
error = dmu_objset_hold(fullname, FTAG, &origin);
if (error)
fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0);
dmu_objset_rele(origin, FTAG);
error = dmu_objset_clone(clonename, fullname);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_clone");
@ -4656,15 +4663,15 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
}
error = dmu_objset_destroy(fullname, B_TRUE);
error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
error = dmu_objset_destroy(clonename, B_FALSE);
error = dsl_destroy_head(clonename);
if (error)
fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error);
fatal(0, "dsl_destroy_head(%s) = %d", clonename, error);
error = dmu_objset_hold(fullname, FTAG, &origin);
if (error != ENOENT)
@ -4675,8 +4682,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
* destroy a held snapshot, mark for deferred destroy,
* release hold, verify snapshot was destroyed.
*/
error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE,
FALSE, -1);
error = dmu_objset_snapshot_one(osname, snapname);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_snapshot");
@ -4685,28 +4691,31 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
B_TRUE, -1);
holds = fnvlist_alloc();
fnvlist_add_string(holds, fullname, tag);
error = dsl_dataset_user_hold(holds, 0, NULL);
fnvlist_free(holds);
if (error)
fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
error = dmu_objset_destroy(fullname, B_FALSE);
error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != EBUSY) {
fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
fullname, error);
}
error = dmu_objset_destroy(fullname, B_TRUE);
error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
error = user_release_one(fullname, tag);
if (error)
fatal(0, "dsl_dataset_user_release(%s)", fullname, tag);
fatal(0, "user_release_one(%s)", fullname, tag);
VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
out:
(void) rw_unlock(&ztest_name_lock);
@ -4960,8 +4969,12 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
*/
for (int i = 0; i < copies; i++) {
uint64_t offset = i * blocksize;
VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db,
DMU_READ_NO_PREFETCH));
int error = dmu_buf_hold(os, object, offset, FTAG, &db,
DMU_READ_NO_PREFETCH);
if (error != 0) {
fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
os, (long long)object, (long long) offset, error);
}
ASSERT(db->db_offset == offset);
ASSERT(db->db_size == blocksize);
ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
@ -5172,6 +5185,7 @@ ztest_spa_import_export(char *oldname, char *newname)
nvlist_t *config, *newconfig;
uint64_t pool_guid;
spa_t *spa;
int error;
if (ztest_opts.zo_verbose >= 4) {
(void) printf("import/export: old = %s, new = %s\n",
@ -5216,7 +5230,12 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Import it under the new name.
*/
VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
error = spa_import(newname, config, NULL, 0);
if (error != 0) {
dump_nvlist(config, 0);
fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
oldname, newname, error);
}
ztest_walk_pool_directory("pools after import");
@ -5423,7 +5442,7 @@ ztest_dataset_open(int d)
}
ASSERT(error == 0 || error == EEXIST);
VERIFY0(dmu_objset_hold(name, zd, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
(void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@ -5464,7 +5483,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d];
zil_close(zd->zd_zilog);
dmu_objset_rele(zd->zd_os, zd);
dmu_objset_disown(zd->zd_os, zd);
ztest_zd_fini(zd);
}
@ -5508,13 +5527,14 @@ ztest_run(ztest_shared_t *zs)
* Open our pool.
*/
kernel_init(FREAD | FWRITE);
VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0);
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
spa->spa_debug = B_TRUE;
ztest_spa = spa;
VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os));
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
DMU_OST_ANY, B_TRUE, FTAG, &os));
zs->zs_guid = dmu_objset_fsid_guid(os);
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
@ -5791,8 +5811,7 @@ ztest_init(ztest_shared_t *zs)
spa_feature_table[i].fi_uname);
VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
}
VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props,
NULL, NULL));
VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));


@ -57,7 +57,8 @@ extern "C" {
/*
* libzfs errors
*/
enum {
typedef enum zfs_error {
EZFS_SUCCESS = 0, /* no error -- success */
EZFS_NOMEM = 2000, /* out of memory */
EZFS_BADPROP, /* invalid property value */
EZFS_PROPREADONLY, /* cannot set readonly property */
@ -129,7 +130,7 @@ enum {
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_UNKNOWN
};
} zfs_error_t;
/*
* The following data structures are all part
@ -185,6 +186,9 @@ extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
extern void zfs_save_arguments(int argc, char **, char *, int);
extern int zpool_log_history(libzfs_handle_t *, const char *);
extern int libzfs_errno(libzfs_handle_t *);
extern const char *libzfs_error_action(libzfs_handle_t *);
extern const char *libzfs_error_description(libzfs_handle_t *);
@ -220,7 +224,7 @@ extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
*/
extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
nvlist_t *, nvlist_t *);
extern int zpool_destroy(zpool_handle_t *);
extern int zpool_destroy(zpool_handle_t *, const char *);
extern int zpool_add(zpool_handle_t *, nvlist_t *);
typedef struct splitflags {
@ -343,8 +347,8 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
/*
* Import and export functions
*/
extern int zpool_export(zpool_handle_t *, boolean_t);
extern int zpool_export_force(zpool_handle_t *);
extern int zpool_export(zpool_handle_t *, boolean_t, const char *);
extern int zpool_export_force(zpool_handle_t *, const char *);
extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
char *altroot);
extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
@ -378,7 +382,7 @@ extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
*/
struct zfs_cmd;
extern const char *zfs_history_event_names[LOG_END];
extern const char *zfs_history_event_names[];
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
boolean_t verbose);
@ -386,12 +390,9 @@ extern int zpool_upgrade(zpool_handle_t *, uint64_t);
extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
nvlist_t ***, uint_t *);
extern void zpool_set_history_str(const char *subcommand, int argc,
char **argv, char *history_str);
extern int zpool_stage_history(libzfs_handle_t *, const char *);
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, unsigned long, struct zfs_cmd *);
extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *);
extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
nvlist_t *);
@ -441,8 +442,6 @@ extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,
char *propbuf, int proplen, boolean_t literal);
extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,
char *buf, size_t len);
extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
uint64_t *usedp);
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
extern const char *zfs_prop_values(zfs_prop_t);
@ -555,9 +554,11 @@ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
extern int zfs_destroy(zfs_handle_t *, boolean_t);
extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t);
extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t);
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
nvlist_t *props);
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
typedef struct renameflags {
@ -609,8 +610,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
boolean_t, boolean_t, int, uint64_t, uint64_t);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
boolean_t, boolean_t, int);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);


@ -0,0 +1,103 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#include "libzfs_compat.h"
int zfs_ioctl_version = ZFS_IOCVER_UNDEF;
static int zfs_spa_version = -1;
/*
* Get zfs_ioctl_version
*/
int
get_zfs_ioctl_version(void)
{
size_t ver_size;
int ver = ZFS_IOCVER_NONE;
ver_size = sizeof(ver);
sysctlbyname("vfs.zfs.version.ioctl", &ver, &ver_size, NULL, 0);
return (ver);
}
/*
* Get the SPA version
*/
static int
get_zfs_spa_version(void)
{
size_t ver_size;
int ver = 0;
ver_size = sizeof(ver);
sysctlbyname("vfs.zfs.version.spa", &ver, &ver_size, NULL, 0);
return (ver);
}
/*
* This is FreeBSD version of ioctl, because Solaris' ioctl() updates
* zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
* error is returned zc_nvlist_dst_size won't be updated.
*/
int
zcmd_ioctl(int fd, int request, zfs_cmd_t *zc)
{
size_t oldsize;
int ret, cflag = ZFS_CMD_COMPAT_NONE;
if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
zfs_ioctl_version = get_zfs_ioctl_version();
if (zfs_ioctl_version == ZFS_IOCVER_DEADMAN)
cflag = ZFS_CMD_COMPAT_DEADMAN;
/*
* If vfs.zfs.version.ioctl is not defined, assume we have v28
* compatible binaries and use vfs.zfs.version.spa to test for v15
*/
if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) {
cflag = ZFS_CMD_COMPAT_V28;
if (zfs_spa_version < 0)
zfs_spa_version = get_zfs_spa_version();
if (zfs_spa_version == SPA_VERSION_15 ||
zfs_spa_version == SPA_VERSION_14 ||
zfs_spa_version == SPA_VERSION_13)
cflag = ZFS_CMD_COMPAT_V15;
}
oldsize = zc->zc_nvlist_dst_size;
ret = zcmd_ioctl_compat(fd, request, zc, cflag);
if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
ret = -1;
errno = ENOMEM;
}
return (ret);
}


@ -0,0 +1,44 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#ifndef _LIBZFS_COMPAT_H
#define _LIBZFS_COMPAT_H
#include <zfs_ioctl_compat.h>
#ifdef __cplusplus
extern "C" {
#endif
int get_zfs_ioctl_version(void);
int zcmd_ioctl(int fd, int request, zfs_cmd_t *zc);
#define ioctl(fd, ioc, zc) zcmd_ioctl((fd), (ioc), (zc))
#ifdef __cplusplus
}
#endif
#endif /* _LIBZFS_COMPAT_H */


@ -1447,7 +1447,6 @@ zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
nvlist_t *nvl = NULL, *realprops;
zfs_prop_t prop;
boolean_t do_prefix = B_TRUE;
uint64_t idx;
int added_resv;
(void) snprintf(errbuf, sizeof (errbuf),
@ -2017,10 +2016,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg)
NULL, NULL, 0, B_TRUE) != 0)
goto out;
if (strcmp(gca->buf, gca->origin) == 0) {
if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) {
zfs_close(zhp);
return (no_memory(zhp->zfs_hdl));
}
fnvlist_add_boolean(gca->value, zfs_get_name(zhp));
gca->numclones--;
}
@ -2711,25 +2707,6 @@ zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,
return (0);
}
int
zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
uint64_t *usedp)
{
int err;
zfs_cmd_t zc = { 0 };
(void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value));
err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc);
if (err)
return (err);
*usedp = zc.zc_cookie;
return (0);
}
/*
* Returns the name of the given zfs handle.
*/
@ -2930,7 +2907,6 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
*/
for (cp = target + prefixlen + 1;
cp = strchr(cp, '/'); *cp = '/', cp++) {
char *logstr;
*cp = '\0';
@ -2941,16 +2917,12 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
continue;
}
logstr = hdl->libzfs_log_str;
hdl->libzfs_log_str = NULL;
if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
NULL) != 0) {
hdl->libzfs_log_str = logstr;
opname = dgettext(TEXT_DOMAIN, "create");
goto ancestorerr;
}
hdl->libzfs_log_str = logstr;
h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
if (h == NULL) {
opname = dgettext(TEXT_DOMAIN, "open");
@ -3008,12 +2980,12 @@ int
zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
nvlist_t *props)
{
zfs_cmd_t zc = { 0 };
int ret;
uint64_t size = 0;
uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
char errbuf[1024];
uint64_t zoned;
dmu_objset_type_t ost;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), path);
@ -3033,17 +3005,16 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
* will return ENOENT, not EEXIST. To prevent this from happening, we
* first try to see if the dataset exists.
*/
(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dataset already exists"));
return (zfs_error(hdl, EZFS_EXISTS, errbuf));
}
if (type == ZFS_TYPE_VOLUME)
zc.zc_objset_type = DMU_OST_ZVOL;
ost = DMU_OST_ZVOL;
else
zc.zc_objset_type = DMU_OST_ZFS;
ost = DMU_OST_ZFS;
if (props && (props = zfs_valid_proplist(hdl, type, props,
zoned, NULL, errbuf)) == 0)
@ -3095,14 +3066,9 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
}
}
if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0)
return (-1);
nvlist_free(props);
/* create the dataset */
ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
zcmd_free_nvlists(&zc);
ret = lzc_create(path, ost, props);
nvlist_free(props);
/* check for failure */
if (ret != 0) {
@ -3228,47 +3194,53 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
} else {
ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer);
ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer);
}
nvlist_free(dd.nvl);
return (ret);
}
/*
* Destroys all the snapshots named in the nvlist. They must be underneath
* the zhp (either snapshots of it, or snapshots of its descendants).
* Destroys all the snapshots named in the nvlist.
*/
int
zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)
zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
{
int ret;
zfs_cmd_t zc = { 0 };
nvlist_t *errlist;
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0)
return (-1);
zc.zc_defer_destroy = defer;
ret = lzc_destroy_snaps(snaps, defer, &errlist);
ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc);
if (ret != 0) {
if (ret == 0)
return (0);
if (nvlist_next_nvpair(errlist, NULL) == NULL) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot destroy snapshots in %s"), zc.zc_name);
ret = zfs_standard_error(hdl, ret, errbuf);
}
for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
nvpair_name(pair));
switch (errno) {
switch (fnvpair_value_int32(pair)) {
case EEXIST:
zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
"snapshot is cloned"));
return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf));
zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "snapshot is cloned"));
ret = zfs_error(hdl, EZFS_EXISTS, errbuf);
break;
default:
return (zfs_standard_error(zhp->zfs_hdl, errno,
errbuf));
ret = zfs_standard_error(hdl, errno, errbuf);
break;
}
}
return (0);
return (ret);
}
/*
@ -3277,12 +3249,10 @@ zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)
int
zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
{
zfs_cmd_t zc = { 0 };
char parent[ZFS_MAXNAMELEN];
int ret;
char errbuf[1024];
libzfs_handle_t *hdl = zhp->zfs_hdl;
zfs_type_t type;
uint64_t zoned;
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
@ -3301,32 +3271,21 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
(void) parent_name(target, parent, sizeof (parent));
/* do the clone */
if (ZFS_IS_VOLUME(zhp)) {
zc.zc_objset_type = DMU_OST_ZVOL;
type = ZFS_TYPE_VOLUME;
} else {
zc.zc_objset_type = DMU_OST_ZFS;
type = ZFS_TYPE_FILESYSTEM;
}
if (props) {
zfs_type_t type;
if (ZFS_IS_VOLUME(zhp)) {
type = ZFS_TYPE_VOLUME;
} else {
type = ZFS_TYPE_FILESYSTEM;
}
if ((props = zfs_valid_proplist(hdl, type, props, zoned,
zhp, errbuf)) == NULL)
return (-1);
if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
nvlist_free(props);
return (-1);
}
nvlist_free(props);
}
(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
zcmd_free_nvlists(&zc);
ret = lzc_clone(target, zhp->zfs_name, props);
nvlist_free(props);
if (ret != 0) {
switch (errno) {
@ -3411,74 +3370,134 @@ zfs_promote(zfs_handle_t *zhp)
return (ret);
}
typedef struct snapdata {
nvlist_t *sd_nvl;
const char *sd_snapname;
} snapdata_t;
static int
zfs_snapshot_cb(zfs_handle_t *zhp, void *arg)
{
snapdata_t *sd = arg;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zfs_get_name(zhp), sd->sd_snapname);
fnvlist_add_boolean(sd->sd_nvl, name);
rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd);
zfs_close(zhp);
return (rv);
}
/*
* Takes a snapshot of the given dataset.
* Creates snapshots. The keys in the snaps nvlist are the snapshots to be
* created.
*/
int
zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
{
int ret;
char errbuf[1024];
nvpair_t *elem;
nvlist_t *errors;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create snapshots "));
elem = NULL;
while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) {
const char *snapname = nvpair_name(elem);
/* validate the target name */
if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT,
B_TRUE)) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot create snapshot '%s'"), snapname);
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
}
}
if (props != NULL &&
(props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
props, B_FALSE, NULL, errbuf)) == NULL) {
return (-1);
}
ret = lzc_snapshot(snaps, props, &errors);
if (ret != 0) {
boolean_t printed = B_FALSE;
for (elem = nvlist_next_nvpair(errors, NULL);
elem != NULL;
elem = nvlist_next_nvpair(errors, elem)) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot create snapshot '%s'"), nvpair_name(elem));
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
printed = B_TRUE;
}
if (!printed) {
switch (ret) {
case EXDEV:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"multiple snapshots of same "
"fs not allowed"));
(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
break;
default:
(void) zfs_standard_error(hdl, ret, errbuf);
}
}
}
nvlist_free(props);
nvlist_free(errors);
return (ret);
}
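/*
 * Illustrative sketch, not part of this change: how a libzfs consumer
 * might call zfs_snapshot_nvl() to create two snapshots atomically.
 * The handle "hdl" is assumed to come from libzfs_init(); the dataset
 * names are hypothetical.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "tank/home@backup");
 *	fnvlist_add_boolean(snaps, "tank/src@backup");
 *	if (zfs_snapshot_nvl(hdl, snaps, NULL) != 0)
 *		(void) fprintf(stderr, "snapshot failed\n");
 *	nvlist_free(snaps);
 */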
int
zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
nvlist_t *props)
{
const char *delim;
char parent[ZFS_MAXNAMELEN];
zfs_handle_t *zhp;
zfs_cmd_t zc = { 0 };
int ret;
snapdata_t sd = { 0 };
char fsname[ZFS_MAXNAMELEN];
char *cp;
zfs_handle_t *zhp;
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot snapshot '%s'"), path);
"cannot snapshot %s"), path);
/* validate the target name */
if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
if (props) {
if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
props, B_FALSE, NULL, errbuf)) == NULL)
return (-1);
(void) strlcpy(fsname, path, sizeof (fsname));
cp = strchr(fsname, '@');
*cp = '\0';
sd.sd_snapname = cp + 1;
if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
nvlist_free(props);
return (-1);
}
nvlist_free(props);
}
/* make sure the parent exists and is of the appropriate type */
delim = strchr(path, '@');
(void) strncpy(parent, path, delim - path);
parent[delim - path] = '\0';
if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME)) == NULL) {
zcmd_free_nvlists(&zc);
return (-1);
}
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
if (ZFS_IS_VOLUME(zhp))
zc.zc_objset_type = DMU_OST_ZVOL;
else
zc.zc_objset_type = DMU_OST_ZFS;
zc.zc_cookie = recursive;
ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
zcmd_free_nvlists(&zc);
/*
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
if (ret != 0) {
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
(void) zfs_standard_error(hdl, errno, errbuf);
verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0);
if (recursive) {
(void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd);
} else {
fnvlist_add_boolean(sd.sd_nvl, path);
}
ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props);
nvlist_free(sd.sd_nvl);
zfs_close(zhp);
return (ret);
}
@ -3506,17 +3525,13 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
cbp->cb_create) {
char *logstr;
cbp->cb_dependent = B_TRUE;
cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,
rollback_destroy, cbp);
cbp->cb_dependent = B_FALSE;
logstr = zhp->zfs_hdl->libzfs_log_str;
zhp->zfs_hdl->libzfs_log_str = NULL;
cbp->cb_error |= zfs_destroy(zhp, B_FALSE);
zhp->zfs_hdl->libzfs_log_str = logstr;
}
} else {
/* We must destroy this clone; first unmount it */
@ -4120,7 +4135,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
zc.zc_nvlist_dst_size = sizeof (buf);
if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN + 32];
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
@ -4142,37 +4157,83 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
return (0);
}
struct holdarg {
nvlist_t *nvl;
const char *snapname;
const char *tag;
boolean_t recursive;
};
static int
zfs_hold_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
fnvlist_add_string(ha->nvl, name, ha->tag);
zfs_close(szhp);
}
if (ha->recursive)
rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
zfs_close(zhp);
return (rv);
}
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
{
zfs_cmd_t zc = { 0 };
int ret;
struct holdarg ha;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
ASSERT(!recursive || dsobj == 0);
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
ha.tag = tag;
ha.recursive = recursive;
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
fnvlist_free(ha.nvl);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
>= sizeof (zc.zc_string))
return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
zc.zc_cookie = recursive;
zc.zc_temphold = temphold;
zc.zc_cleanup_fd = cleanup_fd;
zc.zc_sendobj = dsobj;
zc.zc_createtxg = createtxg;
if (ret == 0)
return (0);
if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN+32];
if (nvlist_next_nvpair(errors, NULL) == NULL) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot hold"));
switch (ret) {
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
case EINVAL:
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
default:
(void) zfs_standard_error(hdl, ret, errbuf);
}
}
/*
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot hold '%s@%s'"), zc.zc_name, snapname);
switch (errno) {
for (elem = nvlist_next_nvpair(errors, NULL);
elem != NULL;
elem = nvlist_next_nvpair(errors, elem)) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot hold snapshot '%s'"), nvpair_name(elem));
switch (fnvpair_value_int32(elem)) {
case E2BIG:
/*
* Temporary tags wind up having the ds object id
@ -4180,66 +4241,122 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
* above, it's still possible for the tag to wind
* up being slightly too long.
*/
return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
(void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf);
break;
case EINVAL:
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
case EEXIST:
return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
break;
case ENOENT:
if (enoent_ok)
return (ENOENT);
/* FALLTHROUGH */
default:
return (zfs_standard_error_fmt(hdl, errno, errbuf));
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
}
}
return (0);
fnvlist_free(errors);
return (ret);
}
struct releasearg {
nvlist_t *nvl;
const char *snapname;
const char *tag;
boolean_t recursive;
};
static int
zfs_release_one(zfs_handle_t *zhp, void *arg)
{
struct holdarg *ha = arg;
zfs_handle_t *szhp;
char name[ZFS_MAXNAMELEN];
int rv = 0;
(void) snprintf(name, sizeof (name),
"%s@%s", zhp->zfs_name, ha->snapname);
szhp = make_dataset_handle(zhp->zfs_hdl, name);
if (szhp) {
nvlist_t *holds = fnvlist_alloc();
fnvlist_add_boolean(holds, ha->tag);
fnvlist_add_nvlist(ha->nvl, name, holds);
zfs_close(szhp);
}
if (ha->recursive)
rv = zfs_iter_filesystems(zhp, zfs_release_one, ha);
zfs_close(zhp);
return (rv);
}
int
zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive)
{
zfs_cmd_t zc = { 0 };
int ret;
struct holdarg ha;
nvlist_t *errors;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
>= sizeof (zc.zc_string))
return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
zc.zc_cookie = recursive;
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
ha.tag = tag;
ha.recursive = recursive;
(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
ret = lzc_release(ha.nvl, &errors);
fnvlist_free(ha.nvl);
if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN+32];
if (ret == 0)
return (0);
if (nvlist_next_nvpair(errors, NULL) == NULL) {
/* no hold-specific errors */
char errbuf[1024];
/*
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
snapname);
"cannot release"));
switch (errno) {
case ESRCH:
return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
case EINVAL:
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
default:
return (zfs_standard_error_fmt(hdl, errno, errbuf));
(void) zfs_standard_error_fmt(hdl, errno, errbuf);
}
}
return (0);
for (elem = nvlist_next_nvpair(errors, NULL);
elem != NULL;
elem = nvlist_next_nvpair(errors, elem)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot release hold from snapshot '%s'"),
nvpair_name(elem));
switch (fnvpair_value_int32(elem)) {
case ESRCH:
(void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf);
break;
case EINVAL:
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
default:
(void) zfs_standard_error_fmt(hdl,
fnvpair_value_int32(elem), errbuf);
}
}
fnvlist_free(errors);
return (ret);
}
int
@ -4250,7 +4367,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
int nvsz = 2048;
void *nvbuf;
int err = 0;
char errbuf[ZFS_MAXNAMELEN+32];
char errbuf[1024];
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@ -4315,7 +4432,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
char *nvbuf;
char errbuf[ZFS_MAXNAMELEN+32];
char errbuf[1024];
size_t nvsz;
int err;
@ -4366,38 +4483,18 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
int nvsz = 2048;
void *nvbuf;
int err = 0;
char errbuf[ZFS_MAXNAMELEN+32];
int err;
char errbuf[1024];
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
err = lzc_get_holds(zhp->zfs_name, nvl);
tryagain:
if (err != 0) {
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvbuf = malloc(nvsz);
if (nvbuf == NULL) {
err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
goto out;
}
zc.zc_nvlist_dst_size = nvsz;
zc.zc_nvlist_dst = (uintptr_t)nvbuf;
(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
zc.zc_name);
switch (errno) {
case ENOMEM:
free(nvbuf);
nvsz = zc.zc_nvlist_dst_size;
goto tryagain;
zhp->zfs_name);
switch (err) {
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
@ -4413,19 +4510,8 @@ zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
err = zfs_standard_error_fmt(hdl, errno, errbuf);
break;
}
} else {
/* success */
int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
if (rc) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
zc.zc_name);
err = zfs_standard_error_fmt(hdl, rc, errbuf);
}
}
free(nvbuf);
out:
return (err);
}

View File

@ -23,12 +23,12 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#ifndef _LIBFS_IMPL_H
#define _LIBFS_IMPL_H
#ifndef _LIBZFS_IMPL_H
#define _LIBZFS_IMPL_H
#include <sys/dmu.h>
#include <sys/fs/zfs.h>
@ -39,8 +39,8 @@
#include <libshare.h>
#include <libuutil.h>
#include <libzfs.h>
#include "zfs_ioctl_compat.h"
#include <libzfs_core.h>
#include <libzfs_compat.h>
#ifdef __cplusplus
extern "C" {
@ -70,7 +70,6 @@ struct libzfs_handle {
int libzfs_desc_active;
char libzfs_action[1024];
char libzfs_desc[1024];
char *libzfs_log_str;
int libzfs_printerr;
int libzfs_storeerr; /* stuff error messages into buffer */
void *libzfs_sharehdl; /* libshare handle */
@ -215,62 +214,8 @@ extern int zfs_unshare_proto(zfs_handle_t *,
extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
#ifndef sun
static int zfs_kernel_version = 0;
static int zfs_ioctl_version = 0;
/*
* This is FreeBSD version of ioctl, because Solaris' ioctl() updates
* zc_nvlist_dst_size even if an error is returned, on FreeBSD if an
* error is returned zc_nvlist_dst_size won't be updated.
*/
static __inline int
zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc)
{
size_t oldsize, zfs_kernel_version_size, zfs_ioctl_version_size;
int version, ret, cflag = ZFS_CMD_COMPAT_NONE;
zfs_ioctl_version_size = sizeof(zfs_ioctl_version);
if (zfs_ioctl_version == 0) {
sysctlbyname("vfs.zfs.version.ioctl", &zfs_ioctl_version,
&zfs_ioctl_version_size, NULL, 0);
}
/*
* If vfs.zfs.version.ioctl is not defined, assume we have v28
* compatible binaries and use vfs.zfs.version.spa to test for v15
*/
if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) {
cflag = ZFS_CMD_COMPAT_V28;
zfs_kernel_version_size = sizeof(zfs_kernel_version);
if (zfs_kernel_version == 0) {
sysctlbyname("vfs.zfs.version.spa",
&zfs_kernel_version,
&zfs_kernel_version_size, NULL, 0);
}
if (zfs_kernel_version == SPA_VERSION_15 ||
zfs_kernel_version == SPA_VERSION_14 ||
zfs_kernel_version == SPA_VERSION_13)
cflag = ZFS_CMD_COMPAT_V15;
}
oldsize = zc->zc_nvlist_dst_size;
ret = zcmd_ioctl_compat(fd, cmd, zc, cflag);
if (ret == 0 && oldsize < zc->zc_nvlist_dst_size) {
ret = -1;
errno = ENOMEM;
}
return (ret);
}
#define ioctl(fd, cmd, zc) zcmd_ioctl((fd), (cmd), (zc))
#endif /* !sun */
#ifdef __cplusplus
}
#endif
#endif /* _LIBFS_IMPL_H */
#endif /* _LIBZFS_IMPL_H */

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
*/
@ -308,12 +308,11 @@ int
zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig,
zfs_iter_f func, void *arg)
{
char buf[ZFS_MAXNAMELEN];
char *comma_separated, *cp;
char *buf, *comma_separated, *cp;
int err = 0;
int ret = 0;
(void) strlcpy(buf, spec_orig, sizeof (buf));
buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig);
cp = buf;
while ((comma_separated = strsep(&cp, ",")) != NULL) {
@ -371,6 +370,7 @@ zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig,
}
}
free(buf);
return (ret);
}

View File

@ -36,6 +36,7 @@
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>
@ -1237,7 +1238,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
* datasets left in the pool.
*/
int
zpool_destroy(zpool_handle_t *zhp)
zpool_destroy(zpool_handle_t *zhp, const char *log_str)
{
zfs_cmd_t zc = { 0 };
zfs_handle_t *zfp = NULL;
@ -1249,6 +1250,7 @@ zpool_destroy(zpool_handle_t *zhp)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_history = (uint64_t)(uintptr_t)log_str;
if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@ -1403,8 +1405,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
* Exports the pool from the system. The caller must ensure that there are no
* mounted datasets in the pool.
*/
int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
static int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
const char *log_str)
{
zfs_cmd_t zc = { 0 };
char msg[1024];
@ -1415,6 +1418,7 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = force;
zc.zc_guid = hardforce;
zc.zc_history = (uint64_t)(uintptr_t)log_str;
if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
switch (errno) {
@ -1436,15 +1440,15 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
}
int
zpool_export(zpool_handle_t *zhp, boolean_t force)
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
return (zpool_export_common(zhp, force, B_FALSE));
return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
int
zpool_export_force(zpool_handle_t *zhp)
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
return (zpool_export_common(zhp, B_TRUE, B_TRUE));
return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
static void
@ -3632,40 +3636,30 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
}
void
zpool_set_history_str(const char *subcommand, int argc, char **argv,
char *history_str)
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
int i;
(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
for (i = 1; i < argc; i++) {
if (strlen(history_str) + 1 + strlen(argv[i]) >
HIS_MAX_RECORD_LEN)
break;
(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
(void) strlcpy(string, basename(argv[0]), len);
for (int i = 1; i < argc; i++) {
(void) strlcat(string, " ", len);
(void) strlcat(string, argv[i], len);
}
}
/*
* Stage command history for logging.
*/
int
zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
zpool_log_history(libzfs_handle_t *hdl, const char *message)
{
if (history_str == NULL)
return (EINVAL);
zfs_cmd_t zc = { 0 };
nvlist_t *args;
int err;
if (strlen(history_str) > HIS_MAX_RECORD_LEN)
return (EINVAL);
if (hdl->libzfs_log_str != NULL)
free(hdl->libzfs_log_str);
if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
return (no_memory(hdl));
return (0);
args = fnvlist_alloc();
fnvlist_add_string(args, "message", message);
err = zcmd_write_src_nvlist(hdl, &zc, args);
if (err == 0)
err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
nvlist_free(args);
zcmd_free_nvlists(&zc);
return (err);
}
/*

View File

@ -53,6 +53,10 @@
#include <sys/zio_checksum.h>
#include <sys/ddt.h>
#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
@ -978,9 +982,7 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
*/
if (pzhp) {
error = zfs_hold(pzhp, thissnap, sdd->holdtag,
B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
B_FALSE, B_TRUE, sdd->cleanup_fd);
zfs_close(pzhp);
}
@ -1719,12 +1721,11 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
err = ENOENT;
}
if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
seq++;
(void) strncpy(newname, name, baselen);
(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
"recv-%u-%u", getpid(), seq);
(void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
baselen, name, getpid(), seq);
(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
if (flags->verbose) {
@ -2676,9 +2677,17 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
/*
* Determine name of destination snapshot, store in zc_value.
*/
(void) strcpy(zc.zc_top_ds, tosnap);
(void) strcpy(zc.zc_value, tosnap);
(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
#ifdef __FreeBSD__
if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
zfs_ioctl_version = get_zfs_ioctl_version();
/*
* For forward compatibility hide tosnap in zc_value
*/
if (zfs_ioctl_version < ZFS_IOCVER_LZC)
(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
#endif
free(cp);
if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
zcmd_free_nvlists(&zc);

View File

@ -48,6 +48,7 @@
#include <sys/types.h>
#include <libzfs.h>
#include <libzfs_core.h>
#include "libzfs_impl.h"
#include "zfs_prop.h"
@ -659,6 +660,14 @@ libzfs_init(void)
hdl->libzfs_sharetab = fopen(ZFS_EXPORTS_PATH, "r");
if (libzfs_core_init() != 0) {
(void) close(hdl->libzfs_fd);
(void) fclose(hdl->libzfs_mnttab);
(void) fclose(hdl->libzfs_sharetab);
free(hdl);
return (NULL);
}
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
@ -676,14 +685,13 @@ libzfs_fini(libzfs_handle_t *hdl)
if (hdl->libzfs_sharetab)
(void) fclose(hdl->libzfs_sharetab);
zfs_uninit_libshare(hdl);
if (hdl->libzfs_log_str)
(void) free(hdl->libzfs_log_str);
zpool_free_handles(hdl);
#ifdef sun
libzfs_fru_clear(hdl, B_TRUE);
#endif
namespace_clear(hdl);
libzfs_mnttab_fini(hdl);
libzfs_core_fini();
free(hdl);
}
@ -857,19 +865,9 @@ zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp)
}
int
zfs_ioctl(libzfs_handle_t *hdl, unsigned long request, zfs_cmd_t *zc)
zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc)
{
int error;
zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str;
error = ioctl(hdl->libzfs_fd, request, zc);
if (hdl->libzfs_log_str) {
free(hdl->libzfs_log_str);
hdl->libzfs_log_str = NULL;
}
zc->zc_history = 0;
return (error);
return (ioctl(hdl->libzfs_fd, request, zc));
}
/*

View File

@ -0,0 +1,618 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
* LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
* It has the following characteristics:
*
* - Thread Safe. libzfs_core is accessible concurrently from multiple
* threads. This is accomplished primarily by avoiding global data
* (e.g. caching). Since it's thread-safe, there is no reason for a
* process to have multiple libzfs "instances". Therefore, we store
* our few pieces of data (e.g. the file descriptor) in global
* variables. The fd is reference-counted so that the libzfs_core
* library can be "initialized" multiple times (e.g. by different
* consumers within the same process).
*
* - Committed Interface. The libzfs_core interface will be committed,
* therefore consumers can compile against it and be confident that
* their code will continue to work on future releases of this code.
* Currently, the interface is Evolving (not Committed), but we intend
* to commit to it once it is more complete and we determine that it
* meets the needs of all consumers.
*
* - Programmatic Error Handling. libzfs_core communicates errors with
* defined error numbers, and doesn't print anything to stdout/stderr.
*
* - Thin Layer. libzfs_core is a thin layer, marshaling arguments
* to/from the kernel ioctls. There is generally a 1:1 correspondence
* between libzfs_core functions and ioctls to /dev/zfs.
*
* - Clear Atomicity. Because libzfs_core functions are generally 1:1
* with kernel ioctls, and kernel ioctls are generally atomic, each
* libzfs_core function is atomic. For example, creating multiple
* snapshots with a single call to lzc_snapshot() is atomic -- it
* can't fail with only some of the requested snapshots created, even
* in the event of power loss or system crash.
*
* - Continued libzfs Support. Some higher-level operations (e.g.
* support for "zfs send -R") are too complicated to fit the scope of
* libzfs_core. This functionality will continue to live in libzfs.
* Where appropriate, libzfs will use the underlying atomic operations
* of libzfs_core. For example, libzfs may implement "zfs send -R |
* zfs receive" by using individual "send one snapshot", rename,
* destroy, and "receive one snapshot" operations in libzfs_core.
* /sbin/zfs and /sbin/zpool will link with both libzfs and
* libzfs_core. Other consumers should aim to use only libzfs_core,
* since that will be the supported, stable interface going forwards.
*/
#define _IN_LIBZFS_CORE_
#include <libzfs_core.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/nvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>
#include "libzfs_core_compat.h"
#include "libzfs_compat.h"
#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif
static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;
int
libzfs_core_init(void)
{
(void) pthread_mutex_lock(&g_lock);
if (g_refcount == 0) {
g_fd = open("/dev/zfs", O_RDWR);
if (g_fd < 0) {
(void) pthread_mutex_unlock(&g_lock);
return (errno);
}
}
g_refcount++;
(void) pthread_mutex_unlock(&g_lock);
return (0);
}
void
libzfs_core_fini(void)
{
(void) pthread_mutex_lock(&g_lock);
ASSERT3S(g_refcount, >, 0);
g_refcount--;
if (g_refcount == 0)
(void) close(g_fd);
(void) pthread_mutex_unlock(&g_lock);
}
static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
nvlist_t *source, nvlist_t **resultp)
{
zfs_cmd_t zc = { 0 };
int error = 0;
char *packed;
#ifdef __FreeBSD__
nvlist_t *oldsource;
#endif
size_t size;
ASSERT3S(g_refcount, >, 0);
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
#ifdef __FreeBSD__
if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
zfs_ioctl_version = get_zfs_ioctl_version();
if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
oldsource = source;
error = lzc_compat_pre(&zc, &ioc, &source);
if (error)
return (error);
}
#endif
packed = fnvlist_pack(source, &size);
zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
zc.zc_nvlist_src_size = size;
if (resultp != NULL) {
*resultp = NULL;
zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
if (zc.zc_nvlist_dst == NULL) {
#else
if (zc.zc_nvlist_dst == 0) {
#endif
error = ENOMEM;
goto out;
}
}
while (ioctl(g_fd, ioc, &zc) != 0) {
if (errno == ENOMEM && resultp != NULL) {
free((void *)(uintptr_t)zc.zc_nvlist_dst);
zc.zc_nvlist_dst_size *= 2;
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
malloc(zc.zc_nvlist_dst_size);
#ifdef illumos
if (zc.zc_nvlist_dst == NULL) {
#else
if (zc.zc_nvlist_dst == 0) {
#endif
error = ENOMEM;
goto out;
}
} else {
error = errno;
break;
}
}
#ifdef __FreeBSD__
if (zfs_ioctl_version < ZFS_IOCVER_LZC)
lzc_compat_post(&zc, ioc);
#endif
if (zc.zc_nvlist_dst_filled) {
*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
zc.zc_nvlist_dst_size);
}
#ifdef __FreeBSD__
if (zfs_ioctl_version < ZFS_IOCVER_LZC)
lzc_compat_outnvl(&zc, ioc, resultp);
#endif
out:
#ifdef __FreeBSD__
if (zfs_ioctl_version < ZFS_IOCVER_LZC) {
if (source != oldsource)
nvlist_free(source);
source = oldsource;
}
#endif
fnvlist_pack_free(packed, size);
free((void *)(uintptr_t)zc.zc_nvlist_dst);
return (error);
}
int
lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
{
int error;
nvlist_t *args = fnvlist_alloc();
fnvlist_add_int32(args, "type", type);
if (props != NULL)
fnvlist_add_nvlist(args, "props", props);
error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
nvlist_free(args);
return (error);
}
int
lzc_clone(const char *fsname, const char *origin,
nvlist_t *props)
{
int error;
nvlist_t *args = fnvlist_alloc();
fnvlist_add_string(args, "origin", origin);
if (props != NULL)
fnvlist_add_nvlist(args, "props", props);
error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
nvlist_free(args);
return (error);
}
/*
* Creates snapshots.
*
* The keys in the snaps nvlist are the snapshots to be created.
* They must all be in the same pool.
*
* The props nvlist is properties to set. Currently only user properties
* are supported. { user:prop_name -> string value }
*
* The returned results nvlist will have an entry for each snapshot that failed.
* The value will be the (int32) error code.
*
* The return value will be 0 if all snapshots were created, otherwise it will
* be the errno of an (unspecified) snapshot that failed.
*/
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
{
nvpair_t *elem;
nvlist_t *args;
int error;
char pool[MAXNAMELEN];
*errlist = NULL;
/* determine the pool name */
elem = nvlist_next_nvpair(snaps, NULL);
if (elem == NULL)
return (0);
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
pool[strcspn(pool, "/@")] = '\0';
args = fnvlist_alloc();
fnvlist_add_nvlist(args, "snaps", snaps);
if (props != NULL)
fnvlist_add_nvlist(args, "props", props);
error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
nvlist_free(args);
return (error);
}
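/*
 * Illustrative sketch, not part of this change: a minimal libzfs_core
 * consumer creating two snapshots in one atomic call.  The dataset names
 * are hypothetical and error handling is abbreviated.
 *
 *	nvlist_t *snaps, *errlist;
 *
 *	(void) libzfs_core_init();
 *	snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "tank/fs@today");
 *	fnvlist_add_boolean(snaps, "tank/vol@today");
 *	if (lzc_snapshot(snaps, NULL, &errlist) != 0 && errlist != NULL) {
 *		for (nvpair_t *p = nvlist_next_nvpair(errlist, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(errlist, p))
 *			(void) fprintf(stderr, "%s: error %d\n",
 *			    nvpair_name(p), fnvpair_value_int32(p));
 *	}
 *	nvlist_free(snaps);
 *	nvlist_free(errlist);
 *	libzfs_core_fini();
 */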
/*
* Destroys snapshots.
*
* The keys in the snaps nvlist are the snapshots to be destroyed.
* They must all be in the same pool.
*
* Snapshots that do not exist will be silently ignored.
*
* If 'defer' is not set, and a snapshot has user holds or clones, the
* destroy operation will fail and none of the snapshots will be
* destroyed.
*
* If 'defer' is set, and a snapshot has user holds or clones, it will be
* marked for deferred destruction, and will be destroyed when the last hold
* or clone is removed/destroyed.
*
* The return value will be 0 if all snapshots were destroyed (or marked for
* later destruction if 'defer' is set) or didn't exist to begin with.
*
* Otherwise the return value will be the errno of an (unspecified) snapshot
* that failed, no snapshots will be destroyed, and the errlist will have an
* entry for each snapshot that failed. The value in the errlist will be
* the (int32) error code.
*/
int
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
{
nvpair_t *elem;
nvlist_t *args;
int error;
char pool[MAXNAMELEN];
/* determine the pool name */
elem = nvlist_next_nvpair(snaps, NULL);
if (elem == NULL)
return (0);
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
pool[strcspn(pool, "/@")] = '\0';
args = fnvlist_alloc();
fnvlist_add_nvlist(args, "snaps", snaps);
if (defer)
fnvlist_add_boolean(args, "defer");
error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
nvlist_free(args);
return (error);
}
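/*
 * Illustrative sketch, not part of this change: destroying the same
 * snapshots, deferring destruction of any that are held or cloned.
 * Names are hypothetical.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	fnvlist_add_boolean(snaps, "tank/fs@today");
 *	fnvlist_add_boolean(snaps, "tank/vol@today");
 *	int err = lzc_destroy_snaps(snaps, B_TRUE, &errlist);
 *	nvlist_free(snaps);
 *	nvlist_free(errlist);
 */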
int
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
uint64_t *usedp)
{
nvlist_t *args;
nvlist_t *result;
int err;
char fs[MAXNAMELEN];
char *atp;
/* determine the fs name */
(void) strlcpy(fs, firstsnap, sizeof (fs));
atp = strchr(fs, '@');
if (atp == NULL)
return (EINVAL);
*atp = '\0';
args = fnvlist_alloc();
fnvlist_add_string(args, "firstsnap", firstsnap);
err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
nvlist_free(args);
if (err == 0)
*usedp = fnvlist_lookup_uint64(result, "used");
fnvlist_free(result);
return (err);
}
boolean_t
lzc_exists(const char *dataset)
{
/*
* The objset_stats ioctl is still legacy, so we need to construct our
* own zfs_cmd_t rather than using lzc_ioctl().
*/
zfs_cmd_t zc = { 0 };
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}
/*
* Create "user holds" on snapshots. If there is a hold on a snapshot,
* the snapshot cannot be destroyed. (However, it can be marked for deletion
* by lzc_destroy_snaps(defer=B_TRUE).)
*
* The keys in the nvlist are snapshot names.
* The snapshots must all be in the same pool.
* The value is the name of the hold (string type).
*
* If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
* In this case, when the cleanup_fd is closed (including on process
* termination), the holds will be released. If the system is shut down
* uncleanly, the holds will be released when the pool is next opened
* or imported.
*
* The return value will be 0 if all holds were created. Otherwise the return
* value will be the errno of an (unspecified) hold that failed, no holds will
* be created, and the errlist will have an entry for each hold that
* failed (name = snapshot). The value in the errlist will be the error
* code (int32).
*/
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
char pool[MAXNAMELEN];
nvlist_t *args;
nvpair_t *elem;
int error;
/* determine the pool name */
elem = nvlist_next_nvpair(holds, NULL);
if (elem == NULL)
return (0);
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
pool[strcspn(pool, "/@")] = '\0';
args = fnvlist_alloc();
fnvlist_add_nvlist(args, "holds", holds);
if (cleanup_fd != -1)
fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
nvlist_free(args);
return (error);
}
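/*
 * Illustrative sketch, not part of this change: placing a "backup" hold
 * on a snapshot.  Passing -1 for cleanup_fd makes the hold persist until
 * it is explicitly released.  Names are hypothetical.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	fnvlist_add_string(holds, "tank/fs@today", "backup");
 *	int err = lzc_hold(holds, -1, &errlist);
 *	nvlist_free(holds);
 *	nvlist_free(errlist);
 */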
/*
* Release "user holds" on snapshots. If the snapshot has been marked for
* deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
* any clones, and all the user holds are removed, then the snapshot will be
* destroyed.
*
* The keys in the nvlist are snapshot names.
* The snapshots must all be in the same pool.
* The value is an nvlist whose keys are the holds to remove.
*
* The return value will be 0 if all holds were removed.
* Otherwise the return value will be the errno of an (unspecified) release
* that failed, no holds will be released, and the errlist will have an
* entry for each snapshot that has failed releases (name = snapshot).
* The value in the errlist will be the error code (int32) of a failed release.
*/
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
char pool[MAXNAMELEN];
nvpair_t *elem;
/* determine the pool name */
elem = nvlist_next_nvpair(holds, NULL);
if (elem == NULL)
return (0);
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
pool[strcspn(pool, "/@")] = '\0';
return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
}
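/*
 * Illustrative sketch, not part of this change: releasing the "backup"
 * hold placed above.  Note the nested nvlist shape: snapshot name ->
 * nvlist of hold names.  Names are hypothetical.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *tags = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	fnvlist_add_boolean(tags, "backup");
 *	fnvlist_add_nvlist(holds, "tank/fs@today", tags);
 *	int err = lzc_release(holds, &errlist);
 *	nvlist_free(tags);
 *	nvlist_free(holds);
 *	nvlist_free(errlist);
 */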
/*
* Retrieve list of user holds on the specified snapshot.
*
* On success, *holdsp will be set to an nvlist which the caller must free.
* The keys are the names of the holds, and the value is the creation time
* of the hold (uint64) in seconds since the epoch.
*/
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
int error;
nvlist_t *innvl = fnvlist_alloc();
error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
fnvlist_free(innvl);
return (error);
}
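/*
 * Illustrative sketch, not part of this change: listing the holds on a
 * snapshot.  The snapshot name is hypothetical.
 *
 *	nvlist_t *holds;
 *	if (lzc_get_holds("tank/fs@today", &holds) == 0) {
 *		for (nvpair_t *p = nvlist_next_nvpair(holds, NULL);
 *		    p != NULL; p = nvlist_next_nvpair(holds, p))
 *			(void) printf("hold: %s\n", nvpair_name(p));
 *		nvlist_free(holds);
 *	}
 */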
/*
* If fromsnap is NULL, a full (non-incremental) stream will be sent.
*/
int
lzc_send(const char *snapname, const char *fromsnap, int fd)
{
nvlist_t *args;
int err;
args = fnvlist_alloc();
fnvlist_add_int32(args, "fd", fd);
if (fromsnap != NULL)
fnvlist_add_string(args, "fromsnap", fromsnap);
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
nvlist_free(args);
return (err);
}
/*
* If fromsnap is NULL, a full (non-incremental) stream will be estimated.
*/
int
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
int err;
args = fnvlist_alloc();
if (fromsnap != NULL)
fnvlist_add_string(args, "fromsnap", fromsnap);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
*spacep = fnvlist_lookup_uint64(result, "space");
nvlist_free(result);
return (err);
}
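/*
 * Illustrative sketch, not part of this change: estimating the size of an
 * incremental stream and then sending it to an already-open descriptor.
 * The snapshot names and "outfd" are hypothetical.
 *
 *	uint64_t space;
 *	if (lzc_send_space("tank/fs@today", "tank/fs@yesterday", &space) == 0)
 *		(void) printf("estimated stream size: %ju bytes\n",
 *		    (uintmax_t)space);
 *	(void) lzc_send("tank/fs@today", "tank/fs@yesterday", outfd);
 */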
static int
recv_read(int fd, void *buf, int ilen)
{
char *cp = buf;
int rv;
int len = ilen;
do {
rv = read(fd, cp, len);
cp += rv;
len -= rv;
} while (rv > 0);
if (rv < 0 || len != 0)
return (EIO);
return (0);
}
/*
* The simplest receive case: receive from the specified fd, creating the
* specified snapshot. Apply the specified properties as "received" properties
* (which can be overridden by locally-set properties). If the stream is a
* clone, its origin snapshot must be specified by 'origin'. The 'force'
* flag will cause the target filesystem to be rolled back or destroyed if
* necessary to receive.
*
* Return 0 on success or an errno on failure.
*
* Note: this interface does not work on dedup'd streams
* (those with DMU_BACKUP_FEATURE_DEDUP).
*/
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd)
{
/*
* The receive ioctl is still legacy, so we need to construct our own
* zfs_cmd_t rather than using lzc_ioctl().
*/
zfs_cmd_t zc = { 0 };
char *atp;
char *packed = NULL;
size_t size;
dmu_replay_record_t drr;
int error;
ASSERT3S(g_refcount, >, 0);
/* zc_name is name of containing filesystem */
(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
atp = strchr(zc.zc_name, '@');
if (atp == NULL)
return (EINVAL);
*atp = '\0';
/* if the fs does not exist, try its parent. */
if (!lzc_exists(zc.zc_name)) {
char *slashp = strrchr(zc.zc_name, '/');
if (slashp == NULL)
return (ENOENT);
*slashp = '\0';
}
/* zc_value is full name of the snapshot to create */
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
if (props != NULL) {
/* zc_nvlist_src is props to set */
packed = fnvlist_pack(props, &size);
zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
zc.zc_nvlist_src_size = size;
}
/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
if (origin != NULL)
(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
/* zc_begin_record is non-byteswapped BEGIN record */
error = recv_read(fd, &drr, sizeof (drr));
if (error != 0)
goto out;
zc.zc_begin_record = drr.drr_u.drr_begin;
/* zc_cookie is fd to read from */
zc.zc_cookie = fd;
/* zc guid is force flag */
zc.zc_guid = force;
/* zc_cleanup_fd is unused */
zc.zc_cleanup_fd = -1;
error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
if (error != 0)
error = errno;
out:
if (packed != NULL)
fnvlist_pack_free(packed, size);
free((void*)(uintptr_t)zc.zc_nvlist_dst);
return (error);
}
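/*
 * Illustrative sketch, not part of this change: receiving a full stream
 * from an open descriptor into a new snapshot and verifying the result.
 * The snapshot name and "infd" are hypothetical.
 *
 *	int err = lzc_receive("tank/restore@today", NULL, NULL, B_FALSE, infd);
 *	if (err == 0 && lzc_exists("tank/restore@today"))
 *		(void) printf("receive complete\n");
 */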

View File

@ -0,0 +1,67 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#ifndef _LIBZFS_CORE_H
#define _LIBZFS_CORE_H
#include <libnvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
#endif
int libzfs_core_init(void);
void libzfs_core_fini(void);
int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist);
int lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props);
int lzc_clone(const char *fsname, const char *origin, nvlist_t *props);
int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist);
int lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
uint64_t *usedp);
int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist);
int lzc_release(nvlist_t *holds, nvlist_t **errlist);
int lzc_get_holds(const char *snapname, nvlist_t **holdsp);
int lzc_send(const char *snapname, const char *fromsnap, int fd);
int lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd);
int lzc_send_space(const char *snapname, const char *fromsnap,
uint64_t *result);
boolean_t lzc_exists(const char *dataset);
#ifdef __cplusplus
}
#endif
#endif /* _LIBZFS_CORE_H */

View File

@ -0,0 +1,189 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#include <sys/zfs_ioctl.h>
#include <zfs_ioctl_compat.h>
#include "libzfs_core_compat.h"
extern int zfs_ioctl_version;
int
lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source)
{
nvlist_t *nvl = NULL;
nvpair_t *pair, *hpair;
char *buf, *val;
zfs_ioc_t vecnum;
uint32_t type32;
int32_t cleanup_fd;
int error = 0;
int pos;
if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
return (0);
vecnum = *ioc;
switch (vecnum) {
case ZFS_IOC_CREATE:
type32 = fnvlist_lookup_int32(*source, "type");
zc->zc_objset_type = (uint64_t)type32;
nvlist_lookup_nvlist(*source, "props", &nvl);
*source = nvl;
break;
case ZFS_IOC_CLONE:
buf = fnvlist_lookup_string(*source, "origin");
strlcpy(zc->zc_value, buf, MAXPATHLEN);
nvlist_lookup_nvlist(*source, "props", &nvl);
*ioc = ZFS_IOC_CREATE;
*source = nvl;
break;
case ZFS_IOC_SNAPSHOT:
nvl = fnvlist_lookup_nvlist(*source, "snaps");
pair = nvlist_next_nvpair(nvl, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
} else
error = EINVAL;
/* old kernel cannot create multiple snapshots */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
nvlist_free(nvl);
nvl = NULL;
nvlist_lookup_nvlist(*source, "props", &nvl);
*source = nvl;
break;
case ZFS_IOC_SPACE_SNAPS:
buf = fnvlist_lookup_string(*source, "firstsnap");
strlcpy(zc->zc_value, buf, MAXPATHLEN);
break;
case ZFS_IOC_DESTROY_SNAPS:
nvl = fnvlist_lookup_nvlist(*source, "snaps");
pair = nvlist_next_nvpair(nvl, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
} else
error = EINVAL;
/* old kernel cannot atomically destroy multiple snaps */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
*source = nvl;
break;
case ZFS_IOC_HOLD:
nvl = fnvlist_lookup_nvlist(*source, "holds");
pair = nvlist_next_nvpair(nvl, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
if (nvpair_value_string(pair, &val) == 0)
strlcpy(zc->zc_string, val, MAXNAMELEN);
else
error = EINVAL;
} else
error = EINVAL;
/* old kernel cannot atomically create multiple holds */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
nvlist_free(nvl);
if (nvlist_lookup_int32(*source, "cleanup_fd",
&cleanup_fd) == 0)
zc->zc_cleanup_fd = cleanup_fd;
else
zc->zc_cleanup_fd = -1;
break;
case ZFS_IOC_RELEASE:
pair = nvlist_next_nvpair(*source, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
if (nvpair_value_nvlist(pair, &nvl) == 0) {
hpair = nvlist_next_nvpair(nvl, NULL);
if (hpair != NULL)
strlcpy(zc->zc_string,
nvpair_name(hpair), MAXNAMELEN);
else
error = EINVAL;
if (!error && nvlist_next_nvpair(nvl,
hpair) != NULL)
error = EOPNOTSUPP;
} else
error = EINVAL;
} else
error = EINVAL;
/* old kernel cannot atomically release multiple holds */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
break;
}
return (error);
}
void
lzc_compat_post(zfs_cmd_t *zc, const zfs_ioc_t ioc)
{
if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
return;
switch (ioc) {
case ZFS_IOC_CREATE:
case ZFS_IOC_CLONE:
case ZFS_IOC_SNAPSHOT:
case ZFS_IOC_SPACE_SNAPS:
case ZFS_IOC_DESTROY_SNAPS:
zc->zc_nvlist_dst_filled = B_FALSE;
break;
}
}
int
lzc_compat_outnvl(zfs_cmd_t *zc, const zfs_ioc_t ioc, nvlist_t **outnvl)
{
nvlist_t *nvl;
if (zfs_ioctl_version >= ZFS_IOCVER_LZC)
return (0);
switch (ioc) {
case ZFS_IOC_SPACE_SNAPS:
nvl = fnvlist_alloc();
fnvlist_add_uint64(nvl, "used", zc->zc_cookie);
fnvlist_add_uint64(nvl, "compressed", zc->zc_objset_type);
fnvlist_add_uint64(nvl, "uncompressed", zc->zc_perm_action);
*outnvl = nvl;
break;
}
return (0);
}

View File

@ -0,0 +1,47 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 by Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
#ifndef _LIBZFS_CORE_COMPAT_H
#define _LIBZFS_CORE_COMPAT_H
#include <libnvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
extern "C" {
#endif
int lzc_compat_pre(zfs_cmd_t *, zfs_ioc_t *, nvlist_t **);
void lzc_compat_post(zfs_cmd_t *, const zfs_ioc_t);
int lzc_compat_outnvl(zfs_cmd_t *, const zfs_ioc_t, nvlist_t **);
#ifdef __cplusplus
}
#endif
#endif /* _LIBZFS_CORE_COMPAT_H */

View File

@ -33,6 +33,7 @@
#include <sys/stat.h>
#include <sys/processor.h>
#include <sys/zfs_context.h>
#include <sys/rrwlock.h>
#include <sys/zmod.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
@ -885,6 +886,8 @@ umem_out_of_memory(void)
void
kernel_init(int mode)
{
extern uint_t rrw_tsd_key;
umem_nofail_callback(umem_out_of_memory);
physmem = sysconf(_SC_PHYS_PAGES);
@ -905,6 +908,8 @@ kernel_init(int mode)
#endif
spa_init(mode);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
void
@ -952,6 +957,12 @@ crgetuid(cred_t *cr)
return (0);
}
uid_t
crgetruid(cred_t *cr)
{
return (0);
}
gid_t
crgetgid(cred_t *cr)
{

View File

@ -60,6 +60,8 @@ extern "C" {
#include <umem.h>
#include <inttypes.h>
#include <fsshare.h>
#include <pthread.h>
#include <sys/debug.h>
#include <sys/note.h>
#include <sys/types.h>
#include <sys/cred.h>
@ -242,6 +244,9 @@ typedef int krw_t;
#define RW_WRITE_HELD(x) ((x)->rw_owner == curthread)
#define RW_LOCK_HELD(x) rw_lock_held(x)
#undef RW_LOCK_HELD
#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x))
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
extern void rw_destroy(krwlock_t *rwlp);
extern void rw_enter(krwlock_t *rwlp, krw_t rw);
@ -252,6 +257,7 @@ extern int rw_lock_held(krwlock_t *rwlp);
#define rw_downgrade(rwlp) do { } while (0)
extern uid_t crgetuid(cred_t *cr);
extern uid_t crgetruid(cred_t *cr);
extern gid_t crgetgid(cred_t *cr);
extern int crgetngroups(cred_t *cr);
extern gid_t *crgetgroups(cred_t *cr);
@ -270,6 +276,14 @@ extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
/*
* Thread-specific data
*/
#define tsd_get(k) pthread_getspecific(k)
#define tsd_set(k, v) pthread_setspecific(k, v)
#define tsd_create(kp, d) pthread_key_create(kp, d)
#define tsd_destroy(kp) /* nothing */
/*
* Kernel memory
*/
@ -519,7 +533,7 @@ typedef struct callb_cpr {
#define INGLOBALZONE(z) (1)
extern char *kmem_asprintf(const char *fmt, ...);
#define strfree(str) kmem_free((str), strlen(str)+1)
#define strfree(str) kmem_free((str), strlen(str) + 1)
/*
* Hostname information

View File

@ -9,10 +9,12 @@ SUBDIR= ${_drti} \
libnvpair \
libumem \
libuutil \
${_libzfs_core} \
${_libzfs} \
${_libzpool}
.if ${MK_ZFS} != "no"
_libzfs_core= libzfs_core
_libzfs= libzfs
.if ${MK_LIBTHR} != "no"
_libzpool= libzpool

View File

@ -6,8 +6,9 @@
.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
LIB= zfs
DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR}
LDADD= -lmd -lpthread -lumem -lutil -lm -lnvpair
DPADD= ${LIBMD} ${LIBPTHREAD} ${LIBUMEM} ${LIBUTIL} ${LIBM} ${LIBNVPAIR} \
${LIBZFS_CORE}
LDADD= -lmd -lpthread -lumem -lutil -lm -lnvpair -lzfs_core
SRCS= deviceid.c \
fsshare.c \
@ -17,6 +18,7 @@ SRCS= deviceid.c \
zone.c
SRCS+= libzfs_changelist.c \
libzfs_compat.c \
libzfs_config.c \
libzfs_dataset.c \
libzfs_diff.c \
@ -54,5 +56,6 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
.include <bsd.lib.mk>

View File

@ -0,0 +1,37 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../cddl/compat/opensolaris/misc
.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
LIB= zfs_core
DPADD= ${LIBNVPAIR}
LDADD= -lnvpair
SRCS= libzfs_core.c \
libzfs_core_compat.c
SRCS+= libzfs_compat.c
WARNS?= 0
CSTD= c99
CFLAGS+= -DZFS_NO_ACL
CFLAGS+= -I${.CURDIR}/../../../sbin/mount
CFLAGS+= -I${.CURDIR}/../../../cddl/lib/libumem
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
.include <bsd.lib.mk>

View File

@ -14,6 +14,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs_core/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
@ -22,7 +23,7 @@ CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
DPADD= ${LIBGEOM} ${LIBJAIL} ${LIBNVPAIR} ${LIBUMEM} \
${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
LDADD= -lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs
${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS}
LDADD= -lgeom -ljail -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs
.include <bsd.prog.mk>

View File

@ -27,7 +27,7 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/stat/common
DPADD= ${LIBAVL} ${LIBGEOM} ${LIBNVPAIR} \
${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS}
LDADD= -lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs
${LIBUMEM} ${LIBUTIL} ${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS}
LDADD= -lavl -lgeom -lnvpair -lumem -lutil -luutil -lzfs_core -lzfs
.include <bsd.prog.mk>

View File

@ -20,7 +20,7 @@ CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBUUTIL} \
${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lumem -luutil -lzfs -lzpool
${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lumem -luutil -lzfs_core -lzfs -lzpool
.include <bsd.prog.mk>

View File

@ -19,8 +19,9 @@ CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
${LIBPTHREAD} ${LIBAVL} ${LIBZFS} ${LIBUUTIL}
LDADD= -lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs -luutil
${LIBPTHREAD} ${LIBAVL} ${LIBZFS_CORE} ${LIBZFS} ${LIBUUTIL}
LDADD= -lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs_core -lzfs \
-luutil
CSTD= c99

View File

@ -24,8 +24,8 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \
${LIBUUTIL} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool
${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs_core -lzfs -lzpool
CFLAGS+= -DDEBUG=1
#DEBUG_FLAGS+= -g

View File

@ -23,8 +23,8 @@ CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \
${LIBUUTIL} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool
${LIBUUTIL} ${LIBZFS_CORE} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs_core -lzfs -lzpool
CFLAGS+= -DDEBUG=1
#DEBUG_FLAGS+= -g

View File

@ -123,7 +123,7 @@ CRUNCH_LIBS+= -lalias -lcam -lcurses -ldevstat -lipsec
CRUNCH_LIBS+= -lipx
.endif
.if ${MK_ZFS} != "no"
CRUNCH_LIBS+= -lavl -ljail -lzfs -lnvpair -lpthread -luutil -lumem
CRUNCH_LIBS+= -lavl -ljail -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem
.endif
CRUNCH_LIBS+= -lgeom -lbsdxml -lkiconv -lmd -lsbuf -lufs -lz

View File

@ -174,4 +174,5 @@ LIBY?= ${DESTDIR}${LIBDIR}/liby.a
LIBYPCLNT?= ${DESTDIR}${LIBDIR}/libypclnt.a
LIBZ?= ${DESTDIR}${LIBDIR}/libz.a
LIBZFS?= ${DESTDIR}${LIBDIR}/libzfs.a
LIBZFS_CORE?= ${DESTDIR}${LIBDIR}/libzfs_core.a
LIBZPOOL?= ${DESTDIR}${LIBDIR}/libzpool.a

View File

@ -46,6 +46,7 @@ typedef struct ucred ucred_t;
#define kcred (thread0.td_ucred)
#define crgetuid(cred) ((cred)->cr_uid)
#define crgetruid(cred) ((cred)->cr_ruid)
#define crgetgid(cred) ((cred)->cr_gid)
#define crgetgroups(cred) ((cred)->cr_groups)
#define crgetngroups(cred) ((cred)->cr_ngroups)

View File

@ -30,6 +30,8 @@
#else
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/debug.h>
#endif
/*
@ -116,6 +118,18 @@ fnvlist_merge(nvlist_t *dst, nvlist_t *src)
VERIFY0(nvlist_merge(dst, src, KM_SLEEP));
}
size_t
fnvlist_num_pairs(nvlist_t *nvl)
{
size_t count = 0;
nvpair_t *pair;
for (pair = nvlist_next_nvpair(nvl, 0); pair != NULL;
pair = nvlist_next_nvpair(nvl, pair))
count++;
return (count);
}
void
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
{

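The fnvlist_num_pairs() helper added above simply walks the list and counts its pairs; later in this merge it is used to size sync tasks, e.g. dsl_deleg_set() passes it as the blocks-modified estimate. A trivial sketch of that intended use (names invented):

	nvlist_t *perms = fnvlist_alloc();
	size_t blocks;

	fnvlist_add_boolean(perms, "user1");
	fnvlist_add_boolean(perms, "user2");
	blocks = fnvlist_num_pairs(perms);	/* == 2, one per requested change */
	fnvlist_free(perms);
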
View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -156,7 +157,11 @@ zfs_spa_version_map(int zpl_version)
return (version);
}
const char *zfs_history_event_names[LOG_END] = {
/*
* This is the table of legacy internal event names; it should not be modified.
* The internal events are now stored in the history log as strings.
*/
const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
"invalid event",
"pool create",
"vdev add",

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _ZFS_COMUTIL_H
@ -37,7 +38,8 @@ extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
extern int zfs_zpl_version_map(int spa_version);
extern int zfs_spa_version_map(int zpl_version);
extern const char *zfs_history_event_names[LOG_END];
#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41
extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS];
#ifdef __cplusplus
}

View File

@ -33,6 +33,7 @@
#include <sys/nvpair.h>
#include <sys/dsl_deleg.h>
#include <sys/zfs_ioctl.h>
#include "zfs_namecheck.h"
#include "zfs_ioctl_compat.h"
static int zfs_version_ioctl = ZFS_IOCVER_CURRENT;
@ -49,8 +50,52 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
{
zfs_cmd_v15_t *zc_c;
zfs_cmd_v28_t *zc28_c;
zfs_cmd_deadman_t *zcdm_c;
switch (cflag) {
case ZFS_CMD_COMPAT_DEADMAN:
zcdm_c = (void *)addr;
/* zc */
strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN);
strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2);
strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN);
zc->zc_guid = zcdm_c->zc_guid;
zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf;
zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size;
zc->zc_nvlist_src = zcdm_c->zc_nvlist_src;
zc->zc_nvlist_src_size = zcdm_c->zc_nvlist_src_size;
zc->zc_nvlist_dst = zcdm_c->zc_nvlist_dst;
zc->zc_nvlist_dst_size = zcdm_c->zc_nvlist_dst_size;
zc->zc_cookie = zcdm_c->zc_cookie;
zc->zc_objset_type = zcdm_c->zc_objset_type;
zc->zc_perm_action = zcdm_c->zc_perm_action;
zc->zc_history = zcdm_c->zc_history;
zc->zc_history_len = zcdm_c->zc_history_len;
zc->zc_history_offset = zcdm_c->zc_history_offset;
zc->zc_obj = zcdm_c->zc_obj;
zc->zc_iflags = zcdm_c->zc_iflags;
zc->zc_share = zcdm_c->zc_share;
zc->zc_jailid = zcdm_c->zc_jailid;
zc->zc_objset_stats = zcdm_c->zc_objset_stats;
zc->zc_begin_record = zcdm_c->zc_begin_record;
zc->zc_defer_destroy = zcdm_c->zc_defer_destroy;
zc->zc_temphold = zcdm_c->zc_temphold;
zc->zc_action_handle = zcdm_c->zc_action_handle;
zc->zc_cleanup_fd = zcdm_c->zc_cleanup_fd;
zc->zc_simple = zcdm_c->zc_simple;
bcopy(zcdm_c->zc_pad, zc->zc_pad, sizeof(zc->zc_pad));
zc->zc_sendobj = zcdm_c->zc_sendobj;
zc->zc_fromobj = zcdm_c->zc_fromobj;
zc->zc_createtxg = zcdm_c->zc_createtxg;
zc->zc_stat = zcdm_c->zc_stat;
/* zc_inject_record doesn't change in libzfs_core */
zcdm_c->zc_inject_record = zc->zc_inject_record;
/* we always assume zc_nvlist_dst_filled is true */
zc->zc_nvlist_dst_filled = B_TRUE;
break;
case ZFS_CMD_COMPAT_V28:
zc28_c = (void *)addr;
@ -58,7 +103,6 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN);
strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2);
strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN);
strlcpy(zc->zc_top_ds, zc28_c->zc_top_ds, MAXPATHLEN);
zc->zc_guid = zc28_c->zc_guid;
zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf;
zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size;
@ -174,19 +218,66 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
}
void
zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request,
const int cflag)
{
zfs_cmd_v15_t *zc_c;
zfs_cmd_v28_t *zc28_c;
zfs_cmd_deadman_t *zcdm_c;
switch (cflag) {
case ZFS_CMD_COMPAT_DEADMAN:
zcdm_c = (void *)addr;
strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN);
strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN);
zcdm_c->zc_guid = zc->zc_guid;
zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf;
zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
zcdm_c->zc_nvlist_src = zc->zc_nvlist_src;
zcdm_c->zc_nvlist_src_size = zc->zc_nvlist_src_size;
zcdm_c->zc_nvlist_dst = zc->zc_nvlist_dst;
zcdm_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size;
zcdm_c->zc_cookie = zc->zc_cookie;
zcdm_c->zc_objset_type = zc->zc_objset_type;
zcdm_c->zc_perm_action = zc->zc_perm_action;
zcdm_c->zc_history = zc->zc_history;
zcdm_c->zc_history_len = zc->zc_history_len;
zcdm_c->zc_history_offset = zc->zc_history_offset;
zcdm_c->zc_obj = zc->zc_obj;
zcdm_c->zc_iflags = zc->zc_iflags;
zcdm_c->zc_share = zc->zc_share;
zcdm_c->zc_jailid = zc->zc_jailid;
zcdm_c->zc_objset_stats = zc->zc_objset_stats;
zcdm_c->zc_begin_record = zc->zc_begin_record;
zcdm_c->zc_defer_destroy = zc->zc_defer_destroy;
zcdm_c->zc_temphold = zc->zc_temphold;
zcdm_c->zc_action_handle = zc->zc_action_handle;
zcdm_c->zc_cleanup_fd = zc->zc_cleanup_fd;
zcdm_c->zc_simple = zc->zc_simple;
bcopy(zc->zc_pad, zcdm_c->zc_pad, sizeof(zcdm_c->zc_pad));
zcdm_c->zc_sendobj = zc->zc_sendobj;
zcdm_c->zc_fromobj = zc->zc_fromobj;
zcdm_c->zc_createtxg = zc->zc_createtxg;
zcdm_c->zc_stat = zc->zc_stat;
/* zc_inject_record doesn't change in libzfs_core */
zc->zc_inject_record = zcdm_c->zc_inject_record;
#ifndef _KERNEL
if (request == ZFS_IOC_RECV)
strlcpy(zcdm_c->zc_top_ds,
zc->zc_value + strlen(zc->zc_value) + 1,
(MAXPATHLEN * 2) - strlen(zc->zc_value) - 1);
#endif
break;
case ZFS_CMD_COMPAT_V28:
zc28_c = (void *)addr;
strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN);
strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN);
strlcpy(zc28_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN);
zc28_c->zc_guid = zc->zc_guid;
zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf;
zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
@ -216,7 +307,12 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
zc28_c->zc_fromobj = zc->zc_fromobj;
zc28_c->zc_createtxg = zc->zc_createtxg;
zc28_c->zc_stat = zc->zc_stat;
#ifndef _KERNEL
if (request == ZFS_IOC_RECV)
strlcpy(zc28_c->zc_top_ds,
zc->zc_value + strlen(zc->zc_value) + 1,
MAXPATHLEN * 2 - strlen(zc->zc_value) - 1);
#endif
/* zc_inject_record */
zc28_c->zc_inject_record.zi_objset =
zc->zc_inject_record.zi_objset;
@ -476,7 +572,7 @@ zfs_ioctl_compat_pool_get_props(zfs_cmd_t *zc)
#ifndef _KERNEL
int
zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag)
zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag)
{
int nc, ret;
void *zc_c;
@ -484,14 +580,19 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag)
switch (cflag) {
case ZFS_CMD_COMPAT_NONE:
ret = ioctl(fd, cmd, zc);
ncmd = _IOWR('Z', request, struct zfs_cmd);
ret = ioctl(fd, ncmd, zc);
return (ret);
case ZFS_CMD_COMPAT_DEADMAN:
zc_c = malloc(sizeof(zfs_cmd_deadman_t));
ncmd = _IOWR('Z', request, struct zfs_cmd_deadman);
break;
case ZFS_CMD_COMPAT_V28:
zc_c = malloc(sizeof(zfs_cmd_v28_t));
ncmd = _IOWR('Z', ZFS_IOC(cmd), struct zfs_cmd_v28);
ncmd = _IOWR('Z', request, struct zfs_cmd_v28);
break;
case ZFS_CMD_COMPAT_V15:
nc = zfs_ioctl_v28_to_v15[ZFS_IOC(cmd)];
nc = zfs_ioctl_v28_to_v15[request];
zc_c = malloc(sizeof(zfs_cmd_v15_t));
ncmd = _IOWR('Z', nc, struct zfs_cmd_v15);
break;
@ -499,24 +600,25 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag)
return (EINVAL);
}
if (ZFS_IOC(ncmd) == ZFS_IOC_COMPAT_FAIL)
if (ZFS_IOCREQ(ncmd) == ZFS_IOC_COMPAT_FAIL)
return (ENOTSUP);
zfs_cmd_compat_put(zc, (caddr_t)zc_c, cflag);
zfs_cmd_compat_put(zc, (caddr_t)zc_c, request, cflag);
ret = ioctl(fd, ncmd, zc_c);
if (cflag == ZFS_CMD_COMPAT_V15 &&
nc == 2 /* ZFS_IOC_POOL_IMPORT */)
ret = ioctl(fd, _IOWR('Z', 4 /* ZFS_IOC_POOL_CONFIGS */,
nc == ZFS_IOC_POOL_IMPORT)
ret = ioctl(fd, _IOWR('Z', ZFS_IOC_POOL_CONFIGS,
struct zfs_cmd_v15), zc_c);
zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag);
free(zc_c);
if (cflag == ZFS_CMD_COMPAT_V15) {
switch (nc) {
case 2: /* ZFS_IOC_POOL_IMPORT */
case 4: /* ZFS_IOC_POOL_CONFIGS */
case 5: /* ZFS_IOC_POOL_STATS */
case 6: /* ZFS_IOC_POOL_TRYIMPORT */
case ZFS_IOC_POOL_IMPORT:
case ZFS_IOC_POOL_CONFIGS:
case ZFS_IOC_POOL_STATS:
case ZFS_IOC_POOL_TRYIMPORT:
zfs_ioctl_compat_fix_stats(zc, nc);
break;
case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */
@ -528,16 +630,25 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag)
return (ret);
}
#else /* _KERNEL */
void
int
zfs_ioctl_compat_pre(zfs_cmd_t *zc, int *vec, const int cflag)
{
if (cflag == ZFS_CMD_COMPAT_V15)
int error = 0;
/* are we creating a clone? */
if (*vec == ZFS_IOC_CREATE && zc->zc_value[0] != '\0')
*vec = ZFS_IOC_CLONE;
if (cflag == ZFS_CMD_COMPAT_V15) {
switch (*vec) {
case 7: /* ZFS_IOC_POOL_SCRUB (v15) */
zc->zc_cookie = POOL_SCAN_SCRUB;
break;
}
}
return (error);
}
void
@ -545,9 +656,9 @@ zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag)
{
if (cflag == ZFS_CMD_COMPAT_V15) {
switch (vec) {
case 4: /* ZFS_IOC_POOL_CONFIGS */
case 5: /* ZFS_IOC_POOL_STATS */
case 6: /* ZFS_IOC_POOL_TRYIMPORT */
case ZFS_IOC_POOL_CONFIGS:
case ZFS_IOC_POOL_STATS:
case ZFS_IOC_POOL_TRYIMPORT:
zfs_ioctl_compat_fix_stats(zc, vec);
break;
case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */
@ -556,4 +667,193 @@ zfs_ioctl_compat_post(zfs_cmd_t *zc, int vec, const int cflag)
}
}
}
nvlist_t *
zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec,
const int cflag)
{
nvlist_t *nvl, *tmpnvl, *hnvl;
nvpair_t *elem;
char *poolname, *snapname;
int err;
if (cflag == ZFS_CMD_COMPAT_NONE)
goto out;
switch (vec) {
case ZFS_IOC_CREATE:
nvl = fnvlist_alloc();
fnvlist_add_int32(nvl, "type", zc->zc_objset_type);
if (innvl != NULL) {
fnvlist_add_nvlist(nvl, "props", innvl);
nvlist_free(innvl);
}
return (nvl);
break;
case ZFS_IOC_CLONE:
nvl = fnvlist_alloc();
fnvlist_add_string(nvl, "origin", zc->zc_value);
if (innvl != NULL) {
fnvlist_add_nvlist(nvl, "props", innvl);
nvlist_free(innvl);
}
return (nvl);
break;
case ZFS_IOC_SNAPSHOT:
if (innvl == NULL)
goto out;
nvl = fnvlist_alloc();
fnvlist_add_nvlist(nvl, "props", innvl);
tmpnvl = fnvlist_alloc();
snapname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
fnvlist_add_boolean(tmpnvl, snapname);
kmem_free(snapname, strlen(snapname) + 1);
/* check if we are doing a recursive snapshot */
if (zc->zc_cookie)
dmu_get_recursive_snaps_nvl(zc->zc_name, zc->zc_value,
tmpnvl);
fnvlist_add_nvlist(nvl, "snaps", tmpnvl);
fnvlist_free(tmpnvl);
nvlist_free(innvl);
/* strip dataset part from zc->zc_name */
zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
return (nvl);
break;
case ZFS_IOC_SPACE_SNAPS:
nvl = fnvlist_alloc();
fnvlist_add_string(nvl, "firstsnap", zc->zc_value);
if (innvl != NULL)
nvlist_free(innvl);
return (nvl);
break;
case ZFS_IOC_DESTROY_SNAPS:
if (innvl == NULL && cflag == ZFS_CMD_COMPAT_DEADMAN)
goto out;
nvl = fnvlist_alloc();
if (innvl != NULL) {
fnvlist_add_nvlist(nvl, "snaps", innvl);
} else {
/*
* We are probably being called by even older binaries;
* allocate and populate an nvlist with the recursive
* snapshots.
*/
if (snapshot_namecheck(zc->zc_value, NULL,
NULL) == 0) {
tmpnvl = fnvlist_alloc();
if (dmu_get_recursive_snaps_nvl(zc->zc_name,
zc->zc_value, tmpnvl) == 0)
fnvlist_add_nvlist(nvl, "snaps",
tmpnvl);
nvlist_free(tmpnvl);
}
}
if (innvl != NULL)
nvlist_free(innvl);
/* strip dataset part from zc->zc_name */
zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
return (nvl);
break;
case ZFS_IOC_HOLD:
nvl = fnvlist_alloc();
tmpnvl = fnvlist_alloc();
if (zc->zc_cleanup_fd != -1)
fnvlist_add_int32(nvl, "cleanup_fd",
(int32_t)zc->zc_cleanup_fd);
if (zc->zc_cookie) {
hnvl = fnvlist_alloc();
if (dmu_get_recursive_snaps_nvl(zc->zc_name,
zc->zc_value, hnvl) == 0) {
elem = NULL;
while ((elem = nvlist_next_nvpair(hnvl,
elem)) != NULL) {
nvlist_add_string(tmpnvl,
nvpair_name(elem), zc->zc_string);
}
}
nvlist_free(hnvl);
} else {
snapname = kmem_asprintf("%s@%s", zc->zc_name,
zc->zc_value);
nvlist_add_string(tmpnvl, snapname, zc->zc_string);
kmem_free(snapname, strlen(snapname) + 1);
}
fnvlist_add_nvlist(nvl, "holds", tmpnvl);
nvlist_free(tmpnvl);
if (innvl != NULL)
nvlist_free(innvl);
/* strip dataset part from zc->zc_name */
zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
return (nvl);
break;
case ZFS_IOC_RELEASE:
nvl = fnvlist_alloc();
tmpnvl = fnvlist_alloc();
if (zc->zc_cookie) {
hnvl = fnvlist_alloc();
if (dmu_get_recursive_snaps_nvl(zc->zc_name,
zc->zc_value, hnvl) == 0) {
elem = NULL;
while ((elem = nvlist_next_nvpair(hnvl,
elem)) != NULL) {
fnvlist_add_boolean(tmpnvl,
zc->zc_string);
fnvlist_add_nvlist(nvl,
nvpair_name(elem), tmpnvl);
}
}
nvlist_free(hnvl);
} else {
snapname = kmem_asprintf("%s@%s", zc->zc_name,
zc->zc_value);
fnvlist_add_boolean(tmpnvl, zc->zc_string);
fnvlist_add_nvlist(nvl, snapname, tmpnvl);
kmem_free(snapname, strlen(snapname) + 1);
}
nvlist_free(tmpnvl);
if (innvl != NULL)
nvlist_free(innvl);
/* strip dataset part from zc->zc_name */
zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
return (nvl);
break;
}
out:
return (innvl);
}
nvlist_t *
zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec,
const int cflag)
{
nvlist_t *tmpnvl;
if (cflag == ZFS_CMD_COMPAT_NONE)
return (outnvl);
switch (vec) {
case ZFS_IOC_SPACE_SNAPS:
(void) nvlist_lookup_uint64(outnvl, "used", &zc->zc_cookie);
(void) nvlist_lookup_uint64(outnvl, "compressed",
&zc->zc_objset_type);
(void) nvlist_lookup_uint64(outnvl, "uncompressed",
&zc->zc_perm_action);
nvlist_free(outnvl);
/* return empty outnvl */
tmpnvl = fnvlist_alloc();
return (tmpnvl);
break;
case ZFS_IOC_CREATE:
case ZFS_IOC_CLONE:
case ZFS_IOC_HOLD:
case ZFS_IOC_RELEASE:
nvlist_free(outnvl);
/* return empty outnvl */
tmpnvl = fnvlist_alloc();
return (tmpnvl);
break;
}
return (outnvl);
}
#endif /* KERNEL */

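To make the remapping above concrete: for an old-style ZFS_IOC_SNAPSHOT request, zfs_ioctl_compat_innvl() hands the new handler an nvlist of roughly the following shape (a sketch only; the pool, dataset and snapshot names are made up):

	nvlist_t *nvl = fnvlist_alloc();	/* outer argument list */
	nvlist_t *snaps = fnvlist_alloc();

	/* one boolean entry per snapshot to create */
	fnvlist_add_boolean(snaps, "tank/fs@today");
	fnvlist_add_nvlist(nvl, "snaps", snaps);
	/* plus an optional "props" nvlist carried over from the old request */
	fnvlist_free(snaps);
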
View File

@ -45,17 +45,23 @@ extern "C" {
*/
/* ioctl versions for vfs.zfs.version.ioctl */
#define ZFS_IOCVER_UNDEF -1
#define ZFS_IOCVER_NONE 0
#define ZFS_IOCVER_DEADMAN 1
#define ZFS_IOCVER_CURRENT ZFS_IOCVER_DEADMAN
#define ZFS_IOCVER_LZC 2
#define ZFS_IOCVER_CURRENT ZFS_IOCVER_LZC
/* compatibility conversion flag */
#define ZFS_CMD_COMPAT_NONE 0
#define ZFS_CMD_COMPAT_V15 1
#define ZFS_CMD_COMPAT_V28 2
#define ZFS_CMD_COMPAT_DEADMAN 3
#define ZFS_IOC_COMPAT_PASS 254
#define ZFS_IOC_COMPAT_FAIL 255
#define ZFS_IOCREQ(ioreq) ((ioreq) & 0xff)
typedef struct zinject_record_v15 {
uint64_t zi_objset;
uint64_t zi_object;
@ -148,6 +154,44 @@ typedef struct zfs_cmd_v28 {
zfs_stat_t zc_stat;
} zfs_cmd_v28_t;
typedef struct zfs_cmd_deadman {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
uint64_t zc_history; /* really (char *) */
uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
uint64_t zc_iflags; /* internal to zfs(7fs) */
zfs_share_t zc_share;
uint64_t zc_jailid;
dmu_objset_stats_t zc_objset_stats;
struct drr_begin zc_begin_record;
/* zc_inject_record doesn't change in libzfs_core */
zinject_record_t zc_inject_record;
boolean_t zc_defer_destroy;
boolean_t zc_temphold;
uint64_t zc_action_handle;
int zc_cleanup_fd;
uint8_t zc_simple;
uint8_t zc_pad[3]; /* alignment */
uint64_t zc_sendobj;
uint64_t zc_fromobj;
uint64_t zc_createtxg;
zfs_stat_t zc_stat;
} zfs_cmd_deadman_t;
#ifdef _KERNEL
unsigned static long zfs_ioctl_v15_to_v28[] = {
0, /* 0 ZFS_IOC_POOL_CREATE */
@ -272,13 +316,17 @@ unsigned static long zfs_ioctl_v28_to_v15[] = {
#endif /* ! _KERNEL */
#ifdef _KERNEL
void zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int);
int zfs_ioctl_compat_pre(zfs_cmd_t *, int *, const int);
void zfs_ioctl_compat_post(zfs_cmd_t *, const int, const int);
nvlist_t *zfs_ioctl_compat_innvl(zfs_cmd_t *, nvlist_t *, const int,
const int);
nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int,
const int);
#else
int zcmd_ioctl_compat(int, unsigned long, zfs_cmd_t *, const int);
int zcmd_ioctl_compat(int, int, zfs_cmd_t *, const int);
#endif /* _KERNEL */
void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int);
void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int);
void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int);
#ifdef __cplusplus
}

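This header is shared by both sides of the compatibility layer. One way a userland consumer can decide which zfs_cmd layout to speak is to ask the kernel for its ioctl version; the sysctl name comes from the comment in this header, but the probe below is only an illustrative sketch, not code from this commit:

#include <sys/types.h>
#include <sys/sysctl.h>

static int
get_zfs_ioctl_version(void)
{
	int ver = ZFS_IOCVER_UNDEF;
	size_t size = sizeof (ver);

	if (sysctlbyname("vfs.zfs.version.ioctl", &ver, &size, NULL, 0) != 0)
		ver = ZFS_IOCVER_UNDEF;	/* very old kernel: no sysctl at all */
	return (ver);
}
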
View File

@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
* Common routines used by zfs and zpool property management.
@ -129,7 +132,8 @@ zprop_register_hidden(int prop, const char *name, zprop_type_t type,
zprop_attr_t attr, int objset_types, const char *colname)
{
zprop_register_impl(prop, name, type, 0, NULL, attr,
objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
objset_types, NULL, colname,
type == PROP_TYPE_NUMBER, B_FALSE, NULL);
}

View File

@ -49,8 +49,10 @@ ZFS_COMMON_OBJS += \
dsl_dir.o \
dsl_dataset.o \
dsl_deadlist.o \
dsl_destroy.o \
dsl_pool.o \
dsl_synctask.o \
dsl_userhold.o \
dmu_zfetch.o \
dsl_deleg.o \
dsl_prop.o \
@ -61,6 +63,7 @@ ZFS_COMMON_OBJS += \
lzjb.o \
metaslab.o \
refcount.o \
rrwlock.o \
sa.o \
sha256.o \
spa.o \
@ -120,7 +123,6 @@ ZFS_OBJS += \
zfs_onexit.o \
zfs_replay.o \
zfs_rlock.o \
rrwlock.o \
zfs_vfsops.o \
zfs_vnops.o \
zvol.o

View File

@ -1787,12 +1787,12 @@ arc_buf_free(arc_buf_t *buf, void *tag)
}
}
int
boolean_t
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = HDR_LOCK(hdr);
int no_callback = (buf->b_efunc == NULL);
boolean_t no_callback = (buf->b_efunc == NULL);
if (hdr->b_state == arc_anon) {
ASSERT(hdr->b_datacnt == 1);
@ -2042,7 +2042,7 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
ARCSTAT_INCR(arcstat_mutex_miss, missed);
/*
* We have just evicted some date into the ghost state, make
* We have just evicted some data into the ghost state, make
* sure we also adjust the ghost state size if necessary.
*/
if (arc_no_grow &&
@ -2875,7 +2875,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
if (zio == NULL || zio->io_error == 0)
bcopy(buf->b_data, arg, buf->b_hdr->b_size);
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
}
/* a generic arc_done_func_t */
@ -2884,7 +2884,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
if (zio && zio->io_error) {
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
*bufp = NULL;
} else {
*bufp = buf;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/bplist.h>
@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp)
mutex_exit(&bpl->bpl_lock);
}
/*
* To aid debugging, we keep the most recently removed entry. This way if
* we are in the callback, we can easily locate the entry.
*/
static bplist_entry_t *bplist_iterate_last_removed;
void
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
{
@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
mutex_enter(&bpl->bpl_lock);
while (bpe = list_head(&bpl->bpl_list)) {
bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);

View File

@ -392,6 +392,10 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
}
dmu_object_info_t doi;
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),

View File

@ -39,7 +39,7 @@
#include <sys/sa_impl.h>
static void dbuf_destroy(dmu_buf_impl_t *db);
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
/*
@ -499,7 +499,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
@ -828,10 +828,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
continue;
/* found a level 0 buffer in the range */
if (dbuf_undirty(db, tx))
continue;
mutex_enter(&db->db_mtx);
if (dbuf_undirty(db, tx)) {
/* mutex has been dropped and dbuf destroyed */
continue;
}
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
@ -958,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
mutex_enter(&db->db_mtx);
dbuf_set_data(db, buf);
VERIFY(arc_buf_remove_ref(obuf, db) == 1);
VERIFY(arc_buf_remove_ref(obuf, db));
db->db.db_size = size;
if (db->db_level == 0) {
@ -1258,7 +1260,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr);
}
static int
/*
* Return TRUE if this evicted the dbuf.
*/
static boolean_t
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
dnode_t *dn;
@ -1267,18 +1272,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(txg != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT0(db->db_level);
ASSERT(MUTEX_HELD(&db->db_mtx));
mutex_enter(&db->db_mtx);
/*
* If this buffer is not dirty, we're done.
*/
for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
if (dr->dr_txg <= txg)
break;
if (dr == NULL || dr->dr_txg < txg) {
mutex_exit(&db->db_mtx);
return (0);
}
if (dr == NULL || dr->dr_txg < txg)
return (B_FALSE);
ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
@ -1286,24 +1290,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn = DB_DNODE(db);
/*
* If this buffer is currently held, we cannot undirty
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
* Also note: we can get here with a spill block, so
* test for that similar to how dbuf_dirty does.
* Note: This code will probably work even if there are concurrent
* holders, but it is untested in that scenerio, as the ZPL and
* ztest have additional locking (the range locks) that prevents
* that type of concurrent access.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
if (db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
dnode_clear_range(dn, db->db_blkid, 1, tx);
mutex_exit(&dn->dn_mtx);
}
DB_DNODE_EXIT(db);
return (0);
}
ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@ -1332,21 +1324,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
DB_DNODE_EXIT(db);
if (db->db_level == 0) {
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
ASSERT(db->db_buf != NULL);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
}
} else {
ASSERT(db->db_buf != NULL);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
mutex_destroy(&dr->dt.di.dr_mtx);
list_destroy(&dr->dt.di.dr_children);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
@ -1358,13 +1342,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (1);
return (B_TRUE);
}
mutex_exit(&db->db_mtx);
return (0);
return (B_FALSE);
}
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
@ -1463,7 +1446,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
xuio_stat_wbuf_copied();
return;
}
@ -1481,10 +1464,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_release(db->db_buf, db);
}
dr->dt.dl.dr_data = buf;
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
arc_release(db->db_buf, db);
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
}
db->db_buf = NULL;
}
@ -2067,10 +2050,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* This dbuf has anonymous data associated with it.
*/
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
VERIFY(!arc_buf_remove_ref(db->db_buf, db));
/*
* A dbuf will be eligible for eviction if either the
@ -2571,7 +2554,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (db->db_state != DB_NOFILL) {
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
db));
else if (!arc_released(db->db_buf))
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}

View File

@ -1204,7 +1204,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
VERIFY(arc_buf_remove_ref(buf, FTAG));
}
/*

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -178,51 +179,53 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
int
dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp)
dmu_diff(const char *tosnap_name, const char *fromsnap_name,
#ifdef illumos
struct vnode *vp, offset_t *offp)
#else
struct file *fp, offset_t *offp)
#endif
{
struct diffarg da;
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
dsl_dataset_t *fromds = fromsnap->os_dsl_dataset;
dsl_dataset_t *findds;
dsl_dataset_t *relds;
int err = 0;
dsl_dataset_t *fromsnap;
dsl_dataset_t *tosnap;
dsl_pool_t *dp;
int error;
uint64_t fromtxg;
/* make certain we are looking at snapshots */
if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds))
if (strchr(tosnap_name, '@') == NULL ||
strchr(fromsnap_name, '@') == NULL)
return (EINVAL);
/* fromsnap must be earlier and from the same lineage as tosnap */
if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)
return (EXDEV);
error = dsl_pool_hold(tosnap_name, FTAG, &dp);
if (error != 0)
return (error);
relds = NULL;
findds = ds;
while (fromds->ds_dir != findds->ds_dir) {
dsl_pool_t *dp = ds->ds_dir->dd_pool;
if (!dsl_dir_is_clone(findds->ds_dir)) {
if (relds)
dsl_dataset_rele(relds, FTAG);
return (EXDEV);
}
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dataset_hold_obj(dp,
findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds);
rw_exit(&dp->dp_config_rwlock);
if (relds)
dsl_dataset_rele(relds, FTAG);
if (err)
return (EXDEV);
relds = findds;
error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
if (relds)
dsl_dataset_rele(relds, FTAG);
error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap);
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
if (!dsl_dataset_is_before(tosnap, fromsnap)) {
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (EXDEV);
}
fromtxg = fromsnap->ds_phys->ds_creation_txg;
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_long_hold(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
da.da_fp = fp;
da.da_offp = offp;
@ -231,15 +234,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp)
da.da_err = 0;
da.da_td = curthread;
err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg,
error = traverse_dataset(tosnap, fromtxg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
if (err) {
da.da_err = err;
if (error != 0) {
da.da_err = error;
} else {
/* we set the da.da_err we return as side-effect */
(void) write_record(&da);
}
dsl_dataset_long_rele(tosnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
return (da.da_err);
}

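The rewritten dmu_diff() above shows the new hold discipline: take the pool config lock with dsl_pool_hold(), hold datasets by name under it, and take a long hold on anything that must outlive the pool hold. A stripped-down sketch of that same pattern (generic names, error handling shortened; not code from this commit):

static int
example_inspect_snapshot(const char *name)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	error = dsl_pool_hold(name, FTAG, &dp);		/* config lock, reader */
	if (error != 0)
		return (error);
	error = dsl_dataset_hold(dp, name, FTAG, &ds);
	if (error != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}
	dsl_dataset_long_hold(ds, FTAG);	/* keep ds past the pool hold */
	dsl_pool_rele(dp, FTAG);

	/* ... long-running work that only needs ds, e.g. a traversal ... */

	dsl_dataset_long_rele(ds, FTAG);
	dsl_dataset_rele(ds, FTAG);
	return (0);
}
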
File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
cbp = buf->b_data;
@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
err = traverse_visitbp(td, dnp, &cbp[i], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
dnp = buf->b_data;
@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
for (i = 0; i < epb; i++) {
err = traverse_dnode(td, &dnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
osp = buf->b_data;
@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -514,14 +514,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
objset_t *os;
if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
err = dmu_objset_from_ds(ds, &os);
if (err)
err = arc_read(NULL, td.td_spa, rootbp,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
if (err != 0)
return (err);
traverse_zil(&td, &os->os_zil_header);
osp = buf->b_data;
traverse_zil(&td, &osp->os_zil_header);
(void) arc_buf_remove_ref(buf, &buf);
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@ -583,7 +589,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit the MOS */
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
txg_start, NULL, flags, func, arg);
if (err)
if (err != 0)
return (err);
/* visit each dataset */
@ -592,7 +598,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -603,10 +609,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dsl_dataset_t *ds;
uint64_t txg = txg_start;
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_pool_config_exit(dp, FTAG);
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -616,7 +622,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
txg = ds->ds_phys->ds_prev_snap_txg;
err = traverse_dataset(ds, txg, flags, func, arg);
dsl_dataset_rele(ds, FTAG);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;

View File

@ -48,7 +48,7 @@ dmu_tx_create_dd(dsl_dir_t *dd)
{
dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP);
tx->tx_dir = dd;
if (dd)
if (dd != NULL)
tx->tx_pool = dd->dd_pool;
list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
offsetof(dmu_tx_hold_t, txh_node));
@ -898,7 +898,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
#endif
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
@ -961,13 +961,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
fudge += txh->txh_fudge;
}
/*
* NB: This check must be after we've held the dnodes, so that
* the dmu_tx_unassign() logic will work properly
*/
if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
return (ERESTART);
/*
* If a snapshot has been taken since we made our estimates,
* assume that we won't be able to free or overwrite anything.
@ -1048,26 +1041,25 @@ dmu_tx_unassign(dmu_tx_t *tx)
*
* (1) TXG_WAIT. If the current open txg is full, waits until there's
* a new one. This should be used when you're not holding locks.
* If will only fail if we're truly out of space (or over quota).
* It will only fail if we're truly out of space (or over quota).
*
* (2) TXG_NOWAIT. If we can't assign into the current open txg without
* blocking, returns immediately with ERESTART. This should be used
* whenever you're holding locks. On an ERESTART error, the caller
* should drop locks, do a dmu_tx_wait(tx), and try again.
*
* (3) A specific txg. Use this if you need to ensure that multiple
* transactions all sync in the same txg. Like TXG_NOWAIT, it
* returns ERESTART if it can't assign you into the requested txg.
*/
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
int err;
ASSERT(tx->tx_txg == 0);
ASSERT(txg_how != 0);
ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
/* If we might wait, we must not hold the config lock. */
ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
dmu_tx_unassign(tx);
@ -1088,6 +1080,7 @@ dmu_tx_wait(dmu_tx_t *tx)
spa_t *spa = tx->tx_pool->dp_spa;
ASSERT(tx->tx_txg == 0);
ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
* It's possible that the pool has become active after this thread
@ -1214,6 +1207,14 @@ dmu_tx_get_txg(dmu_tx_t *tx)
return (tx->tx_txg);
}
dsl_pool_t *
dmu_tx_pool(dmu_tx_t *tx)
{
ASSERT(tx->tx_pool != NULL);
return (tx->tx_pool);
}
void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{

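The updated comment above spells out the two remaining assignment modes. For reference, a hedged sketch of the caller idiom it describes for TXG_NOWAIT (the object and the hold are placeholders; any caller-held locks must be dropped before waiting):

static int
example_update(objset_t *os, uint64_t object)
{
	dmu_tx_t *tx;
	int error;

top:
	tx = dmu_tx_create(os);
	dmu_tx_hold_bonus(tx, object);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error != 0) {
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		return (error);
	}
	/* ... dirty data under this tx ... */
	dmu_tx_commit(tx);
	return (0);
}
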
View File

@ -74,7 +74,11 @@ dnode_cons(void *arg, void *unused, int kmflag)
mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
refcount_create(&dn->dn_holds);
/*
* Every dbuf has a reference, and dropping a tracked reference is
* O(number of references), so don't track dn_holds.
*/
refcount_create_untracked(&dn->dn_holds);
refcount_create(&dn->dn_tx_holds);
list_link_init(&dn->dn_link);

View File

@ -480,6 +480,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still

File diff suppressed because it is too large

View File

@ -147,28 +147,37 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
return (0);
}
static void
dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
typedef struct dsl_deleg_arg {
const char *dda_name;
nvlist_t *dda_nvlist;
} dsl_deleg_arg_t;
static void
dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
{
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj;
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dmu_buf_will_dirty(dd->dd_dbuf, tx);
zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
}
while (whopair = nvlist_next_nvpair(nvp, whopair)) {
while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
uint64_t jumpobj;
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
perms = fnvpair_value_nvlist(whopair);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
@ -181,27 +190,31 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos, jumpobj,
perm, 8, 1, &n, tx) == 0);
spa_history_log_internal(LOG_DS_PERM_UPDATE,
dd->dd_pool->dp_spa, tx,
"%s %s dataset = %llu", whokey, perm,
dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal_dd(dd, "permission update", tx,
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
static void
dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
uint64_t zapobj;
if (zapobj == 0)
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dsl_dir_rele(dd, FTAG);
return;
}
while (whopair = nvlist_next_nvpair(nvp, whopair)) {
while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
@ -213,10 +226,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
(void) zap_remove(mos, zapobj, whokey, tx);
VERIFY(0 == zap_destroy(mos, jumpobj, tx));
}
spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
dd->dd_pool->dp_spa, tx,
"%s dataset = %llu", whokey,
dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal_dd(dd, "permission who remove",
tx, "%s", whokey);
continue;
}
@ -234,41 +245,44 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(0 == zap_destroy(mos,
jumpobj, tx));
}
spa_history_log_internal(LOG_DS_PERM_REMOVE,
dd->dd_pool->dp_spa, tx,
"%s %s dataset = %llu", whokey, perm,
dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal_dd(dd, "permission remove", tx,
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
static int
dsl_deleg_check(void *arg, dmu_tx_t *tx)
{
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
int error;
if (spa_version(dmu_tx_pool(tx)->dp_spa) <
SPA_VERSION_DELEGATED_PERMS) {
return (ENOTSUP);
}
error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL);
if (error == 0)
dsl_dir_rele(dd, FTAG);
return (error);
}
int
dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
{
dsl_dir_t *dd;
int error;
nvpair_t *whopair = NULL;
int blocks_modified = 0;
dsl_deleg_arg_t dda;
error = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (error)
return (error);
/* nvp must already have been verified to be valid */
if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
SPA_VERSION_DELEGATED_PERMS) {
dsl_dir_close(dd, FTAG);
return (ENOTSUP);
}
dda.dda_name = ddname;
dda.dda_nvlist = nvp;
while (whopair = nvlist_next_nvpair(nvp, whopair))
blocks_modified++;
error = dsl_sync_task_do(dd->dd_pool, NULL,
return (dsl_sync_task(ddname, dsl_deleg_check,
unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
dd, nvp, blocks_modified);
dsl_dir_close(dd, FTAG);
return (error);
&dda, fnvlist_num_pairs(nvp)));
}
/*
@ -296,16 +310,21 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
int error;
objset_t *mos;
error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
if (error)
error = dsl_pool_hold(ddname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
dp = startdd->dd_pool;
mos = dp->dp_meta_objset;
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
rw_enter(&dp->dp_config_rwlock, RW_READER);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
zap_cursor_t basezc;
zap_attribute_t baseza;
@ -313,15 +332,12 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
uint64_t n;
char source[MAXNAMELEN];
if (dd->dd_phys->dd_deleg_zapobj &&
(zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
&n) == 0) && n) {
VERIFY(nvlist_alloc(&sp_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
} else {
if (dd->dd_phys->dd_deleg_zapobj == 0 ||
zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 ||
n == 0)
continue;
}
sp_nvp = fnvlist_alloc();
for (zap_cursor_init(&basezc, mos,
dd->dd_phys->dd_deleg_zapobj);
zap_cursor_retrieve(&basezc, &baseza) == 0;
@ -333,29 +349,26 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
ASSERT(baseza.za_integer_length == 8);
ASSERT(baseza.za_num_integers == 1);
VERIFY(nvlist_alloc(&perms_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
perms_nvp = fnvlist_alloc();
for (zap_cursor_init(&zc, mos, baseza.za_first_integer);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
VERIFY(nvlist_add_boolean(perms_nvp,
za.za_name) == 0);
fnvlist_add_boolean(perms_nvp, za.za_name);
}
zap_cursor_fini(&zc);
VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name,
perms_nvp) == 0);
nvlist_free(perms_nvp);
fnvlist_add_nvlist(sp_nvp, baseza.za_name, perms_nvp);
fnvlist_free(perms_nvp);
}
zap_cursor_fini(&basezc);
dsl_dir_name(dd, source);
VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
fnvlist_add_nvlist(*nvp, source, sp_nvp);
nvlist_free(sp_nvp);
}
rw_exit(&dp->dp_config_rwlock);
dsl_dir_close(startdd, FTAG);
dsl_dir_rele(startdd, FTAG);
dsl_pool_rele(dp, FTAG);
return (0);
}
@ -524,12 +537,10 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl,
}
/*
* Check if user has requested permission. If descendent is set, must have
* descendent perms.
* Check if user has requested permission.
*/
int
dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
cred_t *cr)
dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
{
dsl_dir_t *dd;
dsl_pool_t *dp;
@ -550,7 +561,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
SPA_VERSION_DELEGATED_PERMS)
return (EPERM);
if (dsl_dataset_is_snapshot(ds) || descendent) {
if (dsl_dataset_is_snapshot(ds)) {
/*
* Snapshots are treated as descendents only,
* local permissions do not apply.
@ -563,7 +574,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
offsetof(perm_set_t, p_node));
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
checkflag = ZFS_DELEG_DESCENDENT) {
uint64_t zapobj;
@ -624,7 +635,6 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
}
error = EPERM;
success:
rw_exit(&dp->dp_config_rwlock);
cookie = NULL;
while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@ -636,15 +646,19 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int error;
error = dsl_dataset_hold(dsname, FTAG, &ds);
if (error)
error = dsl_pool_hold(dsname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
dsl_dataset_rele(ds, FTAG);
error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (error == 0) {
error = dsl_deleg_access_impl(ds, perm, cr);
dsl_dataset_rele(ds, FTAG);
}
dsl_pool_rele(dp, FTAG);
return (error);
}

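The delegation code above is representative of how operations were converted to the restructured synctask interface: a check callback and a sync callback, each taking only an argument pointer and the tx (the pool now comes from dmu_tx_pool()), dispatched by pool name through dsl_sync_task(). A minimal sketch of that shape (all names here are placeholders, not code from this commit):

typedef struct example_arg {
	const char *ea_name;		/* dataset or pool name */
} example_arg_t;

static int
example_check(void *arg, dmu_tx_t *tx)
{
	example_arg_t *ea = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);

	/* validate ea->ea_name against dp; return 0 or an errno */
	return (0);
}

static void
example_sync(void *arg, dmu_tx_t *tx)
{
	/* apply the change in syncing context and log it to pool history */
}

static int
example_op(const char *name)
{
	example_arg_t ea;

	ea.ea_name = name;
	/* last argument is the estimated number of blocks modified */
	return (dsl_sync_task(name, example_check, example_sync, &ea, 1));
}
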
View File

@ -0,0 +1,926 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;
nvlist_t *dsda_successful_snaps;
boolean_t dsda_defer;
nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
/*
* ds must be owned.
*/
static int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
if (!dsl_dataset_is_snapshot(ds))
return (EINVAL);
if (dsl_dataset_long_held(ds))
return (EBUSY);
/*
* Only allow deferred destroy on pools that support it.
* NOTE: deferred destroy is only supported on snapshots.
*/
if (defer) {
if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
SPA_VERSION_USERREFS)
return (ENOTSUP);
return (0);
}
/*
* If this snapshot has an elevated user reference count,
* we can't destroy it yet.
*/
if (ds->ds_userrefs > 0)
return (EBUSY);
/*
* Can't delete a branch point.
*/
if (ds->ds_phys->ds_num_children > 1)
return (EEXIST);
return (0);
}
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
dmu_snapshots_destroy_arg_t *dsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int error = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
dsl_dataset_t *ds;
error = dsl_dataset_hold(dp, nvpair_name(pair),
FTAG, &ds);
/*
* If the snapshot does not exist, silently ignore it
* (it's "already destroyed").
*/
if (error == ENOENT)
continue;
if (error == 0) {
error = dsl_destroy_snapshot_check_impl(ds,
dsda->dsda_defer);
dsl_dataset_rele(ds, FTAG);
}
if (error == 0) {
fnvlist_add_boolean(dsda->dsda_successful_snaps,
nvpair_name(pair));
} else {
fnvlist_add_int32(dsda->dsda_errlist,
nvpair_name(pair), error);
}
}
pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
if (pair != NULL)
return (fnvpair_value_int32(pair));
return (0);
}
struct process_old_arg {
dsl_dataset_t *ds;
dsl_dataset_t *ds_prev;
boolean_t after_branch_point;
zio_t *pio;
uint64_t used, comp, uncomp;
};
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
struct process_old_arg *poa = arg;
dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
if (poa->ds_prev && !poa->after_branch_point &&
bp->blk_birth >
poa->ds_prev->ds_phys->ds_prev_snap_txg) {
poa->ds_prev->ds_phys->ds_unique_bytes +=
bp_get_dsize_sync(dp->dp_spa, bp);
}
} else {
poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
poa->comp += BP_GET_PSIZE(bp);
poa->uncomp += BP_GET_UCSIZE(bp);
dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
}
return (0);
}
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
struct process_old_arg poa = { 0 };
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t deadlist_obj;
ASSERT(ds->ds_deadlist.dl_oldfmt);
ASSERT(ds_next->ds_deadlist.dl_oldfmt);
poa.ds = ds;
poa.ds_prev = ds_prev;
poa.after_branch_point = after_branch_point;
poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
process_old_cb, &poa, tx));
VERIFY0(zio_wait(poa.pio));
ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
/* change snapused */
dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-poa.used, -poa.comp, -poa.uncomp, tx);
/* swap next's deadlist to our deadlist */
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_close(&ds_next->ds_deadlist);
deadlist_obj = ds->ds_phys->ds_deadlist_obj;
ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
dsl_deadlist_open(&ds_next->ds_deadlist, mos,
ds_next->ds_phys->ds_deadlist_obj);
}
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
zap_cursor_t zc;
zap_attribute_t za;
/*
* If it is the old version, dd_clones doesn't exist so we can't
* find the clones, but dsl_deadlist_remove_key() is a no-op so it
* doesn't matter.
*/
if (ds->ds_dir->dd_phys->dd_clones == 0)
return;
for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone));
if (clone->ds_dir->dd_origin_txg > mintxg) {
dsl_deadlist_remove_key(&clone->ds_deadlist,
mintxg, tx);
dsl_dataset_remove_clones_key(clone, mintxg, tx);
}
dsl_dataset_rele(clone, FTAG);
}
zap_cursor_fini(&zc);
}
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
int err;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
dsl_dataset_t *ds_prev = NULL;
uint64_t obj;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
ASSERT(refcount_is_zero(&ds->ds_longholds));
if (defer &&
(ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
return;
}
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, "");
dsl_scan_ds_destroyed(ds, tx);
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
ASSERT3P(ds->ds_prev, ==, NULL);
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
after_branch_point =
(ds_prev->ds_phys->ds_next_snap_obj != obj);
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
if (after_branch_point &&
ds_prev->ds_phys->ds_next_clones_obj != 0) {
dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
if (ds->ds_phys->ds_next_snap_obj != 0) {
VERIFY0(zap_add_int(mos,
ds_prev->ds_phys->ds_next_clones_obj,
ds->ds_phys->ds_next_snap_obj, tx));
}
}
if (!after_branch_point) {
ds_prev->ds_phys->ds_next_snap_obj =
ds->ds_phys->ds_next_snap_obj;
}
}
dsl_dataset_t *ds_next;
uint64_t old_unique;
uint64_t used = 0, comp = 0, uncomp = 0;
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
old_unique = ds_next->ds_phys->ds_unique_bytes;
dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
ds_next->ds_phys->ds_prev_snap_obj =
ds->ds_phys->ds_prev_snap_obj;
ds_next->ds_phys->ds_prev_snap_txg =
ds->ds_phys->ds_prev_snap_txg;
ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
if (ds_next->ds_deadlist.dl_oldfmt) {
process_old_deadlist(ds, ds_prev, ds_next,
after_branch_point, tx);
} else {
/* Adjust prev's unique space. */
if (ds_prev && !after_branch_point) {
dsl_deadlist_space_range(&ds_next->ds_deadlist,
ds_prev->ds_phys->ds_prev_snap_txg,
ds->ds_phys->ds_prev_snap_txg,
&used, &comp, &uncomp);
ds_prev->ds_phys->ds_unique_bytes += used;
}
/* Adjust snapused. */
dsl_deadlist_space_range(&ds_next->ds_deadlist,
ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
&used, &comp, &uncomp);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
-used, -comp, -uncomp, tx);
/* Move blocks to be freed to pool's free list. */
dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
&dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
ds->ds_phys->ds_deadlist_obj, tx);
}
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_deadlist_obj = 0;
/* Collapse range in clone heads */
dsl_dataset_remove_clones_key(ds,
ds->ds_phys->ds_creation_txg, tx);
if (dsl_dataset_is_snapshot(ds_next)) {
dsl_dataset_t *ds_nextnext;
/*
* Update next's unique to include blocks which
* were previously shared by only this snapshot
* and it. Those blocks will be born after the
* prev snap and before this snap, and will have
* died after the next snap and before the one
* after that (i.e. be on the snap after next's
* deadlist).
*/
VERIFY0(dsl_dataset_hold_obj(dp,
ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
ds->ds_phys->ds_prev_snap_txg,
ds->ds_phys->ds_creation_txg,
&used, &comp, &uncomp);
ds_next->ds_phys->ds_unique_bytes += used;
dsl_dataset_rele(ds_nextnext, FTAG);
ASSERT3P(ds_next->ds_prev, ==, NULL);
/* Collapse range in this head. */
dsl_dataset_t *hds;
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
dsl_deadlist_remove_key(&hds->ds_deadlist,
ds->ds_phys->ds_creation_txg, tx);
dsl_dataset_rele(hds, FTAG);
} else {
ASSERT3P(ds_next->ds_prev, ==, ds);
dsl_dataset_rele(ds_next->ds_prev, ds_next);
ds_next->ds_prev = NULL;
if (ds_prev) {
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj,
ds_next, &ds_next->ds_prev));
}
dsl_dataset_recalc_head_uniq(ds_next);
/*
* Reduce the amount of our unconsumed refreservation
* being charged to our parent by the amount of
* new unique data we have gained.
*/
if (old_unique < ds_next->ds_reserved) {
int64_t mrsdelta;
uint64_t new_unique =
ds_next->ds_phys->ds_unique_bytes;
ASSERT(old_unique <= new_unique);
mrsdelta = MIN(new_unique - old_unique,
ds_next->ds_reserved - old_unique);
dsl_dir_diduse_space(ds->ds_dir,
DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
}
}
dsl_dataset_rele(ds_next, FTAG);
/*
* This must be done after the dsl_traverse(), because it will
* re-open the objset.
*/
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
/* remove from snapshot namespace */
dsl_dataset_t *ds_head;
ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
{
uint64_t val;
err = dsl_dataset_snap_lookup(ds_head,
ds->ds_snapname, &val);
ASSERT0(err);
ASSERT3U(val, ==, obj);
}
#endif
VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
dsl_dataset_rele(ds_head, FTAG);
if (ds_prev != NULL)
dsl_dataset_rele(ds_prev, FTAG);
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
if (ds->ds_phys->ds_next_clones_obj != 0) {
uint64_t count;
ASSERT0(zap_count(mos,
ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
VERIFY0(dmu_object_free(mos,
ds->ds_phys->ds_next_clones_obj, tx));
}
if (ds->ds_phys->ds_props_obj != 0)
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
if (ds->ds_phys->ds_userrefs_obj != 0)
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
VERIFY0(dmu_object_free(mos, obj, tx));
}
static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
dmu_snapshots_destroy_arg_t *dsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
pair != NULL;
pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* The semantics of this function are described in the comment above
* lzc_destroy_snaps(). To summarize:
*
* The snapshots must all be in the same pool.
*
* Snapshots that don't exist will be silently ignored (considered to be
* "already deleted").
*
* On success, all snaps will be destroyed and this will return 0.
* On failure, no snaps will be destroyed, the errlist will be filled in,
* and this will return an errno.
*/
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
nvlist_t *errlist)
{
dmu_snapshots_destroy_arg_t dsda;
int error;
nvpair_t *pair;
pair = nvlist_next_nvpair(snaps, NULL);
if (pair == NULL)
return (0);
dsda.dsda_snaps = snaps;
dsda.dsda_successful_snaps = fnvlist_alloc();
dsda.dsda_defer = defer;
dsda.dsda_errlist = errlist;
error = dsl_sync_task(nvpair_name(pair),
dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
&dsda, 0);
fnvlist_free(dsda.dsda_successful_snaps);
return (error);
}
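/*
 * Editor's note: the following is an illustrative caller sketch, not part of
 * this commit.  It shows how dsl_destroy_snapshots_nvl() above could be
 * driven, using only the nvlist/fnvlist interfaces already used in this
 * file.  The function name and the snapshot names are hypothetical.
 */
static int
example_destroy_two_snaps(void)
{
	nvlist_t *snaps = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();
	int error;

	/* All snapshots passed in one call must be in the same pool. */
	fnvlist_add_boolean(snaps, "tank/fs@snap1");
	fnvlist_add_boolean(snaps, "tank/fs@snap2");

	/*
	 * Snapshots that don't exist are silently ignored; if anything
	 * fails, nothing is destroyed and per-snapshot errors land in
	 * errlist.
	 */
	error = dsl_destroy_snapshots_nvl(snaps, B_FALSE, errlist);

	fnvlist_free(errlist);
	fnvlist_free(snaps);
	return (error);
}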
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
int error;
nvlist_t *nvl = fnvlist_alloc();
nvlist_t *errlist = fnvlist_alloc();
fnvlist_add_boolean(nvl, name);
error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
fnvlist_free(errlist);
fnvlist_free(nvl);
return (error);
}
struct killarg {
dsl_dataset_t *ds;
dmu_tx_t *tx;
};
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
if (bp == NULL)
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
ASSERT(zilog != NULL);
/*
* It's a block in the intent log. It has no
* accounting, so just free it.
*/
dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
} else {
ASSERT(zilog == NULL);
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
}
return (0);
}
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
struct killarg ka;
/*
* Free everything that we point to (that's born after
* the previous snapshot, if we are a clone)
*
* NB: this should be very quick, because we already
* freed all the objects in open context.
*/
ka.ds = ds;
ka.tx = tx;
VERIFY0(traverse_dataset(ds,
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
kill_blkptr, &ka));
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
}
typedef struct dsl_destroy_head_arg {
const char *ddha_name;
} dsl_destroy_head_arg_t;
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
int error;
uint64_t count;
objset_t *mos;
if (dsl_dataset_is_snapshot(ds))
return (EINVAL);
if (refcount_count(&ds->ds_longholds) != expected_holds)
return (EBUSY);
mos = ds->ds_dir->dd_pool->dp_meta_objset;
/*
* Can't delete a head dataset if there are snapshots of it.
* (Except if the only snapshots are from the branch we cloned
* from.)
*/
if (ds->ds_prev != NULL &&
ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
return (EBUSY);
/*
* Can't delete if there are children of this fs.
*/
error = zap_count(mos,
ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
if (error != 0)
return (error);
if (count != 0)
return (EEXIST);
if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
ds->ds_prev->ds_phys->ds_num_children == 2 &&
ds->ds_prev->ds_userrefs == 0) {
/* We need to remove the origin snapshot as well. */
if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
return (EBUSY);
}
return (0);
}
static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
if (error != 0)
return (error);
error = dsl_destroy_head_check_impl(ds, 0);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
dd_used_t t;
ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
ASSERT0(dd->dd_phys->dd_head_dataset_obj);
/*
* Remove our reservation. The impl() routine avoids setting the
* actual property, which would require the (already destroyed) ds.
*/
dsl_dir_set_reservation_sync_impl(dd, 0, tx);
ASSERT0(dd->dd_phys->dd_used_bytes);
ASSERT0(dd->dd_phys->dd_reserved);
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY0(zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
dsl_dir_rele(dd, FTAG);
VERIFY0(dmu_object_free(mos, ddobj, tx));
}
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
uint64_t obj, ddobj, prevobj = 0;
boolean_t rmorigin;
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, "");
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
ds->ds_prev->ds_phys->ds_num_children == 2 &&
ds->ds_prev->ds_userrefs == 0);
/* Remove our reservation */
if (ds->ds_reserved != 0) {
dsl_dataset_set_refreservation_sync_impl(ds,
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
0, tx);
ASSERT0(ds->ds_reserved);
}
dsl_scan_ds_destroyed(ds, tx);
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
/* This is a clone */
ASSERT(ds->ds_prev != NULL);
ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
ASSERT0(ds->ds_phys->ds_next_snap_obj);
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
dsl_dataset_remove_from_next_clones(ds->ds_prev,
obj, tx);
}
ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
ds->ds_prev->ds_phys->ds_num_children--;
}
zfeature_info_t *async_destroy =
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
objset_t *os;
/*
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_deadlist_obj = 0;
VERIFY0(dmu_objset_from_ds(ds, &os));
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
old_synchronous_dataset_destroy(ds, tx);
} else {
/*
* Move the bptree into the pool's list of trees to
* clean up and update space accounting information.
*/
uint64_t used, comp, uncomp;
zil_destroy_sync(dmu_objset_zil(os), tx);
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
spa_feature_incr(dp->dp_spa, async_destroy, tx);
dp->dp_bptree_obj = bptree_alloc(mos, tx);
VERIFY0(zap_add(mos,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj, tx));
}
used = ds->ds_dir->dd_phys->dd_used_bytes;
comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == used);
bptree_add(mos, dp->dp_bptree_obj,
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
used, comp, uncomp, tx);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
used, comp, uncomp, tx);
}
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
VERIFY0(zap_remove_int(mos,
ds->ds_prev->ds_dir->dd_phys->dd_clones,
ds->ds_object, tx));
}
prevobj = ds->ds_prev->ds_object;
dsl_dataset_rele(ds->ds_prev, ds);
ds->ds_prev = NULL;
}
/*
* This must be done after the dsl_traverse(), because it will
* re-open the objset.
*/
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
/* Erase the link in the dir */
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
ddobj = ds->ds_dir->dd_object;
ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
ASSERT0(ds->ds_phys->ds_next_clones_obj);
ASSERT0(ds->ds_phys->ds_props_obj);
ASSERT0(ds->ds_phys->ds_userrefs_obj);
dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
VERIFY0(dmu_object_free(mos, obj, tx));
dsl_dir_destroy_sync(ddobj, tx);
if (rmorigin) {
dsl_dataset_t *prev;
VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
dsl_dataset_rele(prev, FTAG);
}
}
static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
dsl_destroy_head_sync_impl(ds, tx);
dsl_dataset_rele(ds, FTAG);
}
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
dsl_destroy_head_arg_t *ddha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
/* Mark it as inconsistent on-disk, in case we crash */
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
spa_history_log_internal_ds(ds, "destroy begin", tx, "");
dsl_dataset_rele(ds, FTAG);
}
int
dsl_destroy_head(const char *name)
{
dsl_destroy_head_arg_t ddha;
int error;
spa_t *spa;
boolean_t isenabled;
#ifdef _KERNEL
zfs_destroy_unmount_origin(name);
#endif
error = spa_open(name, &spa, FTAG);
if (error != 0)
return (error);
isenabled = spa_feature_is_enabled(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
spa_close(spa, FTAG);
ddha.ddha_name = name;
if (!isenabled) {
objset_t *os;
error = dsl_sync_task(name, dsl_destroy_head_check,
dsl_destroy_head_begin_sync, &ddha, 0);
if (error != 0)
return (error);
/*
* Head deletion is processed in one txg on old pools;
* remove the objects from open context so that the txg sync
* is not too long.
*/
error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error == 0) {
uint64_t prev_snap_txg =
dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
for (uint64_t obj = 0; error == 0;
error = dmu_object_next(os, &obj, FALSE,
prev_snap_txg))
(void) dmu_free_object(os, obj);
/* sync out all frees */
txg_wait_synced(dmu_objset_pool(os), 0);
dmu_objset_disown(os, FTAG);
}
}
return (dsl_sync_task(name, dsl_destroy_head_check,
dsl_destroy_head_sync, &ddha, 0));
}
/*
* Note, this function is used as the callback for dmu_objset_find(). We
* always return 0 so that we will continue to find and process
* inconsistent datasets, even if we encounter an error trying to
* process one of them.
*/
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
objset_t *os;
if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
dmu_objset_rele(os, FTAG);
if (inconsistent)
(void) dsl_destroy_head(dsname);
}
return (0);
}
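/*
 * Editor's note: an illustrative use, not part of this commit.  The pool
 * import path can sweep away datasets left inconsistent by an interrupted
 * receive or destroy by passing this callback to dmu_objset_find(), along
 * the lines of (spa here is a hypothetical placeholder):
 *
 *	(void) dmu_objset_find(spa_name(spa),
 *	    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
 *
 * Because the callback always returns 0, a failure to destroy one dataset
 * does not stop the traversal of the rest.
 */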


@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -45,8 +46,6 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
/* ARGSUSED */
static void
@ -63,7 +62,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd);
@ -77,18 +76,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **ddp)
{
dmu_buf_t *dbuf;
dsl_dir_t *dd;
int err;
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
dsl_pool_sync_context(dp));
ASSERT(dsl_pool_config_held(dp));
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
if (err)
if (err != 0)
return (err);
dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
@ -115,9 +113,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_snap_cmtime_update(dd);
if (dd->dd_phys->dd_parent_obj) {
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
if (err)
if (err != 0)
goto errout;
if (tail) {
#ifdef ZFS_DEBUG
@ -134,7 +132,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_parent->dd_phys->dd_child_dir_zapobj,
ddobj, 0, dd->dd_myname);
}
if (err)
if (err != 0)
goto errout;
} else {
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
@ -151,7 +149,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
*/
err = dmu_bonus_hold(dp->dp_meta_objset,
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
if (err)
if (err != 0)
goto errout;
origin_phys = origin_bonus->db_data;
dd->dd_origin_txg =
@ -163,7 +161,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_evict);
if (winner) {
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@ -190,7 +188,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
errout:
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@ -198,7 +196,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
}
void
dsl_dir_close(dsl_dir_t *dd, void *tag)
dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
spa_close(dd->dd_pool->dp_spa, tag);
@ -255,6 +253,7 @@ static int
getcomponent(const char *path, char *component, const char **nextp)
{
char *p;
if ((path == NULL) || (path[0] == '\0'))
return (ENOENT);
/* This would be a good place to reserve some namespace... */
@ -277,10 +276,10 @@ getcomponent(const char *path, char *component, const char **nextp)
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
p++;
} else if (p[0] == '@') {
/*
@ -289,65 +288,54 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (EINVAL);
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
} else {
ASSERT(!"invalid p");
panic("invalid p=%p", (void *)p);
}
*nextp = p;
return (0);
}
/*
* same as dsl_open_dir, ignore the first component of name and use the
* spa instead
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. The name must be in the specified dsl_pool_t. This
* thread must hold the dp_config_rwlock for the pool. Returns NULL if the
* path is bogus, or if tail==NULL and we couldn't parse the whole name.
* (*tail)[0] == '@' means that the last component is a snapshot.
*/
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **ddp, const char **tailp)
{
char buf[MAXNAMELEN];
const char *next, *nextnext = NULL;
const char *spaname, *next, *nextnext = NULL;
int err;
dsl_dir_t *dd;
dsl_pool_t *dp;
uint64_t ddobj;
int openedspa = FALSE;
dprintf("%s\n", name);
err = getcomponent(name, buf, &next);
if (err)
if (err != 0)
return (err);
if (spa == NULL) {
err = spa_open(buf, &spa, FTAG);
if (err) {
dprintf("spa_open(%s) failed\n", buf);
return (err);
}
openedspa = TRUE;
/* XXX this assertion belongs in spa_open */
ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
}
/* Make sure the name is in the specified pool. */
spaname = spa_name(dp->dp_spa);
if (strcmp(buf, spaname) != 0)
return (EINVAL);
dp = spa_get_dsl(spa);
ASSERT(dsl_pool_config_held(dp));
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err) {
rw_exit(&dp->dp_config_rwlock);
if (openedspa)
spa_close(spa, FTAG);
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err != 0) {
return (err);
}
while (next != NULL) {
dsl_dir_t *child_ds;
err = getcomponent(next, buf, &nextnext);
if (err)
if (err != 0)
break;
ASSERT(next[0] != '\0');
if (next[0] == '@')
@ -358,25 +346,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
err = zap_lookup(dp->dp_meta_objset,
dd->dd_phys->dd_child_dir_zapobj,
buf, sizeof (ddobj), 1, &ddobj);
if (err) {
if (err != 0) {
if (err == ENOENT)
err = 0;
break;
}
err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
if (err)
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
if (err != 0)
break;
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dd = child_ds;
next = nextnext;
}
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_dir_close(dd, tag);
if (openedspa)
spa_close(spa, FTAG);
if (err != 0) {
dsl_dir_rele(dd, tag);
return (err);
}
@ -387,30 +372,16 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
if (next != NULL &&
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
/* bad path name */
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
err = ENOENT;
}
if (tailp)
if (tailp != NULL)
*tailp = next;
if (openedspa)
spa_close(spa, FTAG);
*ddp = dd;
return (err);
}
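/*
 * Editor's note: an illustrative caller sketch, not part of this commit.
 * It resolves a name to a dsl_dir_t with dsl_dir_hold() above; the function
 * name is hypothetical, and the caller is assumed to already hold the pool
 * configuration lock (dsl_dir_hold() asserts this).
 */
static int
example_lookup_dir(dsl_pool_t *dp, const char *name)
{
	dsl_dir_t *dd;
	const char *tail;
	int err;

	err = dsl_dir_hold(dp, name, FTAG, &dd, &tail);
	if (err != 0)
		return (err);
	if (tail != NULL && tail[0] == '@') {
		/* The unresolved final component names a snapshot. */
		dprintf("snapshot component %s\n", tail);
	}
	dsl_dir_rele(dd, FTAG);
	return (0);
}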
/*
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. Return NULL if the path is bogus, or if
* tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@'
* means that the last component is a snapshot.
*/
int
dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
{
return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
}
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
dmu_tx_t *tx)
@ -448,77 +419,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
return (ddobj);
}
/* ARGSUSED */
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t count;
/*
* There should be exactly two holds, both from
* dsl_dataset_destroy: one on the dd directory, and one on its
* head ds. If there are more holds, then a concurrent thread is
* performing a lookup inside this dir while we're trying to destroy
* it. To minimize this possibility, we perform this check only
* in syncing context and fail the operation if we encounter
* additional holds. The dp_config_rwlock ensures that nobody else
* opens it after we check.
*/
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
return (EBUSY);
err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
if (err)
return (err);
if (count != 0)
return (EEXIST);
return (0);
}
void
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_prop_setarg_t psa;
uint64_t value = 0;
uint64_t obj;
dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
/* Remove our reservation. */
dsl_prop_setarg_init_uint64(&psa, "reservation",
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
&value);
psa.psa_effective_value = 0; /* predict default value */
dsl_dir_set_reservation_sync(ds, &psa, tx);
ASSERT0(dd->dd_phys->dd_used_bytes);
ASSERT0(dd->dd_phys->dd_reserved);
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY(0 == zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
obj = dd->dd_object;
dsl_dir_close(dd, tag);
VERIFY(0 == dmu_object_free(mos, obj, tx));
}
boolean_t
dsl_dir_is_clone(dsl_dir_t *dd)
{
@ -556,18 +456,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
}
mutex_exit(&dd->dd_lock);
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
char buf[MAXNAMELEN];
VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dd->dd_phys->dd_origin_obj, FTAG, &ds));
dsl_dataset_name(ds, buf);
dsl_dataset_rele(ds, FTAG);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
rw_exit(&dd->dd_pool->dp_config_rwlock);
}
void
@ -577,7 +475,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
ASSERT(dd->dd_phys);
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(dd->dd_dbuf, dd);
}
@ -864,7 +762,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
FALSE, asize > usize, tr_list, tx, TRUE);
}
if (err)
if (err != 0)
dsl_dir_tempreserve_clear(tr_list, tx);
else
*tr_cookiep = tr_list;
@ -1015,115 +913,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
mutex_exit(&dd->dd_lock);
}
typedef struct dsl_dir_set_qr_arg {
const char *ddsqra_name;
zprop_source_t ddsqra_source;
uint64_t ddsqra_value;
} dsl_dir_set_qr_arg_t;
static int
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
int err;
uint64_t towrite;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
uint64_t towrite, newval;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
if (psa->psa_effective_value == 0)
error = dsl_prop_predict(ds->ds_dir, "quota",
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
if (newval == 0) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
mutex_enter(&dd->dd_lock);
mutex_enter(&ds->ds_dir->dd_lock);
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
* pending changes could under-estimate the amount of space to be
* freed up.
*/
towrite = dsl_dir_space_towrite(dd);
towrite = dsl_dir_space_towrite(ds->ds_dir);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(psa->psa_effective_value < dd->dd_phys->dd_reserved ||
psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
err = ENOSPC;
(newval < ds->ds_dir->dd_phys->dd_reserved ||
newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
error = ENOSPC;
}
mutex_exit(&dd->dd_lock);
return (err);
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
return (error);
}
extern dsl_syncfunc_t dsl_prop_set_sync;
static void
dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
mutex_enter(&ds->ds_dir->dd_lock);
ds->ds_dir->dd_phys->dd_quota = newval;
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = quota;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
/*
* If someone removes a file, then tries to set the quota, we want to
* make sure the file freeing takes effect.
*/
txg_wait_open(dd->dd_pool, 0);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, &ddsqra, 0));
}
int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value;
uint64_t used, avail;
int err;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
dsl_dir_t *dd;
uint64_t newval, used, avail;
int error;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
effective_value = psa->psa_effective_value;
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
dd = ds->ds_dir;
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
if (!dmu_tx_is_syncing(tx))
if (!dmu_tx_is_syncing(tx)) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
error = dsl_prop_predict(ds->ds_dir,
zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
@ -1136,40 +1042,32 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, effective_value) -
if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, newval) -
MAX(used, dd->dd_phys->dd_reserved);
if (delta > avail)
return (ENOSPC);
if (dd->dd_phys->dd_quota > 0 &&
effective_value > dd->dd_phys->dd_quota)
return (ENOSPC);
if (delta > avail ||
(dd->dd_phys->dd_quota > 0 &&
newval > dd->dd_phys->dd_quota))
error = ENOSPC;
}
return (0);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
uint64_t used;
int64_t delta;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
dmu_buf_will_dirty(dd->dd_dbuf, tx);
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
delta = MAX(used, effective_value) -
MAX(used, dd->dd_phys->dd_reserved);
dd->dd_phys->dd_reserved = effective_value;
delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
dd->dd_phys->dd_reserved = value;
if (dd->dd_parent != NULL) {
/* Roll up this additional usage into our ancestors */
@ -1179,35 +1077,39 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
mutex_exit(&dd->dd_lock);
}
static void
dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = reservation;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, &ddsqra, 0));
}
static dsl_dir_t *
@ -1239,78 +1141,125 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
return (would_change(dd->dd_parent, delta, ancestor));
}
struct renamearg {
dsl_dir_t *newparent;
const char *mynewname;
boolean_t allowmounted;
};
typedef struct dsl_dir_rename_arg {
const char *ddra_oldname;
const char *ddra_newname;
} dsl_dir_rename_arg_t;
/* ARGSUSED */
static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
char namebuf[MAXNAMELEN];
dsl_dataset_name(ds, namebuf);
if (strlen(namebuf) + *deltap >= MAXNAMELEN)
return (ENAMETOOLONG);
return (0);
}
static int
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t val;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
/*
* There should only be one reference, from dmu_objset_rename().
* Fleeting holds are also possible (eg, from "zfs list" getting
* stats), but any that are present in open context will likely
* be gone by syncing context, so only fail from syncing
* context.
* Don't check if we allow renaming of busy (mounted) dataset.
*/
if (!ra->allowmounted && dmu_tx_is_syncing(tx) &&
dmu_buf_refcount(dd->dd_dbuf) > 1) {
return (EBUSY);
/* target dir should exist */
error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
if (error != 0)
return (error);
/* new parent should exist */
error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
&newparent, &mynewname);
if (error != 0) {
dsl_dir_rele(dd, FTAG);
return (error);
}
/* check for existing name */
err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
ra->mynewname, 8, 1, &val);
if (err == 0)
return (EEXIST);
if (err != ENOENT)
return (err);
/* can't rename to different pool */
if (dd->dd_pool != newparent->dd_pool) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (ENXIO);
}
if (ra->newparent != dd->dd_parent) {
/* new name should not already exist */
if (mynewname == NULL) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EEXIST);
}
/* if the name length is growing, validate child name lengths */
if (delta > 0) {
error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
&delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
if (newparent != dd->dd_parent) {
/* is there enough space? */
uint64_t myspace =
MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendant */
if (closest_common_ancestor(dd, ra->newparent) == dd)
if (closest_common_ancestor(dd, newparent) == dd) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EINVAL);
}
if (err = dsl_dir_transfer_possible(dd->dd_parent,
ra->newparent, myspace))
return (err);
error = dsl_dir_transfer_possible(dd->dd_parent,
newparent, myspace);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (0);
}
static void
dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
char oldname[MAXPATHLEN], newname[MAXPATHLEN];
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
objset_t *mos = dp->dp_meta_objset;
int err;
ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2);
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
if (ra->newparent != dd->dd_parent) {
VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
&mynewname));
/* Log this before we change the name. */
spa_history_log_internal_dd(dd, "rename", tx,
"-> %s", ddra->ddra_newname);
if (newparent != dd->dd_parent) {
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-dd->dd_phys->dd_used_bytes,
-dd->dd_phys->dd_compressed_bytes,
-dd->dd_phys->dd_uncompressed_bytes, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
dsl_dir_diduse_space(newparent, DD_USED_CHILD,
dd->dd_phys->dd_used_bytes,
dd->dd_phys->dd_compressed_bytes,
dd->dd_phys->dd_uncompressed_bytes, tx);
@ -1321,7 +1270,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
-unused_rsrv, 0, 0, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
unused_rsrv, 0, 0, tx);
}
}
@ -1329,62 +1278,43 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(dd->dd_dbuf, tx);
/* remove from old parent zapobj */
dsl_dir_name(dd, oldname);
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, tx);
ASSERT0(err);
ASSERT0(error);
(void) strcpy(dd->dd_myname, ra->mynewname);
dsl_dir_close(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
(void) strcpy(dd->dd_myname, mynewname);
dsl_dir_rele(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = newparent->dd_object;
VERIFY0(dsl_dir_hold_obj(dp,
newparent->dd_object, NULL, dd, &dd->dd_parent));
/* add to new parent zapobj */
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT0(err);
dsl_dir_name(dd, newname);
VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
#ifdef __FreeBSD__
#ifdef _KERNEL
zfsvfs_update_fromname(oldname, newname);
zvol_rename_minors(oldname, newname);
zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
#endif
#endif
spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
dsl_prop_notify_all(dd);
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
}
int
dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags)
dsl_dir_rename(const char *oldname, const char *newname)
{
struct renamearg ra;
int err;
dsl_dir_rename_arg_t ddra;
/* new parent should exist */
err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
if (err)
return (err);
ddra.ddra_oldname = oldname;
ddra.ddra_newname = newname;
/* can't rename to different pool */
if (dd->dd_pool != ra.newparent->dd_pool) {
err = ENXIO;
goto out;
}
/* new name should not already exist */
if (ra.mynewname == NULL) {
err = EEXIST;
goto out;
}
ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED);
err = dsl_sync_task_do(dd->dd_pool,
dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
out:
dsl_dir_close(ra.newparent, FTAG);
return (err);
return (dsl_sync_task(oldname,
dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
}
int


@ -43,6 +43,7 @@
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
#include <sys/dsl_userhold.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@ -94,7 +95,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
if (err)
return (err);
return (dsl_dir_open_obj(dp, obj, name, dp, ddp));
return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
}
static dsl_pool_t *
@ -106,7 +107,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
dp->dp_spa = spa;
dp->dp_meta_rootbp = *bp;
rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
rrw_init(&dp->dp_config_rwlock, B_TRUE);
dp->dp_write_limit = zfs_write_limit_min;
txg_init(dp, txg);
@ -117,7 +118,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_dirty_dirs,
offsetof(dsl_dir_t, dd_dirty_link));
txg_list_create(&dp->dp_sync_tasks,
offsetof(dsl_sync_task_group_t, dstg_node));
offsetof(dsl_sync_task_t, dst_node));
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
@ -151,14 +152,14 @@ dsl_pool_open(dsl_pool_t *dp)
dsl_dataset_t *ds;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
if (err)
goto out;
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir);
if (err)
goto out;
@ -179,7 +180,7 @@ dsl_pool_open(dsl_pool_t *dp)
&dp->dp_origin_snap);
dsl_dataset_rele(ds, FTAG);
}
dsl_dir_close(dd, dp);
dsl_dir_rele(dd, dp);
if (err)
goto out;
}
@ -194,7 +195,7 @@ dsl_pool_open(dsl_pool_t *dp)
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
if (err)
goto out;
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@ -227,7 +228,7 @@ dsl_pool_open(dsl_pool_t *dp)
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (err);
}
@ -242,13 +243,13 @@ dsl_pool_close(dsl_pool_t *dp)
* and not a hold, so just drop that here.
*/
if (dp->dp_origin_snap)
dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
dsl_dataset_rele(dp->dp_origin_snap, dp);
if (dp->dp_mos_dir)
dsl_dir_close(dp->dp_mos_dir, dp);
dsl_dir_rele(dp->dp_mos_dir, dp);
if (dp->dp_free_dir)
dsl_dir_close(dp->dp_free_dir, dp);
dsl_dir_rele(dp->dp_free_dir, dp);
if (dp->dp_root_dir)
dsl_dir_close(dp->dp_root_dir, dp);
dsl_dir_rele(dp->dp_root_dir, dp);
bpobj_close(&dp->dp_free_bpobj);
@ -264,7 +265,7 @@ dsl_pool_close(dsl_pool_t *dp)
arc_flush(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
rw_destroy(&dp->dp_config_rwlock);
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_vnrele_taskq);
if (dp->dp_blkstats)
@ -282,6 +283,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dsl_dataset_t *ds;
uint64_t obj;
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
@ -292,30 +295,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
ASSERT0(err);
/* Initialize scan structures */
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
VERIFY0(dsl_scan_init(dp, txg));
/* create and open the root dir */
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir));
/* create and open the meta-objset dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
MOS_DIR_NAME, &dp->dp_mos_dir));
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
/* create and open the free dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@ -326,7 +329,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
/* create the root objset */
VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
#ifdef _KERNEL
@ -336,6 +339,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dmu_tx_commit(tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (dp);
}
@ -358,10 +363,7 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@ -383,7 +385,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
/*
* We need to copy dp_space_towrite() before doing
* dsl_sync_task_group_sync(), because
* dsl_sync_task_sync(), because
* dsl_dataset_snapshot_reserve_space() will increase
* dp_space_towrite but not actually write anything.
*/
@ -497,14 +499,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
DTRACE_PROBE(pool_sync__3task);
if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
dsl_sync_task_group_t *dstg;
dsl_sync_task_t *dst;
/*
* No more sync tasks should have been added while we
* were syncing.
*/
ASSERT(spa_sync_pass(dp->dp_spa) == 1);
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
dsl_sync_task_group_sync(dstg, tx);
while (dst = txg_list_remove(&dp->dp_sync_tasks, txg))
dsl_sync_task_sync(dst, tx);
}
dmu_tx_commit(tx);
@ -679,14 +681,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
/* ARGSUSED */
static int
upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds, *prev = NULL;
int err;
dsl_pool_t *dp = spa_get_dsl(spa);
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@ -712,7 +713,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
ASSERT(prev->ds_phys->ds_bp.blk_birth == 0);
ASSERT0(prev->ds_phys->ds_bp.blk_birth);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {
@ -732,13 +733,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
if (ds->ds_phys->ds_next_snap_obj == 0) {
ASSERT(ds->ds_prev == NULL);
VERIFY(0 == dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
}
}
ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object);
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object);
ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
dmu_buf_will_dirty(prev->ds_dbuf, tx);
@ -746,7 +747,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY(0 == zap_add_int(dp->dp_meta_objset,
VERIFY0(zap_add_int(dp->dp_meta_objset,
prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx));
dsl_dataset_rele(ds, FTAG);
@ -761,25 +762,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
tx, DS_FIND_CHILDREN));
}
/* ARGSUSED */
static int
upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
if (ds->ds_dir->dd_phys->dd_origin_obj) {
if (ds->ds_dir->dd_phys->dd_origin_obj != 0) {
dsl_dataset_t *origin;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
if (origin->ds_dir->dd_phys->dd_clones == 0) {
@ -788,13 +785,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
VERIFY0(zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx));
dsl_dataset_rele(origin, FTAG);
}
dsl_dataset_rele(ds, FTAG);
return (0);
}
@ -805,7 +800,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
uint64_t obj;
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/*
@ -815,12 +810,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
*/
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
}
@ -832,17 +826,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap == NULL);
ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
dp, &dp->dp_origin_snap));
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
}
taskq_t *
@ -877,7 +870,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
(void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
dsl_dataset_user_release_tmp(dp, dsobj, htag);
}
zap_cursor_fini(&zc);
}
@ -899,7 +892,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
static int
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
{
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
@ -924,7 +917,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
if (holding)
error = zap_add(mos, zapobj, name, 8, 1, now, tx);
error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
else
error = zap_remove(mos, zapobj, name, tx);
strfree(name);
@ -937,7 +930,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
*/
int
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
uint64_t *now, dmu_tx_t *tx)
uint64_t now, dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
}
@ -949,6 +942,109 @@ int
dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0,
tx, B_FALSE));
}
/*
* DSL Pool Configuration Lock
*
* The dp_config_rwlock protects against changes to DSL state (e.g. dataset
* creation / destruction / rename / property setting). It must be held for
* read to hold a dataset or dsl_dir. I.e. you must call
* dsl_pool_config_enter() or dsl_pool_hold() before calling
* dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock
* must be held continuously until all datasets and dsl_dirs are released.
*
* The only exception to this rule is that if a "long hold" is placed on
* a dataset, then the dp_config_rwlock may be dropped while the dataset
* is still held. The long hold will prevent the dataset from being
* destroyed -- the destroy will fail with EBUSY. A long hold can be
* obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
* (by calling dsl_{dataset,objset}_{try}own{_obj}).
*
* Legitimate long-holders (including owners) should be long-running, cancelable
* tasks that should cause "zfs destroy" to fail. This includes DMU
* consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
* "zfs send", and "zfs diff". There are several other long-holders whose
* uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
*
* The usual formula for long-holding would be:
* dsl_pool_hold()
* dsl_dataset_hold()
* ... perform checks ...
* dsl_dataset_long_hold()
* dsl_pool_rele()
* ... perform long-running task ...
* dsl_dataset_long_rele()
* dsl_dataset_rele()
*
* Note that when the long hold is released, the dataset is still held but
* the pool is not held. The dataset may change arbitrarily during this time
* (e.g. it could be destroyed). Therefore you shouldn't do anything to the
* dataset except release it.
*
* User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
* or modifying operations.
*
* Modifying operations should generally use dsl_sync_task(). The synctask
* infrastructure enforces proper locking strategy with respect to the
* dp_config_rwlock. See the comment above dsl_sync_task() for details.
*
* Read-only operations will manually hold the pool, then the dataset, obtain
* information from the dataset, then release the pool and dataset.
* dmu_objset_{hold,rele}() are convenience routines that also do the pool
* hold/rele.
*/
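/*
 * Editor's note: an illustrative sketch of the long-hold formula described
 * above, not part of this commit.  The function name and the use of "tag"
 * are hypothetical; dsl_dataset_long_hold()/dsl_dataset_long_rele() are the
 * interfaces the comment refers to.
 */
static int
example_long_running_task(const char *name, void *tag)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	error = dsl_pool_hold(name, tag, &dp);
	if (error != 0)
		return (error);
	error = dsl_dataset_hold(dp, name, tag, &ds);
	if (error != 0) {
		dsl_pool_rele(dp, tag);
		return (error);
	}
	/* ... perform checks while both the pool and the dataset are held ... */
	dsl_dataset_long_hold(ds, tag);
	dsl_pool_rele(dp, tag);

	/*
	 * ... perform the long-running task; "zfs destroy" of this dataset
	 * now fails with EBUSY ...
	 */

	dsl_dataset_long_rele(ds, tag);
	dsl_dataset_rele(ds, tag);
	return (0);
}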
int
dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
{
spa_t *spa;
int error;
error = spa_open(name, &spa, tag);
if (error == 0) {
*dp = spa_get_dsl(spa);
dsl_pool_config_enter(*dp, tag);
}
return (error);
}
void
dsl_pool_rele(dsl_pool_t *dp, void *tag)
{
dsl_pool_config_exit(dp, tag);
spa_close(dp->dp_spa, tag);
}
void
dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
{
/*
* We use a "reentrant" reader-writer lock, but not reentrantly.
*
* The rrwlock can (with the track_all flag) track all reading threads,
* which is very useful for debugging which code path failed to release
* the lock, and for verifying that the *current* thread does hold
* the lock.
*
* (Unlike a rwlock, which knows that N threads hold it for
* read, but not *which* threads, so rw_held(RW_READER) returns TRUE
* if any thread holds it for read, even if this thread doesn't).
*/
ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
}
void
dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
{
rrw_exit(&dp->dp_config_rwlock, tag);
}
boolean_t
dsl_pool_config_held(dsl_pool_t *dp)
{
return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
}
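/*
 * Illustrative sketch (not part of this commit): the long-hold formula from
 * the comment above, written out as a hypothetical long-running consumer.
 * example_long_running_task() is a made-up name; the dsl_* calls follow the
 * interfaces introduced by this change.
 */
static int
example_long_running_task(const char *dsname, void *tag)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	error = dsl_pool_hold(dsname, tag, &dp);
	if (error != 0)
		return (error);
	error = dsl_dataset_hold(dp, dsname, tag, &ds);
	if (error != 0) {
		dsl_pool_rele(dp, tag);
		return (error);
	}
	/* ... perform checks while both pool and dataset are held ... */
	dsl_dataset_long_hold(ds, tag);
	dsl_pool_rele(dp, tag);
	/* ... long-running work; "zfs destroy" of ds now fails with EBUSY ... */
	dsl_dataset_long_rele(ds, tag);
	dsl_dataset_rele(ds, tag);
	return (0);
}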

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -81,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
char *inheritstr;
char *recvdstr;
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(dd->dd_pool));
if (setpoint)
setpoint[0] = '\0';
@ -96,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
* after this loop.
*/
for (; dd != NULL; dd = dd->dd_parent) {
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
if (dd != target || snapshot) {
if (!inheritable)
break;
@ -166,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
boolean_t snapshot;
uint64_t zapobj;
ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds));
zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj);
@ -234,18 +233,12 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
uint64_t value;
dsl_prop_cb_record_t *cbr;
int err;
int need_rwlock;
need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock);
if (need_rwlock)
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL);
if (err != 0) {
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
err = dsl_prop_get_int_ds(ds, propname, &value);
if (err != 0)
return (err);
}
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
cbr->cbr_ds = ds;
@ -258,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
mutex_exit(&dd->dd_lock);
cbr->cbr_func(cbr->cbr_arg, value);
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@ -268,19 +258,18 @@ int
dsl_prop_get(const char *dsname, const char *propname,
int intsz, int numints, void *buf, char *setpoint)
{
dsl_dataset_t *ds;
int err;
objset_t *os;
int error;
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
error = dsl_prop_get_ds(dmu_objset_ds(os), propname,
intsz, numints, buf, setpoint);
dsl_dataset_rele(ds, FTAG);
return (err);
dmu_objset_rele(os, FTAG);
return (error);
}
/*
@ -298,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname,
return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
}
void
dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value)
int
dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname,
uint64_t *valuep)
{
psa->psa_name = propname;
psa->psa_source = source;
psa->psa_intsz = 8;
psa->psa_numints = 1;
psa->psa_value = value;
psa->psa_effective_value = -1ULL;
return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL));
}
/*
@ -322,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
* a property not handled by this function.
*/
int
dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
dsl_prop_predict(dsl_dir_t *dd, const char *propname,
zprop_source_t source, uint64_t value, uint64_t *newvalp)
{
const char *propname = psa->psa_name;
zfs_prop_t prop = zfs_name_to_prop(propname);
zprop_source_t source = psa->psa_source;
objset_t *mos;
uint64_t zapobj;
uint64_t version;
@ -358,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
switch (source) {
case ZPROP_SRC_NONE:
/* Revert to the received value, if any. */
err = zap_lookup(mos, zapobj, recvdstr, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
case ZPROP_SRC_LOCAL:
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case ZPROP_SRC_RECEIVED:
/*
* If there's no local setting, then the new received value will
* be the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
/*
* We're clearing the received value, so the local setting (if
* it exists) remains the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
default:
cmn_err(CE_PANIC, "unexpected property source: %d", source);
panic("unexpected property source: %d", source);
}
strfree(recvdstr);
@ -398,37 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
return (err);
}
#ifdef ZFS_DEBUG
void
dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
{
zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
uint64_t intval;
char setpoint[MAXNAMELEN];
uint64_t version = spa_version(dd->dd_pool->dp_spa);
int err;
if (version < SPA_VERSION_RECVD_PROPS) {
switch (prop) {
case ZFS_PROP_QUOTA:
case ZFS_PROP_RESERVATION:
return;
}
}
err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval,
setpoint, B_FALSE);
if (err == 0 && intval != psa->psa_effective_value) {
cmn_err(CE_PANIC, "%s property, source: %x, "
"predicted effective value: %llu, "
"actual effective value: %llu (setpoint: %s)",
psa->psa_name, psa->psa_source,
(unsigned long long)psa->psa_effective_value,
(unsigned long long)intval, setpoint);
}
}
#endif
/*
* Unregister this callback. Return 0 on success, ENOENT if ddname is
* invalid, ENOMSG if no matching callback registered.
@ -463,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (0);
}
/*
* Return the number of callbacks that are registered for this dataset.
*/
int
dsl_prop_numcb(dsl_dataset_t *ds)
boolean_t
dsl_prop_hascb(dsl_dataset_t *ds)
{
dsl_dir_t *dd = ds->ds_dir;
boolean_t rv = B_FALSE;
dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds) {
rv = B_TRUE;
break;
}
}
mutex_exit(&dd->dd_lock);
return (rv);
}
/* ARGSUSED */
static int
dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_cb_record_t *cbr;
int num = 0;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs);
cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds)
num++;
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
uint64_t value;
if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
sizeof (value), 1, &value, NULL) == 0)
cbr->cbr_func(cbr->cbr_arg, value);
}
mutex_exit(&dd->dd_lock);
return (num);
return (0);
}
/*
* Update all property values for ddobj & its descendants. This is used
* when renaming the dir.
*/
void
dsl_prop_notify_all(dsl_dir_t *dd)
{
dsl_pool_t *dp = dd->dd_pool;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
(void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb,
NULL, DS_FIND_CHILDREN);
}
static void
@ -495,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
zap_attribute_t *za;
int err;
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
if (err)
return;
@ -507,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
*/
err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname);
if (err == 0) {
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
return;
}
ASSERT3U(err, ==, ENOENT);
@ -542,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
}
kmem_free(za, sizeof (zap_attribute_t));
zap_cursor_fini(&zc);
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
}
void
dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_prop_setarg_t *psa = arg2;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t zapobj, intval, dummy;
int isint;
char valbuf[32];
char *valstr = NULL;
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
const char *propname = psa->psa_name;
zprop_source_t source = psa->psa_source;
isint = (dodefault(propname, 8, 1, &intval) == 0);
@ -611,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY(0 == zap_update(mos, zapobj, propname,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx));
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
break;
case ZPROP_SRC_INHERITED:
/*
@ -623,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy,
NULL) == 0) {
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
err = zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx);
ASSERT(err == 0);
VERIFY0(zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx));
}
break;
case ZPROP_SRC_RECEIVED:
@ -636,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* set propname$recvd -> value
*/
err = zap_update(mos, zapobj, recvdstr,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx);
intsz, numints, value, tx);
ASSERT(err == 0);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
@ -666,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
strfree(recvdstr);
if (isint) {
VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL));
VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) {
dsl_prop_cb_record_t *cbr;
@ -693,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
valstr = valbuf;
} else {
if (source == ZPROP_SRC_LOCAL) {
valstr = (char *)psa->psa_value;
valstr = value;
} else {
tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
if (dsl_prop_get_ds(ds, propname, 1,
@ -702,146 +678,81 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
}
spa_history_log_internal((source == ZPROP_SRC_NONE ||
source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT :
LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx,
"%s=%s dataset = %llu", propname,
(valstr == NULL ? "" : valstr), ds->ds_object);
spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE ||
source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx,
"%s=%s", propname, (valstr == NULL ? "" : valstr));
if (tbuf != NULL)
kmem_free(tbuf, ZAP_MAXVALUELEN);
}
void
dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_prop_set_int(const char *dsname, const char *propname,
zprop_source_t source, uint64_t value)
{
dsl_dataset_t *ds = arg1;
dsl_props_arg_t *pa = arg2;
nvlist_t *props = pa->pa_props;
dsl_prop_setarg_t psa;
nvpair_t *elem = NULL;
nvlist_t *nvl = fnvlist_alloc();
int error;
psa.psa_source = pa->pa_source;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
psa.psa_name = nvpair_name(pair);
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
* dsl_prop_get_all_impl() returns properties in this
* format.
*/
nvlist_t *attrs;
VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&pair) == 0);
}
if (nvpair_type(pair) == DATA_TYPE_STRING) {
VERIFY(nvpair_value_string(pair,
(char **)&psa.psa_value) == 0);
psa.psa_intsz = 1;
psa.psa_numints = strlen(psa.psa_value) + 1;
} else {
uint64_t intval;
VERIFY(nvpair_value_uint64(pair, &intval) == 0);
psa.psa_intsz = sizeof (intval);
psa.psa_numints = 1;
psa.psa_value = &intval;
}
dsl_prop_set_sync(ds, &psa, tx);
}
}
void
dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
dmu_tx_t *tx)
{
objset_t *mos = dd->dd_pool->dp_meta_objset;
uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
ASSERT(dmu_tx_is_syncing(tx));
VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx));
dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE);
spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx,
"%s=%llu dataset = %llu", name, (u_longlong_t)val,
dd->dd_phys->dd_head_dataset_obj);
fnvlist_add_uint64(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
int intsz, int numints, const void *buf)
dsl_prop_set_string(const char *dsname, const char *propname,
zprop_source_t source, const char *value)
{
dsl_dataset_t *ds;
uint64_t version;
int err;
dsl_prop_setarg_t psa;
nvlist_t *nvl = fnvlist_alloc();
int error;
/*
* We must do these checks before we get to the syncfunc, since
* it can't fail.
*/
if (strlen(propname) >= ZAP_MAXNAMELEN)
return (ENAMETOOLONG);
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
dsl_dataset_rele(ds, FTAG);
return (E2BIG);
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
psa.psa_name = propname;
psa.psa_source = source;
psa.psa_intsz = intsz;
psa.psa_numints = numints;
psa.psa_value = buf;
psa.psa_effective_value = -1ULL;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_prop_set_sync, ds, &psa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
fnvlist_add_string(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
dsl_prop_inherit(const char *dsname, const char *propname,
zprop_source_t source)
{
nvlist_t *nvl = fnvlist_alloc();
int error;
fnvlist_add_boolean(nvl, propname);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
typedef struct dsl_props_set_arg {
const char *dpsa_dsname;
zprop_source_t dpsa_source;
nvlist_t *dpsa_props;
} dsl_props_set_arg_t;
static int
dsl_props_set_check(void *arg, dmu_tx_t *tx)
{
dsl_props_set_arg_t *dpsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t version;
nvpair_t *elem = NULL;
dsl_props_arg_t pa;
int err;
if (err = dsl_dataset_hold(dsname, FTAG, &ds))
err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds);
if (err != 0)
return (err);
/*
* Do these checks before the syncfunc, since it can't fail.
*/
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) {
if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
dsl_dataset_rele(ds, FTAG);
return (ENAMETOOLONG);
}
if (nvpair_type(elem) == DATA_TYPE_STRING) {
char *valstr;
VERIFY(nvpair_value_string(elem, &valstr) == 0);
char *valstr = fnvpair_value_string(elem);
if (strlen(valstr) >= (version <
SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
@ -851,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
}
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
pa.pa_props = props;
pa.pa_source = source;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_props_set_sync, ds, &pa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
return (0);
}
void
dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
nvlist_t *props, dmu_tx_t *tx)
{
nvpair_t *elem = NULL;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
* dsl_prop_get_all_impl() returns properties in this
* format.
*/
nvlist_t *attrs = fnvpair_value_nvlist(pair);
pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
}
if (nvpair_type(pair) == DATA_TYPE_STRING) {
const char *value = fnvpair_value_string(pair);
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, 1, strlen(value) + 1, value, tx);
} else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
uint64_t intval = fnvpair_value_uint64(pair);
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, sizeof (intval), 1, &intval, tx);
} else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) {
dsl_prop_set_sync_impl(ds, nvpair_name(pair),
source, 0, 0, NULL, tx);
} else {
panic("invalid nvpair type");
}
}
}
static void
dsl_props_set_sync(void *arg, dmu_tx_t *tx)
{
dsl_props_set_arg_t *dpsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds));
dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx);
dsl_dataset_rele(ds, FTAG);
}
/*
* All-or-nothing; if any prop can't be set, nothing will be modified.
*/
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
{
dsl_props_set_arg_t dpsa;
int nblks = 0;
dpsa.dpsa_dsname = dsname;
dpsa.dpsa_source = source;
dpsa.dpsa_props = props;
/*
* If the source includes NONE, then we will only be removing entries
* from the ZAP object. In that case don't check for ENOSPC.
*/
if ((source & ZPROP_SRC_NONE) == 0)
nblks = 2 * fnvlist_num_pairs(props);
return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync,
&dpsa, nblks));
}
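/*
 * Illustrative sketch (not part of this commit): setting two properties
 * atomically through dsl_props_set().  The property choices and the user
 * property name are examples only.
 */
static int
example_set_props(const char *dsname)
{
	nvlist_t *props = fnvlist_alloc();
	int error;

	fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_COMPRESSION),
	    ZIO_COMPRESS_LZJB);
	fnvlist_add_string(props, "com.example:note", "nightly build");
	/* All-or-nothing: either both entries are set or neither is. */
	error = dsl_props_set(dsname, ZPROP_SRC_LOCAL, props);
	fnvlist_free(props);
	return (error);
}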
typedef enum dsl_prop_getflags {
@ -1014,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (dsl_dataset_is_snapshot(ds))
flags |= DSL_PROP_GET_SNAPSHOT;
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
if (ds->ds_phys->ds_props_obj != 0) {
ASSERT(flags & DSL_PROP_GET_SNAPSHOT);
@ -1039,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
break;
}
out:
rw_exit(&dp->dp_config_rwlock);
return (err);
}
boolean_t
dsl_prop_get_hasrecvd(objset_t *os)
dsl_prop_get_hasrecvd(const char *dsname)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
int rc;
uint64_t dummy;
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return (rc == 0);
return (0 ==
dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL));
}
static void
dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source)
static int
dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
uint64_t dummy = 0;
dsl_prop_setarg_t psa;
uint64_t version;
spa_t *spa;
int error = 0;
if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS)
return;
VERIFY0(spa_open(dsname, &spa, FTAG));
version = spa_version(spa);
spa_close(spa, FTAG);
dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy);
(void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL,
dsl_prop_set_sync, ds, &psa, 2);
if (version >= SPA_VERSION_RECVD_PROPS)
error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0);
return (error);
}
/*
* Call after successfully receiving properties to ensure that only the first
* receive on or after SPA_VERSION_RECVD_PROPS blows away local properties.
*/
void
dsl_prop_set_hasrecvd(objset_t *os)
int
dsl_prop_set_hasrecvd(const char *dsname)
{
if (dsl_prop_get_hasrecvd(os)) {
ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return;
}
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL);
int error = 0;
if (!dsl_prop_get_hasrecvd(dsname))
error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL);
return (error);
}
void
dsl_prop_unset_hasrecvd(objset_t *os)
dsl_prop_unset_hasrecvd(const char *dsname)
{
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE);
VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE));
}
int
@ -1100,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
}
int
dsl_prop_get_received(objset_t *os, nvlist_t **nvp)
dsl_prop_get_received(const char *dsname, nvlist_t **nvp)
{
objset_t *os;
int error;
/*
* Received properties are not distinguishable from local properties
* until the dataset has received properties on or after
* SPA_VERSION_RECVD_PROPS.
*/
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ?
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ?
DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL);
return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags));
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags);
dmu_objset_rele(os, FTAG);
return (error);
}
void

View File

@ -55,7 +55,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_defrag_cb;
static scan_cb_t dsl_scan_scrub_cb;
static scan_cb_t dsl_scan_remove_cb;
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
@ -184,9 +184,9 @@ dsl_scan_fini(dsl_pool_t *dp)
/* ARGSUSED */
static int
dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (EBUSY);
@ -194,12 +194,11 @@ dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
}
/* ARGSUSED */
static void
dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
pool_scan_func_t *funcp = arg2;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
@ -258,7 +257,7 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_sync_state(scn, tx);
spa_history_log_internal(LOG_POOL_SCAN, spa, tx,
spa_history_log_internal(spa, "scan setup", tx,
"func=%u mintxg=%llu maxtxg=%llu",
*funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
}
@ -307,7 +306,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
else
scn->scn_phys.scn_state = DSS_CANCELED;
spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx,
spa_history_log_internal(spa, "scan done", tx,
"complete=%u", complete);
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
@ -345,9 +344,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
/* ARGSUSED */
static int
dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state != DSS_SCANNING)
return (ENOENT);
@ -356,9 +355,9 @@ dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* ARGSUSED */
static void
dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
dsl_scan_done(scn, B_FALSE, tx);
dsl_scan_sync_state(scn, tx);
@ -367,12 +366,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_scan_cancel(dsl_pool_t *dp)
{
boolean_t complete = B_FALSE;
int err;
err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
return (err);
return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
dsl_scan_cancel_sync, NULL, 3));
}
static void dsl_scan_visitbp(blkptr_t *bp,
@ -409,7 +404,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
static void
dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
{
VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
&scn->scn_phys, tx));
@ -981,33 +976,33 @@ struct enqueue_clones_arg {
/* ARGSUSED */
static int
enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
struct enqueue_clones_arg *eca = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
return (0);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
return (0);
}
@ -1096,17 +1091,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
}
if (usenext) {
VERIFY(zap_join_key(dp->dp_meta_objset,
VERIFY0(zap_join_key(dp->dp_meta_objset,
ds->ds_phys->ds_next_clones_obj,
scn->scn_phys.scn_queue_obj,
ds->ds_phys->ds_creation_txg, tx) == 0);
ds->ds_phys->ds_creation_txg, tx));
} else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
}
}
@ -1116,15 +1111,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
/* ARGSUSED */
static int
enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@ -1279,8 +1273,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
@ -1415,7 +1409,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u txg=%llu",
func, tx->tx_txg);
dsl_scan_setup_sync(scn, &func, tx);
dsl_scan_setup_sync(&func, tx);
}
if (!dsl_scan_active(scn) ||
@ -1449,21 +1443,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
if (err != 0)
return;
VERIFY0(zio_wait(scn->scn_zio_root));
/* disable async destroy feature */
spa_feature_decr(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
ASSERT(!spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
if (err == 0) {
zfeature_info_t *feat = &spa_feature_table
[SPA_FEATURE_ASYNC_DESTROY];
/* finished; deactivate async destroy feature */
spa_feature_decr(spa, feat, tx);
ASSERT(!spa_feature_is_active(spa, feat));
VERIFY0(zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY0(bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
}
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
@ -1510,7 +1504,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
dsl_pool_config_enter(dp, FTAG);
dsl_scan_visit(scn, tx);
dsl_pool_config_exit(dp, FTAG);
(void) zio_wait(scn->scn_zio_root);
scn->scn_zio_root = NULL;
@ -1746,6 +1742,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
return (dsl_sync_task_do(dp, dsl_scan_setup_check,
dsl_scan_setup_sync, dp->dp_scan, &func, 0));
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0));
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -33,135 +34,115 @@
/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
{
return (0);
}
dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
dsl_sync_task_group_t *dstg;
dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
offsetof(dsl_sync_task_t, dst_node));
dstg->dstg_pool = dp;
return (dstg);
}
void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_t *dst;
if (checkfunc == NULL)
checkfunc = dsl_null_checkfunc;
dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
dst->dst_checkfunc = checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg1 = arg1;
dst->dst_arg2 = arg2;
list_insert_tail(&dstg->dstg_tasks, dst);
dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}
/*
* Called from open context to perform a callback in syncing context. Waits
* for the operation to complete.
*
* The checkfunc will be called from open context as a preliminary check
* which can quickly fail. If it succeeds, it will be called again from
* syncing context. The checkfunc should generally be designed to work
* properly in either context, but if necessary it can check
* dmu_tx_is_syncing(tx).
*
* The synctask infrastructure enforces proper locking strategy with respect
* to the dp_config_rwlock -- the lock will always be held when the callbacks
* are called. It will be held for read during the open-context (preliminary)
* call to the checkfunc, and then held for write from syncing context during
* the calls to the check and sync funcs.
*
* A dataset or pool name can be passed as the first argument. Typically,
* the check func will hold, check the return value of the hold, and then
* release the dataset. The sync func will VERIFY0(hold()) the dataset.
* This is safe because no changes can be made between the check and sync funcs,
* and the sync func will only be called if the check func successfully opened
* the dataset.
*/
int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified)
{
spa_t *spa;
dmu_tx_t *tx;
uint64_t txg;
dsl_sync_task_t *dst;
int err;
dsl_sync_task_t dst = { 0 };
dsl_pool_t *dp;
err = spa_open(pool, &spa, FTAG);
if (err != 0)
return (err);
dp = spa_get_dsl(spa);
top:
tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
tx = dmu_tx_create_dd(dp->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
txg = dmu_tx_get_txg(tx);
dst.dst_pool = dp;
dst.dst_txg = dmu_tx_get_txg(tx);
dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
dst.dst_syncfunc = syncfunc;
dst.dst_arg = arg;
dst.dst_error = 0;
dst.dst_nowaiter = B_FALSE;
/* Do a preliminary error check. */
dstg->dstg_err = 0;
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
#ifdef ZFS_DEBUG
/*
* Only check half the time, otherwise, the sync-context
* check will almost never fail.
*/
if (spa_get_random(2) == 0)
continue;
#endif
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
rw_exit(&dstg->dstg_pool->dp_config_rwlock);
dsl_pool_config_enter(dp, FTAG);
err = dst.dst_checkfunc(arg, tx);
dsl_pool_config_exit(dp, FTAG);
if (dstg->dstg_err) {
if (err != 0) {
dmu_tx_commit(tx);
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (err);
}
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));
dmu_tx_commit(tx);
txg_wait_synced(dstg->dstg_pool, txg);
txg_wait_synced(dp, dst.dst_txg);
if (dstg->dstg_err == EAGAIN) {
txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
if (dst.dst_error == EAGAIN) {
txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
goto top;
}
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (dst.dst_error);
}
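/*
 * Illustrative sketch (not part of this commit): a minimal caller of the new
 * dsl_sync_task() interface following the pattern described in the comment
 * above.  The example_* names and argument struct are hypothetical.
 */
typedef struct example_arg {
	const char *ea_dsname;
} example_arg_t;

static int
example_check(void *arg, dmu_tx_t *tx)
{
	example_arg_t *ea = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ea->ea_dsname, FTAG, &ds);
	if (error != 0)
		return (error);
	/* ... validate the request against the held dataset ... */
	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
example_sync(void *arg, dmu_tx_t *tx)
{
	example_arg_t *ea = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	/* The check func already succeeded, so this hold cannot fail. */
	VERIFY0(dsl_dataset_hold(dp, ea->ea_dsname, FTAG, &ds));
	/* ... apply the change in syncing context ... */
	dsl_dataset_rele(ds, FTAG);
}

static int
example_do(const char *dsname)
{
	example_arg_t ea = { dsname };

	return (dsl_sync_task(dsname, example_check, example_sync, &ea, 1));
}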
void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, dmu_tx_t *tx)
{
uint64_t txg;
dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
dstg->dstg_nowaiter = B_TRUE;
txg = dmu_tx_get_txg(tx);
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
dst->dst_pool = dp;
dst->dst_txg = dmu_tx_get_txg(tx);
dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst->dst_checkfunc = dsl_null_checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg = arg;
dst->dst_error = 0;
dst->dst_nowaiter = B_TRUE;
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg));
}
/*
* Called in syncing context to execute the synctask.
*/
void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
while (dst = list_head(&dstg->dstg_tasks)) {
list_remove(&dstg->dstg_tasks, dst);
kmem_free(dst, sizeof (dsl_sync_task_t));
}
kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}
void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
dsl_pool_t *dp = dstg->dstg_pool;
dsl_pool_t *dp = dst->dst_pool;
uint64_t quota, used;
ASSERT0(dstg->dstg_err);
ASSERT0(dst->dst_error);
/*
* Check for sufficient space. We just check against what's
@ -173,63 +154,21 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
/* MOS space is triple-dittoed, so we multiply by 3. */
if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
dstg->dstg_err = ENOSPC;
if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) {
dst->dst_error = ENOSPC;
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
return;
}
/*
* Check for errors by calling checkfuncs.
* Check for errors by calling checkfunc.
*/
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
if (dstg->dstg_err == 0) {
/*
* Execute sync tasks.
*/
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
}
}
rw_exit(&dp->dp_config_rwlock);
if (dstg->dstg_nowaiter)
dsl_sync_task_group_destroy(dstg);
}
int
dsl_sync_task_do(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_group_t *dstg;
int err;
ASSERT(spa_writeable(dp->dp_spa));
dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
return (err);
}
void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
dsl_sync_task_group_nowait(dstg, tx);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
if (dst->dst_error == 0)
dst->dst_syncfunc(dst->dst_arg, tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
}

View File

@ -0,0 +1,536 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
typedef struct dsl_dataset_user_hold_arg {
nvlist_t *dduha_holds;
nvlist_t *dduha_errlist;
minor_t dduha_minor;
} dsl_dataset_user_hold_arg_t;
/*
* If you add new checks here, you may need to add additional checks to the
* "temporary" case in snapshot_check() in dmu_objset.c.
*/
int
dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
boolean_t temphold, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
int error = 0;
if (strlen(htag) > MAXNAMELEN)
return (E2BIG);
/* Tempholds have a more restricted length */
if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
return (E2BIG);
/* tags must be unique (if ds already exists) */
if (ds != NULL) {
mutex_enter(&ds->ds_lock);
if (ds->ds_phys->ds_userrefs_obj != 0) {
uint64_t value;
error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
htag, 8, 1, &value);
if (error == 0)
error = EEXIST;
else if (error == ENOENT)
error = 0;
}
mutex_exit(&ds->ds_lock);
}
return (error);
}
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
return (ENOTSUP);
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
int error = 0;
dsl_dataset_t *ds;
char *htag;
/* must be a snapshot */
if (strchr(nvpair_name(pair), '@') == NULL)
error = EINVAL;
if (error == 0)
error = nvpair_value_string(pair, &htag);
if (error == 0) {
error = dsl_dataset_hold(dp,
nvpair_name(pair), FTAG, &ds);
}
if (error == 0) {
error = dsl_dataset_user_hold_check_one(ds, htag,
dduha->dduha_minor != 0, tx);
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
rv = error;
fnvlist_add_int32(dduha->dduha_errlist,
nvpair_name(pair), error);
}
}
return (rv);
}
void
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
minor_t minor, uint64_t now, dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
mutex_enter(&ds->ds_lock);
if (ds->ds_phys->ds_userrefs_obj == 0) {
/*
* This is the first user hold for this dataset. Create
* the userrefs zap object.
*/
dmu_buf_will_dirty(ds->ds_dbuf, tx);
zapobj = ds->ds_phys->ds_userrefs_obj =
zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
} else {
zapobj = ds->ds_phys->ds_userrefs_obj;
}
ds->ds_userrefs++;
mutex_exit(&ds->ds_lock);
VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
if (minor != 0) {
VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
htag, now, tx));
dsl_register_onexit_hold_cleanup(ds, htag, minor);
}
spa_history_log_internal_ds(ds, "hold", tx,
"tag=%s temp=%d refs=%llu",
htag, minor != 0, ds->ds_userrefs);
}
static void
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_hold_arg_t *dduha = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
uint64_t now = gethrestime_sec();
for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
dduha->dduha_minor, now, tx);
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> holdname
* errlist will be filled in with snapname -> error
* if cleanup_minor is not 0, the holds will be temporary, cleaned up
* when the process exits.
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
{
dsl_dataset_user_hold_arg_t dduha;
nvpair_t *pair;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
dduha.dduha_holds = holds;
dduha.dduha_errlist = errlist;
dduha.dduha_minor = cleanup_minor;
return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
}
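/*
 * Illustrative sketch (not part of this commit): holding two snapshots
 * atomically using the snapname -> holdname layout described above.
 * Snapshot and tag names are examples only.
 */
static int
example_hold_snapshots(void)
{
	nvlist_t *holds = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();
	int error;

	fnvlist_add_string(holds, "pool/fs@snap1", "example-tag");
	fnvlist_add_string(holds, "pool/fs@snap2", "example-tag");
	/* cleanup_minor == 0, so the holds are permanent, not temporary. */
	error = dsl_dataset_user_hold(holds, 0, errlist);
	fnvlist_free(errlist);
	fnvlist_free(holds);
	return (error);
}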
typedef struct dsl_dataset_user_release_arg {
nvlist_t *ddura_holds;
nvlist_t *ddura_todelete;
nvlist_t *ddura_errlist;
} dsl_dataset_user_release_arg_t;
static int
dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
nvlist_t *holds, boolean_t *todelete)
{
uint64_t zapobj;
nvpair_t *pair;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int error;
int numholds = 0;
*todelete = B_FALSE;
if (!dsl_dataset_is_snapshot(ds))
return (EINVAL);
zapobj = ds->ds_phys->ds_userrefs_obj;
if (zapobj == 0)
return (ESRCH);
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
/* Make sure the hold exists */
uint64_t tmp;
error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
if (error == ENOENT)
error = ESRCH;
if (error != 0)
return (error);
numholds++;
}
if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
ds->ds_userrefs == numholds) {
/* we need to destroy the snapshot as well */
if (dsl_dataset_long_held(ds))
return (EBUSY);
*todelete = B_TRUE;
}
return (0);
}
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
int rv = 0;
if (!dmu_tx_is_syncing(tx))
return (0);
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
const char *name = nvpair_name(pair);
int error;
dsl_dataset_t *ds;
nvlist_t *holds;
error = nvpair_value_nvlist(pair, &holds);
if (error != 0)
return (EINVAL);
error = dsl_dataset_hold(dp, name, FTAG, &ds);
if (error == 0) {
boolean_t deleteme;
error = dsl_dataset_user_release_check_one(ds,
holds, &deleteme);
if (error == 0 && deleteme) {
fnvlist_add_boolean(ddura->ddura_todelete,
name);
}
dsl_dataset_rele(ds, FTAG);
}
if (error != 0) {
if (ddura->ddura_errlist != NULL) {
fnvlist_add_int32(ddura->ddura_errlist,
name, error);
}
rv = error;
}
}
return (rv);
}
static void
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
dmu_tx_t *tx)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj;
int error;
nvpair_t *pair;
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
ds->ds_userrefs--;
error = dsl_pool_user_release(dp, ds->ds_object,
nvpair_name(pair), tx);
VERIFY(error == 0 || error == ENOENT);
zapobj = ds->ds_phys->ds_userrefs_obj;
VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
spa_history_log_internal_ds(ds, "release", tx,
"tag=%s refs=%lld", nvpair_name(pair),
(longlong_t)ds->ds_userrefs);
}
}
static void
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_arg_t *ddura = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
nvpair_t *pair;
for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
dsl_dataset_user_release_sync_one(ds,
fnvpair_value_nvlist(pair), tx);
if (nvlist_exists(ddura->ddura_todelete,
nvpair_name(pair))) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
}
/*
* holds is nvl of snapname -> { holdname, ... }
* errlist will be filled in with snapname -> error
*
* if any fails, all will fail.
*/
int
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
{
dsl_dataset_user_release_arg_t ddura;
nvpair_t *pair;
int error;
pair = nvlist_next_nvpair(holds, NULL);
if (pair == NULL)
return (0);
ddura.ddura_holds = holds;
ddura.ddura_errlist = errlist;
ddura.ddura_todelete = fnvlist_alloc();
error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
fnvlist_free(ddura.ddura_todelete);
return (error);
}
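/*
 * Illustrative sketch (not part of this commit): releasing a hold using the
 * snapname -> { holdname, ... } layout described above.  Names are examples
 * only.
 */
static int
example_release_hold(void)
{
	nvlist_t *holds = fnvlist_alloc();
	nvlist_t *tags = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();
	int error;

	fnvlist_add_boolean(tags, "example-tag");
	fnvlist_add_nvlist(holds, "pool/fs@snap1", tags);
	error = dsl_dataset_user_release(holds, errlist);
	fnvlist_free(errlist);
	fnvlist_free(tags);
	fnvlist_free(holds);
	return (error);
}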
typedef struct dsl_dataset_user_release_tmp_arg {
uint64_t ddurta_dsobj;
nvlist_t *ddurta_holds;
boolean_t ddurta_deleteme;
} dsl_dataset_user_release_tmp_arg_t;
static int
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
if (!dmu_tx_is_syncing(tx))
return (0);
error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
if (error)
return (error);
error = dsl_dataset_user_release_check_one(ds,
ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
if (ddurta->ddurta_deleteme) {
ASSERT(ds->ds_userrefs == 0 &&
ds->ds_phys->ds_num_children == 1 &&
DS_IS_DEFER_DESTROY(ds));
dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
}
dsl_dataset_rele(ds, FTAG);
}
/*
* Called at spa_load time to release a stale temporary user hold.
* Also called by the onexit code.
*/
void
dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
{
dsl_dataset_user_release_tmp_arg_t ddurta;
dsl_dataset_t *ds;
int error;
#ifdef _KERNEL
/* Make sure it is not mounted. */
dsl_pool_config_enter(dp, FTAG);
error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (error == 0) {
char name[MAXNAMELEN];
dsl_dataset_name(ds, name);
dsl_dataset_rele(ds, FTAG);
dsl_pool_config_exit(dp, FTAG);
zfs_unmount_snap(name);
} else {
dsl_pool_config_exit(dp, FTAG);
}
#endif
ddurta.ddurta_dsobj = dsobj;
ddurta.ddurta_holds = fnvlist_alloc();
fnvlist_add_boolean(ddurta.ddurta_holds, htag);
(void) dsl_sync_task(spa_name(dp->dp_spa),
dsl_dataset_user_release_tmp_check,
dsl_dataset_user_release_tmp_sync, &ddurta, 1);
fnvlist_free(ddurta.ddurta_holds);
}
typedef struct zfs_hold_cleanup_arg {
char zhca_spaname[MAXNAMELEN];
uint64_t zhca_spa_load_guid;
uint64_t zhca_dsobj;
char zhca_htag[MAXNAMELEN];
} zfs_hold_cleanup_arg_t;
static void
dsl_dataset_user_release_onexit(void *arg)
{
zfs_hold_cleanup_arg_t *ca = arg;
spa_t *spa;
int error;
error = spa_open(ca->zhca_spaname, &spa, FTAG);
if (error != 0) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
"because pool is no longer loaded",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
return;
}
if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
"because pool is no longer loaded (guid doesn't match)",
ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
spa_close(spa, FTAG);
return;
}
dsl_dataset_user_release_tmp(spa_get_dsl(spa),
ca->zhca_dsobj, ca->zhca_htag);
kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
spa_close(spa, FTAG);
}
void
dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor)
{
zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
spa_t *spa = dsl_dataset_get_spa(ds);
(void) strlcpy(ca->zhca_spaname, spa_name(spa),
sizeof (ca->zhca_spaname));
ca->zhca_spa_load_guid = spa_load_guid(spa);
ca->zhca_dsobj = ds->ds_object;
(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
VERIFY0(zfs_onexit_add_cb(minor,
dsl_dataset_user_release_onexit, ca, NULL));
}
int
dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
err = dsl_pool_hold(dsname, FTAG, &dp);
if (err != 0)
return (err);
err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
if (ds->ds_phys->ds_userrefs_obj != 0) {
zap_attribute_t *za;
zap_cursor_t zc;
za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
ds->ds_phys->ds_userrefs_obj);
zap_cursor_retrieve(&zc, za) == 0;
zap_cursor_advance(&zc)) {
fnvlist_add_uint64(nvl, za->za_name,
za->za_first_integer);
}
zap_cursor_fini(&zc);
kmem_free(za, sizeof (zap_attribute_t));
}
dsl_dataset_rele(ds, FTAG);
dsl_pool_rele(dp, FTAG);
return (0);
}
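/*
 * Illustrative sketch (not part of this commit): listing the user holds on a
 * snapshot; the value of each entry is the hold's creation time in seconds.
 * The snapshot name is an example only.
 */
static void
example_print_holds(void)
{
	nvlist_t *holds = fnvlist_alloc();
	nvpair_t *pair = NULL;

	if (dsl_dataset_get_holds("pool/fs@snap1", holds) == 0) {
		while ((pair = nvlist_next_nvpair(holds, pair)) != NULL) {
			zfs_dbgmsg("hold %s created %llu", nvpair_name(pair),
			    (u_longlong_t)fnvpair_value_uint64(pair));
		}
	}
	fnvlist_free(holds);
}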

View File

@ -1875,3 +1875,41 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
return (error);
}
static void
checkmap(space_map_t *sm, uint64_t off, uint64_t size)
{
space_seg_t *ss;
avl_index_t where;
mutex_enter(sm->sm_lock);
ss = space_map_find(sm, off, size, &where);
if (ss != NULL)
panic("freeing free block; ss=%p", (void *)ss);
mutex_exit(sm->sm_lock);
}
void
metaslab_check_free(spa_t *spa, const blkptr_t *bp)
{
if ((zfs_flags & ZFS_DEBUG_ZIO_FREE) == 0)
return;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
for (int i = 0; i < BP_GET_NDVAS(bp); i++) {
uint64_t vdid = DVA_GET_VDEV(&bp->blk_dva[i]);
vdev_t *vd = vdev_lookup_top(spa, vdid);
uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[i]);
uint64_t size = DVA_GET_ASIZE(&bp->blk_dva[i]);
metaslab_t *ms = vd->vdev_ms[off >> vd->vdev_ms_shift];
if (ms->ms_map->sm_loaded)
checkmap(ms->ms_map, off, size);
for (int j = 0; j < TXG_SIZE; j++)
checkmap(ms->ms_freemap[j], off, size);
for (int j = 0; j < TXG_DEFER_SIZE; j++)
checkmap(ms->ms_defermap[j], off, size);
}
spa_config_exit(spa, SCL_VDEV, FTAG);
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */
#else
int reference_tracking_enable = TRUE;
#endif
int reference_history = 4; /* tunable */
int reference_history = 3; /* tunable */
static kmem_cache_t *reference_cache;
static kmem_cache_t *reference_history_cache;
@ -64,6 +65,14 @@ refcount_create(refcount_t *rc)
offsetof(reference_t, ref_link));
rc->rc_count = 0;
rc->rc_removed_count = 0;
rc->rc_tracked = reference_tracking_enable;
}
void
refcount_create_untracked(refcount_t *rc)
{
refcount_create(rc);
rc->rc_tracked = B_FALSE;
}
void
@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc)
int
refcount_is_zero(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count == 0);
}
int64_t
refcount_count(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count);
}
@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder)
reference_t *ref = NULL;
int64_t count;
if (reference_tracking_enable) {
if (rc->rc_tracked) {
ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
ref->ref_holder = holder;
ref->ref_number = number;
}
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= 0);
if (reference_tracking_enable)
if (rc->rc_tracked)
list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= number);
if (!reference_tracking_enable) {
if (!rc->rc_tracked) {
rc->rc_count -= number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
KM_SLEEP);
list_insert_head(&rc->rc_removed, ref);
rc->rc_removed_count++;
if (rc->rc_removed_count >= reference_history) {
if (rc->rc_removed_count > reference_history) {
ref = list_tail(&rc->rc_removed);
list_remove(&rc->rc_removed, ref);
kmem_cache_free(reference_history_cache,

View File

@ -22,6 +22,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/refcount.h>
#include <sys/rrwlock.h>
@ -72,8 +75,9 @@
uint_t rrw_tsd_key;
typedef struct rrw_node {
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
void *rn_tag;
} rrw_node_t;
static rrw_node_t *
@ -95,13 +99,14 @@ rrn_find(rrwlock_t *rrl)
* Add a node to the head of the singly linked list.
*/
static void
rrn_add(rrwlock_t *rrl)
rrn_add(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rn = kmem_alloc(sizeof (*rn), KM_SLEEP);
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
rn->rn_tag = tag;
VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
}
@ -110,7 +115,7 @@ rrn_add(rrwlock_t *rrl)
* thread's list and return TRUE; otherwise return FALSE.
*/
static boolean_t
rrn_find_and_remove(rrwlock_t *rrl)
rrn_find_and_remove(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rrw_node_t *prev = NULL;
@ -119,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
return (B_FALSE);
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
if (rn->rn_rrl == rrl) {
if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
if (prev)
prev->rn_next = rn->rn_next;
else
@ -133,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
}
void
rrw_init(rrwlock_t *rrl)
rrw_init(rrwlock_t *rrl, boolean_t track_all)
{
mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);
@ -141,6 +146,7 @@ rrw_init(rrwlock_t *rrl)
refcount_create(&rrl->rr_anon_rcount);
refcount_create(&rrl->rr_linked_rcount);
rrl->rr_writer_wanted = B_FALSE;
rrl->rr_track_all = track_all;
}
void
@ -153,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl)
refcount_destroy(&rrl->rr_linked_rcount);
}
static void
void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
#if !defined(DEBUG) && defined(_KERNEL)
if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
!rrl->rr_track_all) {
rrl->rr_anon_rcount.rc_count++;
mutex_exit(&rrl->rr_lock);
return;
@ -168,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
ASSERT(rrl->rr_writer != curthread);
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
while (rrl->rr_writer || (rrl->rr_writer_wanted &&
while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
refcount_is_zero(&rrl->rr_anon_rcount) &&
rrn_find(rrl) == NULL))
cv_wait(&rrl->rr_cv, &rrl->rr_lock);
if (rrl->rr_writer_wanted) {
if (rrl->rr_writer_wanted || rrl->rr_track_all) {
/* may or may not be a re-entrant enter */
rrn_add(rrl);
rrn_add(rrl, tag);
(void) refcount_add(&rrl->rr_linked_rcount, tag);
} else {
(void) refcount_add(&rrl->rr_anon_rcount, tag);
@ -184,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
static void
void
rrw_enter_write(rrwlock_t *rrl)
{
mutex_enter(&rrl->rr_lock);
@ -230,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag)
if (rrl->rr_writer == NULL) {
int64_t count;
if (rrn_find_and_remove(rrl))
if (rrn_find_and_remove(rrl, tag)) {
count = refcount_remove(&rrl->rr_linked_rcount, tag);
else
} else {
ASSERT(!rrl->rr_track_all);
count = refcount_remove(&rrl->rr_anon_rcount, tag);
}
if (count == 0)
cv_broadcast(&rrl->rr_cv);
} else {
@ -246,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
/*
* If the lock was created with track_all, rrw_held(RW_READER) will return
* B_TRUE iff the current thread has the lock for reader. Otherwise it may
* return B_TRUE if any thread has the lock for reader.
*/
boolean_t
rrw_held(rrwlock_t *rrl, krw_t rw)
{
@ -256,9 +270,19 @@ rrw_held(rrwlock_t *rrl, krw_t rw)
held = (rrl->rr_writer == curthread);
} else {
held = (!refcount_is_zero(&rrl->rr_anon_rcount) ||
!refcount_is_zero(&rrl->rr_linked_rcount));
rrn_find(rrl) != NULL);
}
mutex_exit(&rrl->rr_lock);
return (held);
}
void
rrw_tsd_destroy(void *arg)
{
rrw_node_t *rn = arg;
if (rn != NULL) {
panic("thread %p terminating with rrw lock %p held",
(void *)curthread, (void *)rn->rn_rrl);
}
}
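
The hunks above extend the re-entrant read/write lock so that, when created with track_all, every reader is linked onto its thread's TSD list with a tag; rrw_held(RW_READER) then reports whether the current thread holds the lock, and a thread exiting with a hold still linked panics in rrw_tsd_destroy(). A minimal sketch of the extended interface follows (illustration only, not part of this diff; the lock variable is a placeholder and FTAG is the usual ZFS tag macro):

#include <sys/rrwlock.h>

static rrwlock_t example_lock;

static void
example_rrwlock_usage(void)
{
	/* Track every reader so rrw_held() is accurate per thread. */
	rrw_init(&example_lock, B_TRUE);

	rrw_enter_read(&example_lock, FTAG);
	ASSERT(rrw_held(&example_lock, RW_READER));

	/* Re-entrant: the same thread may take a second read hold. */
	rrw_enter_read(&example_lock, FTAG);
	rrw_exit(&example_lock, FTAG);
	rrw_exit(&example_lock, FTAG);

	rrw_destroy(&example_lock);
}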

View File

@ -1004,10 +1004,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
sa_attr_type_t *tb;
int error;
mutex_enter(&os->os_lock);
mutex_enter(&os->os_user_ptr_lock);
if (os->os_sa) {
mutex_enter(&os->os_sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
tb = os->os_sa->sa_user_table;
mutex_exit(&os->os_sa->sa_lock);
*user_table = tb;
@ -1020,7 +1020,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
os->os_sa = sa;
mutex_enter(&sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
avl_create(&sa->sa_layout_num_tree, layout_num_compare,
sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,

View File

@ -61,6 +61,9 @@
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zvol.h>
#include <sys/trim_map.h>
@ -120,10 +123,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static dsl_checkfunc_t spa_change_guid_check;
static dsl_syncfunc_t spa_change_guid_sync;
static void spa_sync_version(void *arg, dmu_tx_t *tx);
static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@ -324,10 +325,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
dsl_dataset_t *ds = NULL;
dp = spa_get_dsl(spa);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
if (err = dsl_dataset_hold_obj(dp,
za.za_first_integer, FTAG, &ds)) {
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
break;
}
@ -336,7 +337,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
KM_SLEEP);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
} else {
strval = NULL;
intval = za.za_first_integer;
@ -490,9 +491,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
if (dmu_objset_type(os) != DMU_OST_ZFS) {
error = ENOTSUP;
} else if ((error = dsl_prop_get_integer(strval,
} else if ((error =
dsl_prop_get_int_ds(dmu_objset_ds(os),
zfs_prop_to_name(ZFS_PROP_COMPRESSION),
&compress, NULL)) == 0 &&
&compress)) == 0 &&
!BOOTFS_COMPRESS_VALID(compress)) {
error = ENOTSUP;
} else {
@ -659,8 +661,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
* read object, the features for write object, or the
* feature descriptions object.
*/
error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
spa_sync_version, spa, &ver, 6);
error = dsl_sync_task(spa->spa_name, NULL,
spa_sync_version, &ver, 6);
if (error)
return (error);
continue;
@ -671,8 +673,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
}
if (need_sync) {
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
spa, nvp, 6));
return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
nvp, 6));
}
return (0);
@ -694,10 +696,10 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
/*ARGSUSED*/
static int
spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_check(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t *newguid = arg2;
uint64_t *newguid = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
vdev_t *rvd = spa->spa_root_vdev;
uint64_t vdev_state;
@ -714,10 +716,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t *newguid = arg2;
uint64_t *newguid = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
uint64_t oldguid;
vdev_t *rvd = spa->spa_root_vdev;
@ -729,17 +731,8 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
vdev_config_dirty(rvd);
spa_config_exit(spa, SCL_STATE, FTAG);
#ifdef __FreeBSD__
/*
* TODO: until recent illumos logging changes are merged
* log reguid as pool property change
*/
spa_history_log_internal(LOG_POOL_PROPSET, spa, tx,
"guid change old=%llu new=%llu", oldguid, *newguid);
#else
spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld",
spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu",
oldguid, *newguid);
#endif
}
/*
@ -760,8 +753,8 @@ spa_change_guid(spa_t *spa)
mutex_enter(&spa_namespace_lock);
guid = spa_generate_guid(NULL);
error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
spa_change_guid_sync, spa, &guid, 5);
error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
spa_change_guid_sync, &guid, 5);
if (error == 0) {
spa_config_sync(spa, B_FALSE, B_TRUE);
@ -1663,21 +1656,22 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
/*
* Check for missing log devices
*/
static int
static boolean_t
spa_check_logs(spa_t *spa)
{
boolean_t rv = B_FALSE;
switch (spa->spa_log_state) {
case SPA_LOG_MISSING:
/* need to recheck in case slog has been restored */
case SPA_LOG_UNKNOWN:
if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
DS_FIND_CHILDREN)) {
rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
NULL, DS_FIND_CHILDREN) != 0);
if (rv)
spa_set_log_state(spa, SPA_LOG_MISSING);
return (1);
}
break;
}
return (0);
return (rv);
}
static boolean_t
@ -1723,11 +1717,11 @@ spa_activate_log(spa_t *spa)
int
spa_offline_log(spa_t *spa)
{
int error = 0;
if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN)) == 0) {
int error;
error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN);
if (error == 0) {
/*
* We successfully offlined the log device, sync out the
* current txg so that the "stubby" block can be removed
@ -2642,6 +2636,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
vdev_resilver_needed(rvd, NULL, NULL))
spa_async_request(spa, SPA_ASYNC_RESILVER);
/*
* Log the fact that we booted up (so that we can detect if
* we rebooted in the middle of an operation).
*/
spa_history_log_version(spa, "open");
/*
* Delete any inconsistent datasets.
*/
@ -3327,7 +3327,7 @@ spa_l2cache_drop(spa_t *spa)
*/
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
const char *history_str, nvlist_t *zplprops)
nvlist_t *zplprops)
{
spa_t *spa;
char *altroot = NULL;
@ -3530,7 +3530,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_sync_props(spa, props, tx);
spa_sync_props(props, tx);
}
dmu_tx_commit(tx);
@ -3546,9 +3546,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_config_sync(spa, B_FALSE, B_TRUE);
if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
spa_history_log_version(spa, LOG_POOL_CREATE);
spa_history_log_version(spa, "create");
spa->spa_minref = refcount_count(&spa->spa_refcount);
@ -3749,7 +3747,6 @@ spa_import_rootpool(char *devpath, char *devid)
}
error = 0;
spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_free(rvd);
@ -4006,7 +4003,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_config_sync(spa, B_FALSE, B_TRUE);
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, LOG_POOL_IMPORT);
spa_history_log_version(spa, "import");
return (0);
}
@ -4137,7 +4134,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, LOG_POOL_IMPORT);
spa_history_log_version(spa, "import");
#ifdef __FreeBSD__
#ifdef _KERNEL
@ -4680,7 +4677,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
spa_history_log_internal(spa, "vdev attach", NULL,
"%s vdev=%s %s vdev=%s",
replacing && newvd_isspare ? "spare in" :
replacing ? "replace" : "attach", newvdpath,
@ -4897,7 +4894,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
error = spa_vdev_exit(spa, vd, txg, 0);
spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
spa_history_log_internal(spa, "detach", NULL,
"vdev=%s", vdpath);
spa_strfree(vdpath);
@ -5173,9 +5170,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
if (vml[c] != NULL) {
vdev_split(vml[c]);
if (error == 0)
spa_history_log_internal(LOG_POOL_VDEV_DETACH,
spa, tx, "vdev=%s",
vml[c]->vdev_path);
spa_history_log_internal(spa, "detach", tx,
"vdev=%s", vml[c]->vdev_path);
vdev_free(vml[c]);
}
}
@ -5190,8 +5186,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
zio_handle_panic_injection(spa, FTAG, 3);
/* split is complete; log a history record */
spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
"split new pool %s from pool %s", newname, spa_name(spa));
spa_history_log_internal(newspa, "split", NULL,
"from pool %s", spa_name(spa));
kmem_free(vml, children * sizeof (vdev_t *));
@ -5778,8 +5774,7 @@ spa_async_thread(void *arg)
* then log an internal history event.
*/
if (new_space != old_space) {
spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
spa, NULL,
spa_history_log_internal(spa, "vdev online", NULL,
"pool '%s' size: %llu(+%llu)",
spa_name(spa), new_space, new_space - old_space);
}
@ -6008,10 +6003,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
}
static void
spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_version(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t version = *(uint64_t *)arg2;
uint64_t *versionp = arg;
uint64_t version = *versionp;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
/*
* Setting the version is special cased when first creating the pool.
@ -6023,17 +6019,18 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
spa->spa_uberblock.ub_version = version;
vdev_config_dirty(spa->spa_root_vdev);
spa_history_log_internal(spa, "set", tx, "version=%lld", version);
}
/*
* Set zpool properties.
*/
static void
spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_props(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
nvlist_t *nvp = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
@ -6057,6 +6054,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
spa_feature_enable(spa, feature, tx);
spa_history_log_internal(spa, "set", tx,
"%s=enabled", nvpair_name(elem));
break;
case ZPOOL_PROP_VERSION:
@ -6096,6 +6095,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
*/
if (tx->tx_txg != TXG_INITIAL)
vdev_config_dirty(spa->spa_root_vdev);
spa_history_log_internal(spa, "set", tx,
"%s=%s", nvpair_name(elem), strval);
break;
default:
/*
@ -6118,7 +6119,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos,
spa->spa_pool_props_object, propname,
1, strlen(strval) + 1, strval, tx) == 0);
spa_history_log_internal(spa, "set", tx,
"%s=%s", nvpair_name(elem), strval);
} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
VERIFY(nvpair_value_uint64(elem, &intval) == 0);
@ -6130,6 +6132,8 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(zap_update(mos,
spa->spa_pool_props_object, propname,
8, 1, &intval, tx) == 0);
spa_history_log_internal(spa, "set", tx,
"%s=%lld", nvpair_name(elem), intval);
} else {
ASSERT(0); /* not allowed */
}
@ -6158,13 +6162,6 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
}
}
/* log internal history if this is not a zpool create */
if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
tx->tx_txg != TXG_INITIAL) {
spa_history_log_internal(LOG_POOL_PROPSET,
spa, tx, "%s %lld %s",
nvpair_name(elem), intval, spa_name(spa));
}
}
mutex_exit(&spa->spa_props_lock);
@ -6184,6 +6181,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
ASSERT(spa->spa_sync_pass == 1);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
dsl_pool_create_origin(dp, tx);
@ -6209,6 +6208,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
spa_feature_create_zap_objects(spa, tx);
}
rrw_exit(&dp->dp_config_rwlock, FTAG);
}
/*

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
@ -30,8 +30,11 @@
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/utsname.h>
#include <sys/sunddi.h>
#include <sys/cred.h>
#include "zfs_comutil.h"
#ifdef _KERNEL
#include <sys/cmn_err.h>
@ -176,14 +179,14 @@ spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
}
static char *
spa_history_zone()
spa_history_zone(void)
{
#ifdef _KERNEL
/* XXX: pr_hostname can be changed by default from within a jail! */
if (jailed(curthread->td_ucred))
return (curthread->td_ucred->cr_prison->pr_hostname);
#endif
return ("global");
return (NULL);
}
/*
@ -191,17 +194,15 @@ spa_history_zone()
*/
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
history_arg_t *hap = arg2;
const char *history_str = hap->ha_history_str;
nvlist_t *nvl = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
dmu_buf_t *dbp;
spa_history_phys_t *shpp;
size_t reclen;
uint64_t le_len;
nvlist_t *nvrecord;
char *record_packed = NULL;
int ret;
@ -218,7 +219,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* Get the offset of where we need to write via the bonus buffer.
* Update the offset when the write completes.
*/
VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
shpp = dbp->db_data;
dmu_buf_will_dirty(dbp, tx);
@ -231,46 +232,35 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
#endif
VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
gethrestime_sec()) == 0);
VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
if (hap->ha_zone != NULL)
VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
hap->ha_zone) == 0);
fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
#ifdef _KERNEL
VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
utsname.nodename) == 0);
fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
#endif
if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
hap->ha_log_type == LOG_CMD_NORMAL) {
VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
history_str) == 0);
zfs_dbgmsg("command: %s", history_str);
} else {
VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
hap->ha_event) == 0);
VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
tx->tx_txg) == 0);
VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
history_str) == 0);
zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
zfs_history_event_names[hap->ha_event], spa_name(spa),
(longlong_t)tx->tx_txg, history_str);
if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
zfs_dbgmsg("command: %s",
fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
} else {
zfs_dbgmsg("txg %lld %s %s",
fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
}
} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
zfs_dbgmsg("ioctl %s",
fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
}
VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
record_packed = kmem_alloc(reclen, KM_SLEEP);
VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
NV_ENCODE_XDR, KM_SLEEP) == 0);
record_packed = fnvlist_pack(nvl, &reclen);
mutex_enter(&spa->spa_history_lock);
if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);
/* write out the packed length as little endian */
le_len = LE_64((uint64_t)reclen);
@ -278,33 +268,42 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
if (!ret)
ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
shpp->sh_pool_create_len += sizeof (le_len) + reclen;
shpp->sh_bof = shpp->sh_pool_create_len;
/* The first command is the create, which we keep forever */
if (ret == 0 && shpp->sh_pool_create_len == 0 &&
nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
}
mutex_exit(&spa->spa_history_lock);
nvlist_free(nvrecord);
kmem_free(record_packed, reclen);
fnvlist_pack_free(record_packed, reclen);
dmu_buf_rele(dbp, FTAG);
strfree(hap->ha_history_str);
if (hap->ha_zone != NULL)
strfree(hap->ha_zone);
kmem_free(hap, sizeof (history_arg_t));
fnvlist_free(nvl);
}
/*
* Write out a history event.
*/
int
spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
spa_history_log(spa_t *spa, const char *msg)
{
int err;
nvlist_t *nvl = fnvlist_alloc();
fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg);
err = spa_history_log_nvl(spa, nvl);
fnvlist_free(nvl);
return (err);
}
int
spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
history_arg_t *ha;
int err = 0;
dmu_tx_t *tx;
nvlist_t *nvarg;
ASSERT(what != LOG_INTERNAL);
if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY)
return (EINVAL);
if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
return (EINVAL);
@ -316,19 +315,21 @@ spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
return (err);
}
ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
ha->ha_history_str = strdup(history_str);
ha->ha_zone = strdup(spa_history_zone());
ha->ha_log_type = what;
ha->ha_uid = crgetuid(CRED());
nvarg = fnvlist_dup(nvl);
if (spa_history_zone() != NULL) {
fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
spa_history_zone());
}
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
/* Kick this off asynchronously; errors are ignored. */
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, ha, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
nvarg, 0, tx);
dmu_tx_commit(tx);
/* spa_history_log_sync will free ha and strings */
/* spa_history_log_sync will free nvl */
return (err);
}
/*
@ -345,7 +346,7 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
int err;
/*
* If the command history doesn't exist (older pool),
* If the command history doesn't exist (older pool),
* that's ok, just return ENOENT.
*/
if (!spa->spa_history)
@ -428,11 +429,14 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
return (err);
}
/*
* The nvlist will be consumed by this call.
*/
static void
log_internal(history_internal_events_t event, spa_t *spa,
log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
dmu_tx_t *tx, const char *fmt, va_list adx)
{
history_arg_t *ha;
char *msg;
va_list adx2;
/*
@ -440,35 +444,34 @@ log_internal(history_internal_events_t event, spa_t *spa,
* initialized yet, so don't bother logging the internal events.
* Likewise if the pool is not writeable.
*/
if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa))
if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
fnvlist_free(nvl);
return;
}
va_copy(adx2, adx);
ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx2) + 1,
KM_SLEEP);
msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
(void) vsprintf(msg, fmt, adx2);
fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
strfree(msg);
va_end(adx2);
(void) vsprintf(ha->ha_history_str, fmt, adx);
ha->ha_log_type = LOG_INTERNAL;
ha->ha_event = event;
ha->ha_zone = NULL;
ha->ha_uid = 0;
fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
if (dmu_tx_is_syncing(tx)) {
spa_history_log_sync(spa, ha, tx);
spa_history_log_sync(nvl, tx);
} else {
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, ha, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa),
spa_history_log_sync, nvl, 0, tx);
}
/* spa_history_log_sync() will free ha and strings */
/* spa_history_log_sync() will free nvl */
}
void
spa_history_log_internal(history_internal_events_t event, spa_t *spa,
spa_history_log_internal(spa_t *spa, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
dmu_tx_t *htx = tx;
@ -484,7 +487,7 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa,
}
va_start(adx, fmt);
log_internal(event, spa, htx, fmt, adx);
log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);
va_end(adx);
/* if we didn't get a tx from the caller, commit the one we made */
@ -493,23 +496,50 @@ spa_history_log_internal(history_internal_events_t event, spa_t *spa,
}
void
spa_history_log_version(spa_t *spa, history_internal_events_t event)
spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
#ifdef _KERNEL
uint64_t current_vers = spa_version(spa);
va_list adx;
char namebuf[MAXNAMELEN];
nvlist_t *nvl = fnvlist_alloc();
if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
spa_history_log_internal(event, spa, NULL,
"pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
(u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
}
#if 0
cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
event == LOG_POOL_IMPORT ? "imported" :
event == LOG_POOL_CREATE ? "created" : "accessed",
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
#endif
ASSERT(tx != NULL);
dsl_dataset_name(ds, namebuf);
fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object);
va_start(adx, fmt);
log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx);
va_end(adx);
}
void
spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
va_list adx;
char namebuf[MAXNAMELEN];
nvlist_t *nvl = fnvlist_alloc();
ASSERT(tx != NULL);
dsl_dir_name(dd, namebuf);
fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID,
dd->dd_phys->dd_head_dataset_obj);
va_start(adx, fmt);
log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx);
va_end(adx);
}
void
spa_history_log_version(spa_t *spa, const char *operation)
{
spa_history_log_internal(spa, operation, NULL,
"pool version %llu; software version %llu/%d; uts %s %s %s %s",
(u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
}
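
With the history rework above, every record is a packed nvlist: commands carry ZPOOL_HIST_CMD, ioctls ZPOOL_HIST_IOCTL, and internal events ZPOOL_HIST_INT_NAME/ZPOOL_HIST_INT_STR plus the txg and, for dataset-scoped events, ZPOOL_HIST_DSNAME/ZPOOL_HIST_DSID. A minimal sketch of logging an internal per-dataset event with the new helper (illustration only; the operation name and message are made up):

static void
example_log_internal(dsl_dataset_t *ds, dmu_tx_t *tx,
    const char *oldname, const char *newname)
{
	/*
	 * Becomes an nvlist record with ZPOOL_HIST_INT_NAME = "rename",
	 * ZPOOL_HIST_INT_STR = the formatted message, plus TXG/DSNAME/DSID.
	 */
	spa_history_log_internal_ds(ds, "rename", tx,
	    "%s -> %s", oldname, newname);
}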

View File

@ -238,8 +238,8 @@ kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
#ifdef ZFS_DEBUG
/* Everything except dprintf is on by default in debug builds */
int zfs_flags = ~ZFS_DEBUG_DPRINTF;
/* Everything except dprintf and spa is on by default in debug builds */
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
#else
int zfs_flags = 0;
#endif
@ -314,7 +314,7 @@ spa_config_lock_init(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
refcount_create(&scl->scl_count);
refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
}
@ -367,6 +367,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
{
int wlocks_held = 0;
ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
for (int i = 0; i < SCL_LOCKS; i++) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (scl->scl_writer == curthread)
@ -445,27 +447,22 @@ spa_lookup(const char *name)
static spa_t search; /* spa_t is large; don't allocate on stack */
spa_t *spa;
avl_index_t where;
char c;
char *cp;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
/*
* If it's a full dataset name, figure out the pool name and
* just use that.
*/
cp = strpbrk(name, "/@");
if (cp) {
c = *cp;
cp = strpbrk(search.spa_name, "/@");
if (cp != NULL)
*cp = '\0';
}
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
spa = avl_find(&spa_namespace_avl, &search, &where);
if (cp)
*cp = c;
return (spa);
}
@ -600,6 +597,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
KM_SLEEP) == 0);
}
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
return (spa);
}

View File

@ -109,7 +109,7 @@ void
space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
space_seg_t ssearch, *ss_before, *ss_after, *ss;
space_seg_t *ss_before, *ss_after, *ss;
uint64_t end = start + size;
int merge_before, merge_after;
@ -122,11 +122,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
again:
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
ss = space_map_find(sm, start, size, &where);
if (ss != NULL) {
zfs_panic_recover("zfs: allocating allocated segment"
"(offset=%llu size=%llu)\n",
(longlong_t)start, (longlong_t)size);
@ -194,19 +191,19 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
void
space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
{
space_seg_t ssearch, *ss, *newseg;
#ifdef illumos
avl_index_t where;
#endif
space_seg_t *ss, *newseg;
uint64_t end = start + size;
int left_over, right_over;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, NULL);
#ifdef illumos
ss = space_map_find(sm, start, size, &where);
#else
ss = space_map_find(sm, start, size, NULL);
#endif
/* Make sure we completely overlap with someone */
if (ss == NULL) {
@ -249,12 +246,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
sm->sm_space -= size;
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
space_seg_t *
space_map_find(space_map_t *sm, uint64_t start, uint64_t size,
avl_index_t *wherep)
{
avl_index_t where;
space_seg_t ssearch, *ss;
uint64_t end = start + size;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(size != 0);
@ -262,10 +258,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
ssearch.ss_end = start + size;
ss = avl_find(&sm->sm_root, &ssearch, wherep);
return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size)
return (ss);
return (NULL);
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
return (space_map_find(sm, start, size, &where) != 0);
}
void

View File

@ -89,7 +89,7 @@ arc_buf_t *arc_loan_buf(spa_t *spa, int size);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);

View File

@ -311,20 +311,17 @@ void dbuf_fini(void);
boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
#define DBUF_IS_METADATA(_db) \
(dbuf_is_metadata(_db))
#define DBUF_GET_BUFC_TYPE(_db) \
(DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
#define DBUF_IS_CACHEABLE(_db) \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(_db) && \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
#define DBUF_IS_L2CACHEABLE(_db) \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(_db) && \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG

View File

@ -43,6 +43,7 @@
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#ifdef __cplusplus
extern "C" {
@ -216,15 +217,10 @@ typedef enum dmu_object_type {
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
typedef enum dmu_objset_type {
DMU_OST_NONE,
DMU_OST_META,
DMU_OST_ZFS,
DMU_OST_ZVOL,
DMU_OST_OTHER, /* For testing only! */
DMU_OST_ANY, /* Be careful! */
DMU_OST_NUMTYPES
} dmu_objset_type_t;
typedef enum txg_how {
TXG_WAIT = 1,
TXG_NOWAIT,
} txg_how_t;
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
@ -264,22 +260,21 @@ void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_evict_dbufs(objset_t *os);
void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname,
int dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
struct nvlist *snaps);
int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *);
int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
int dmu_objset_snapshot_tmp(const char *, const char *, int);
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
int dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive);
typedef struct dmu_buf {
uint64_t db_object; /* object that this buffer is part of */
@ -554,7 +549,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
@ -796,38 +791,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
uint64_t object, uint64_t offset, int len);
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
int outfd, struct file *fp, offset_t *off);
int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap,
boolean_t fromorigin, uint64_t *sizep);
typedef struct dmu_recv_cookie {
/*
* This structure is opaque!
*
* If logical and real are different, we are recving the stream
* into the "real" temporary clone, and then switching it with
* the "logical" target.
*/
struct dsl_dataset *drc_logical_ds;
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
char *drc_top_ds;
boolean_t drc_newfs;
boolean_t drc_force;
struct avl_tree *drc_guid_to_ds_map;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc);
int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp,
offset_t *off);
int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
struct file *fp, offset_t *offp);
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -42,6 +43,7 @@ extern "C" {
extern krwlock_t os_lock;
struct dsl_pool;
struct dsl_dataset;
struct dmu_tx;
@ -113,8 +115,6 @@ struct objset {
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
/* SA layout/attribute registration */
sa_os_t *os_sa;
};
@ -137,25 +137,16 @@ void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
int flags);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
int func(struct dsl_pool *, struct dsl_dataset *, void *),
void *arg, int flags);
int dmu_objset_prefetch(const char *name, void *arg);
void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
void dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
@ -171,6 +162,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
int dmu_fsname(const char *snapname, char *buf);
void dmu_objset_init(void);
void dmu_objset_fini(void);

View File

@ -0,0 +1,77 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _DMU_SEND_H
#define _DMU_SEND_H
#include <sys/spa.h>
struct vnode;
struct dsl_dataset;
struct drr_begin;
struct avl_tree;
int dmu_send(const char *tosnap, const char *fromsnap, int outfd,
#ifdef illumos
struct vnode *vp, offset_t *off);
#else
struct file *fp, offset_t *off);
#endif
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
#ifdef illumos
int outfd, struct vnode *vp, offset_t *off);
#else
int outfd, struct file *fp, offset_t *off);
#endif
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
struct drr_begin *drc_drrb;
const char *drc_tofs;
const char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_byteswap;
boolean_t drc_force;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
boolean_t force, char *origin, dmu_recv_cookie_t *drc);
#ifdef illumos
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
#else
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
#endif
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc);
#endif /* _DMU_SEND_H */
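
As the new header above shows, dmu_send() is now driven by snapshot names rather than open objsets, and the receive cookie tracks the target dataset directly. A minimal sketch of an incremental send by name (illustration only; the dataset names, descriptor, and file pointer are placeholders):

static int
example_send(int outfd, struct file *fp, offset_t *offp)
{
	/* Incremental stream from tank/fs@a to tank/fs@b. */
	return (dmu_send("tank/fs@b", "tank/fs@a", outfd, fp, offp));
}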

View File

@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
@ -107,10 +110,11 @@ typedef struct dmu_tx_callback {
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,

View File

@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -37,6 +35,7 @@
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
@ -50,10 +49,8 @@ struct dsl_pool;
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
/*
* NB: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later. It
* will be needed when we implement 'zfs split' (where the split off
* clone should not be promoted).
* Note: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later.
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
@ -78,6 +75,8 @@ struct dsl_pool;
*/
#define DS_FLAG_CI_DATASET (1ULL<<16)
#define DS_CREATE_FLAG_NODIRTY (1ULL<<24)
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
@ -127,9 +126,6 @@ typedef struct dsl_dataset {
dsl_deadlist_t ds_deadlist;
bplist_t ds_pending_deadlist;
/* to protect against multiple concurrent incremental recv */
kmutex_t ds_recvlock;
/* protected by lock on pool's dp_dirty_datasets list */
txg_node_t ds_dirty_link;
list_node_t ds_synced_link;
@ -141,13 +137,15 @@ typedef struct dsl_dataset {
kmutex_t ds_lock;
objset_t *ds_objset;
uint64_t ds_userrefs;
void *ds_owner;
/*
* ds_owner is protected by the ds_rwlock and the ds_lock
* Long holds prevent the ds from being destroyed; they allow the
* ds to remain held even after dropping the dp_config_rwlock.
* Owning counts as a long hold. See the comments above
* dsl_pool_hold() for details.
*/
krwlock_t ds_rwlock;
kcondvar_t ds_exclusive_cv;
void *ds_owner;
refcount_t ds_longholds;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
@ -165,83 +163,44 @@ typedef struct dsl_dataset {
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
struct dsl_ds_destroyarg {
dsl_dataset_t *ds; /* ds to destroy */
dsl_dataset_t *rm_origin; /* also remove our origin? */
boolean_t is_origin_rm; /* set if removing origin snap */
boolean_t defer; /* destroy -d requested? */
boolean_t releasing; /* destroying due to release? */
boolean_t need_prep; /* do we need to retry due to EBUSY? */
};
/*
* The max length of a temporary tag prefix is the number of hex digits
* required to express UINT64_MAX plus one for the hyphen.
*/
#define MAX_TAG_PREFIX_LEN 17
struct dsl_ds_holdarg {
dsl_sync_task_group_t *dstg;
char *htag;
char *snapname;
boolean_t recursive;
boolean_t gotone;
boolean_t temphold;
char failed[MAXPATHLEN];
};
/*
* Flags for dsl_dataset_rename().
*/
#define ZFS_RENAME_RECURSIVE 0x01
#define ZFS_RENAME_ALLOW_MOUNTED 0x02
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
int dsl_dataset_own(const char *name, boolean_t inconsistentok,
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
void *tag);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
dsl_syncfunc_t dsl_dataset_user_hold_sync;
int dsl_dataset_rename(char *name, const char *newname, int flags);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
boolean_t recursive, boolean_t temphold, int cleanup_fd);
int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
boolean_t temphold);
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
boolean_t recursive);
int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
char *htag, boolean_t retry);
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
int dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive);
int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
minor_t cleanup_minor, const char *htag);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@ -278,13 +237,35 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
uint64_t quota);
dsl_syncfunc_t dsl_dataset_set_quota_sync;
int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier);
void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag);
void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force);
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, dmu_tx_t *tx);
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx);
void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx);
void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
dmu_tx_t *tx);
void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
int dsl_dataset_get_snapname(dsl_dataset_t *ds);
int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
uint64_t *value);
int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *fsname);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_DELEG_H
@ -65,8 +65,7 @@ extern "C" {
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent,
const char *perm, cred_t *cr);
int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr);
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);

View File

@ -0,0 +1,52 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DSL_DESTROY_H
#define _SYS_DSL_DESTROY_H
#ifdef __cplusplus
extern "C" {
#endif
struct nvlist;
struct dsl_dataset;
struct dmu_tx;
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
int dsl_destroy_snapshot(const char *name, boolean_t defer);
int dsl_destroy_head(const char *name);
int dsl_destroy_head_check_impl(struct dsl_dataset *ds, int expected_holds);
void dsl_destroy_head_sync_impl(struct dsl_dataset *ds, struct dmu_tx *tx);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *ds,
boolean_t defer, struct dmu_tx *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DESTROY_H */

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@ -103,18 +102,15 @@ struct dsl_dir {
char dd_myname[MAXNAMELEN];
};
void dsl_dir_close(dsl_dir_t *dd, void *tag);
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
const char **tailp);
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
void dsl_dir_rele(dsl_dir_t *dd, void *tag);
int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **, const char **tail);
int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
const char *name, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
@ -133,14 +129,15 @@ int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags);
int dsl_dir_rename(const char *oldname, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"

View File

@ -36,6 +36,7 @@
#include <sys/arc.h>
#include <sys/bpobj.h>
#include <sys/bptree.h>
#include <sys/rrwlock.h>
#ifdef __cplusplus
extern "C" {
@ -113,7 +114,7 @@ typedef struct dsl_pool {
* syncing context does not need to ever have it for read, since
* nobody else could possibly have it for write.
*/
krwlock_t dp_config_rwlock;
rrwlock_t dp_config_rwlock;
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
@ -139,15 +140,20 @@ void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
int64_t used, int64_t comp, int64_t uncomp);
void dsl_pool_config_enter(dsl_pool_t *dp, void *tag);
void dsl_pool_config_exit(dsl_pool_t *dp, void *tag);
boolean_t dsl_pool_config_held(dsl_pool_t *dp);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx);
extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t now, dmu_tx_t *tx);
int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, dmu_tx_t *tx);
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp);
void dsl_pool_rele(dsl_pool_t *dp, void *tag);
#ifdef __cplusplus
}
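
These declarations are the heart of the new locking model: callers take dsl_pool_hold() (which grabs dp_config_rwlock, now an rrwlock) before holding datasets, and a long hold keeps a dataset alive after the pool lock is dropped. A minimal sketch of the intended pattern (illustration only; the function and dataset name are placeholders):

static int
example_dataset_inspect(const char *dsname)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	error = dsl_pool_hold(dsname, FTAG, &dp);
	if (error != 0)
		return (error);

	error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
	if (error != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}

	/*
	 * Read whatever is needed while dp_config_rwlock is held.
	 * dsl_dataset_long_hold(ds, FTAG) would keep ds valid even
	 * after dsl_pool_rele(); owning counts as a long hold.
	 */

	dsl_dataset_rele(ds, FTAG);
	dsl_pool_rele(dp, FTAG);
	return (0);
}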

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_PROP_H
@ -53,60 +54,47 @@ typedef struct dsl_props_arg {
zprop_source_t pa_source;
} dsl_props_arg_t;
typedef struct dsl_prop_set_arg {
const char *psa_name;
zprop_source_t psa_source;
int psa_intsz;
int psa_numints;
const void *psa_value;
/*
* Used to handle the special requirements of the quota and reservation
* properties.
*/
uint64_t psa_effective_value;
} dsl_prop_setarg_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_numcb(struct dsl_dataset *ds);
void dsl_prop_notify_all(struct dsl_dir *dd);
boolean_t dsl_prop_hascb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(const char *dsname, nvlist_t **nvp);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname,
uint64_t *valuep);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint,
boolean_t snapshot);
dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
zprop_source_t source, int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source,
nvlist_t *props, dmu_tx_t *tx);
void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
dmu_tx_t *tx);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
int dsl_prop_set_int(const char *dsname, const char *propname,
zprop_source_t source, uint64_t value);
int dsl_prop_set_string(const char *dsname, const char *propname,
zprop_source_t source, const char *value);
int dsl_prop_inherit(const char *dsname, const char *propname,
zprop_source_t source);
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value);
int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#ifdef ZFS_DEBUG
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
dsl_prop_check_prediction((dd), (psa))
#else
#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
#endif
int dsl_prop_predict(dsl_dir_t *dd, const char *propname,
zprop_source_t source, uint64_t value, uint64_t *newvalp);
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
boolean_t dsl_prop_get_hasrecvd(objset_t *os);
void dsl_prop_set_hasrecvd(objset_t *os);
void dsl_prop_unset_hasrecvd(objset_t *os);
boolean_t dsl_prop_get_hasrecvd(const char *dsname);
int dsl_prop_set_hasrecvd(const char *dsname);
void dsl_prop_unset_hasrecvd(const char *dsname);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,

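A minimal sketch of how a caller might use the typed property setters declared above (dsl_prop_set_int(), dsl_prop_set_string(), dsl_prop_inherit()), which replace the old intsz/numints form of dsl_prop_set(); the dataset name and property choices are hypothetical and the ZFS kernel environment is assumed:

/* Illustration only -- "tank/home" is a made-up dataset name. */
static int
prop_example(void)
{
        int error;

        error = dsl_prop_set_int("tank/home",
            zfs_prop_to_name(ZFS_PROP_QUOTA), ZPROP_SRC_LOCAL, 10ULL << 30);
        if (error != 0)
                return (error);
        return (dsl_prop_set_string("tank/home",
            zfs_prop_to_name(ZFS_PROP_MLSLABEL), ZPROP_SRC_LOCAL, "none"));
}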
View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SYNCTASK_H
@ -34,43 +35,26 @@ extern "C" {
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
txg_node_t dst_node;
struct dsl_pool *dst_pool;
uint64_t dst_txg;
int dst_space;
dsl_checkfunc_t *dst_checkfunc;
dsl_syncfunc_t *dst_syncfunc;
void *dst_arg1;
void *dst_arg2;
int dst_err;
void *dst_arg;
int dst_error;
boolean_t dst_nowaiter;
} dsl_sync_task_t;
typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
boolean_t dstg_nowaiter;
} dsl_sync_task_group_t;
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *, dsl_syncfunc_t *,
void *arg1, void *arg2, int blocks_modified);
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
int dsl_sync_task_do(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified);
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx);
int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified);
void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc,
void *arg, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}

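The synctask header above replaces the old dsl_sync_task_group_*() interface with a single dsl_sync_task() call that takes an open-context check function and a syncing-context sync function sharing one argument. A minimal sketch of a caller, assuming the ZFS kernel environment; the my_feature_* names are hypothetical and not part of this commit:

/* Illustration only. */
typedef struct my_feature_arg {
        const char *mfa_dsname;         /* target dataset name */
        uint64_t mfa_value;             /* value to apply in syncing context */
} my_feature_arg_t;

static int
my_feature_check(void *arg, dmu_tx_t *tx)
{
        my_feature_arg_t *mfa = arg;

        /* runs in open context; a nonzero errno aborts the sync task */
        return (mfa->mfa_dsname == NULL ? EINVAL : 0);
}

static void
my_feature_sync(void *arg, dmu_tx_t *tx)
{
        /* runs in syncing context; must not fail */
}

static int
my_feature_update(const char *pool, my_feature_arg_t *mfa)
{
        /* one call replaces the old group create/wait/destroy sequence */
        return (dsl_sync_task(pool, my_feature_check, my_feature_sync, mfa, 1));
}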
View File

@ -0,0 +1,57 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DSL_USERHOLD_H
#define _SYS_DSL_USERHOLD_H
#include <sys/nvpair.h>
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_pool;
struct dsl_dataset;
struct dmu_tx;
int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor,
nvlist_t *errlist);
int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist);
int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl);
void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
const char *htag);
int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag,
boolean_t temphold, struct dmu_tx *tx);
void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag,
minor_t minor, uint64_t now, struct dmu_tx *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_USERHOLD_H */

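The new dsl_userhold.h above expresses holds as an nvlist rather than one snapshot at a time. A rough sketch of building that nvlist for dsl_dataset_user_hold(), assuming the kernel nvpair interfaces; the snapshot and tag names are made up, and a cleanup_minor of 0 is assumed to mean the holds are not temporary:

/* Illustration only. */
static int
hold_example(void)
{
        nvlist_t *holds, *errlist;
        int error;

        VERIFY(nvlist_alloc(&holds, NV_UNIQUE_NAME, KM_SLEEP) == 0);
        VERIFY(nvlist_alloc(&errlist, NV_UNIQUE_NAME, KM_SLEEP) == 0);

        /* each pair maps a snapshot name to the tag to place on it */
        VERIFY(nvlist_add_string(holds, "tank/fs@monday", "backup") == 0);
        VERIFY(nvlist_add_string(holds, "tank/fs@tuesday", "backup") == 0);

        error = dsl_dataset_user_hold(holds, 0, errlist);

        nvlist_free(holds);
        nvlist_free(errlist);
        return (error);
}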
View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@ -56,6 +56,7 @@ extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp);
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
space_map_ops_t *ops);

View File

@ -53,15 +53,17 @@ typedef struct reference {
typedef struct refcount {
kmutex_t rc_mtx;
boolean_t rc_tracked;
list_t rc_list;
list_t rc_removed;
uint64_t rc_count;
uint64_t rc_removed_count;
} refcount_t;
/* Note: refcount_t must be initialized with refcount_create() */
/* Note: refcount_t must be initialized with refcount_create[_untracked]() */
void refcount_create(refcount_t *rc);
void refcount_create_untracked(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
@ -82,6 +84,7 @@ typedef struct refcount {
} refcount_t;
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)

View File

@ -22,12 +22,13 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_RR_RW_LOCK_H
#define _SYS_RR_RW_LOCK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
@ -56,6 +57,7 @@ typedef struct rrwlock {
refcount_t rr_anon_rcount;
refcount_t rr_linked_rcount;
boolean_t rr_writer_wanted;
boolean_t rr_track_all;
} rrwlock_t;
/*
@ -63,14 +65,19 @@ typedef struct rrwlock {
* 'tag' must be the same in a rrw_enter() as in its
* corresponding rrw_exit().
*/
void rrw_init(rrwlock_t *rrl);
void rrw_init(rrwlock_t *rrl, boolean_t track_all);
void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_enter_read(rrwlock_t *rrl, void *tag);
void rrw_enter_write(rrwlock_t *rrl);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
void rrw_tsd_destroy(void *arg);
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
#define RRW_LOCK_HELD(x) \
(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
#ifdef __cplusplus
}

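A short sketch of the updated rrwlock interface above: rrw_init() now takes a track_all flag, rrw_enter_read()/rrw_enter_write() supplement rrw_enter(), and the RRW_*_HELD macros wrap rrw_held(). The ZFS kernel environment (FTAG, ASSERT) is assumed; the ZFS_ENTER macro change further down shows the same rrw_enter_read() call in situ.

/* Illustration only. */
static void
rrw_example(void)
{
        rrwlock_t lock;

        rrw_init(&lock, B_FALSE);       /* B_TRUE would track every reader */

        rrw_enter_read(&lock, FTAG);
        ASSERT(RRW_READ_HELD(&lock));
        rrw_exit(&lock, FTAG);

        rrw_enter_write(&lock);
        ASSERT(RRW_WRITE_HELD(&lock));
        rrw_exit(&lock, FTAG);

        rrw_destroy(&lock);
}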
View File

@ -52,6 +52,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
struct dsl_dataset;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
@ -418,7 +419,7 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str, nvlist_t *zplprops);
nvlist_t *zplprops);
#if defined(sun)
extern int spa_import_rootpool(char *devpath, char *devid);
#else
@ -630,31 +631,20 @@ extern int spa_mode(spa_t *spa);
extern uint64_t zfs_strtonum(const char *str, char **nptr);
#define strtonum(str, nptr) zfs_strtonum((str), (nptr))
/* history logging */
typedef enum history_log_type {
LOG_CMD_POOL_CREATE,
LOG_CMD_NORMAL,
LOG_INTERNAL
} history_log_type_t;
typedef struct history_arg {
char *ha_history_str;
history_log_type_t ha_log_type;
history_internal_events_t ha_event;
char *ha_zone;
uid_t ha_uid;
} history_arg_t;
extern char *spa_his_ievent_table[];
extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx);
extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
history_log_type_t what);
extern void spa_history_log_internal(history_internal_events_t event,
spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
extern int spa_history_log(spa_t *spa, const char *his_buf);
extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl);
extern void spa_history_log_version(spa_t *spa, const char *operation);
extern void spa_history_log_internal(spa_t *spa, const char *operation,
dmu_tx_t *tx, const char *fmt, ...);
extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
dmu_tx_t *tx, const char *fmt, ...);
extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
dmu_tx_t *tx, const char *fmt, ...);
/* error handling */
struct zbookmark;

View File

@ -149,6 +149,8 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern boolean_t space_map_contains(space_map_t *sm,
uint64_t start, uint64_t size);
extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start,
uint64_t size, avl_index_t *wherep);
extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);

View File

@ -45,9 +45,6 @@ extern "C" {
/* Number of txgs worth of frees we defer adding to in-core spacemaps */
#define TXG_DEFER_SIZE 2
#define TXG_WAIT 1ULL
#define TXG_NOWAIT 2ULL
typedef struct tx_cpu tx_cpu_t;
typedef struct txg_handle {
@ -119,11 +116,11 @@ extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);

View File

@ -26,7 +26,6 @@
#ifndef _SYS_ZFEATURE_H
#define _SYS_ZFEATURE_H
#include <sys/dmu.h>
#include <sys/nvpair.h>
#include "zfeature_common.h"
@ -34,14 +33,18 @@
extern "C" {
#endif
extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
struct spa;
struct dmu_tx;
struct objset;
extern boolean_t feature_is_supported(struct objset *os, uint64_t obj,
uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat);
struct spa;
extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *);
extern void spa_feature_enable(struct spa *, zfeature_info_t *,
struct dmu_tx *);
extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *);
extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *);
extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);

View File

@ -50,11 +50,13 @@ extern "C" {
extern int zfs_flags;
#define ZFS_DEBUG_DPRINTF 0x0001
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
#define ZFS_DEBUG_SNAPNAMES 0x0008
#define ZFS_DEBUG_MODIFY 0x0010
#define ZFS_DEBUG_DPRINTF (1<<0)
#define ZFS_DEBUG_DBUF_VERIFY (1<<1)
#define ZFS_DEBUG_DNODE_VERIFY (1<<2)
#define ZFS_DEBUG_SNAPNAMES (1<<3)
#define ZFS_DEBUG_MODIFY (1<<4)
#define ZFS_DEBUG_SPA (1<<5)
#define ZFS_DEBUG_ZIO_FREE (1<<6)
#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,

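The debug flags above are now expressed as shifted bit positions (with ZFS_DEBUG_SPA and ZFS_DEBUG_ZIO_FREE added) and are OR'ed together into zfs_flags; for example, assuming the ZFS kernel environment:

/* Illustration only: request dbuf verification and modify checking. */
zfs_flags = ZFS_DEBUG_DBUF_VERIFY | ZFS_DEBUG_MODIFY;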
View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_IOCTL_H
@ -40,6 +41,15 @@
extern "C" {
#endif
/*
* The structures in this file are passed between userland and the
* kernel. Userland may be running a 32-bit process, while the kernel
* is 64-bit. Therefore, these structures need to compile the same in
* 32-bit and 64-bit. This means not using type "long", and adding
* explicit padding so that the 32-bit structure will not be packed more
* tightly than the 64-bit structure (which requires 64-bit alignment).
*/
/*
* Property values for snapdir
*/
@ -284,22 +294,28 @@ typedef enum zfs_case {
} zfs_case_t;
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN];
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
char zc_name[MAXPATHLEN]; /* name of pool or dataset */
uint64_t zc_nvlist_src; /* really (char *) */
uint64_t zc_nvlist_src_size;
uint64_t zc_nvlist_dst; /* really (char *) */
uint64_t zc_nvlist_dst_size;
boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */
int zc_pad2;
/*
* The following members are for legacy ioctls which haven't been
* converted to the new method.
*/
uint64_t zc_history; /* really (char *) */
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
uint64_t zc_cookie;
uint64_t zc_objset_type;
uint64_t zc_perm_action;
uint64_t zc_history; /* really (char *) */
uint64_t zc_history_len;
uint64_t zc_history_len;
uint64_t zc_history_offset;
uint64_t zc_obj;
uint64_t zc_iflags; /* internal to zfs(7fs) */
@ -344,7 +360,8 @@ extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(const char *, void *);
extern void zfs_unmount_snap(const char *);
extern void zfs_destroy_unmount_origin(const char *);
/*
* ZFS minor numbers can refer to either a control device instance or

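The comment added at the top of the zfs_ioctl.h hunk above explains why these structures avoid "long" and carry explicit padding such as zc_pad2: a 32-bit userland and a 64-bit kernel must agree on the layout. A small self-contained userland sketch of the effect, not part of the commit:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * On a typical 32-bit ABI the uint64_t may be placed at offset 4, while a
 * 64-bit ABI places it at offset 8, so the two sides of an ioctl would
 * disagree on where 'b' lives.
 */
struct unpadded {
        int32_t         a;
        uint64_t        b;
};

/* Explicit padding makes the layout identical on both sides. */
struct padded {
        int32_t         a;
        int32_t         a_pad;
        uint64_t        b;
};

int
main(void)
{
        printf("unpadded: offsetof(b)=%zu sizeof=%zu\n",
            offsetof(struct unpadded, b), sizeof (struct unpadded));
        printf("padded:   offsetof(b)=%zu sizeof=%zu\n",
            offsetof(struct padded, b), sizeof (struct padded));
        return (0);
}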
View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
@ -259,7 +260,7 @@ VTOZ(vnode_t *vp)
*/
#define ZFS_ENTER(zfsvfs) \
{ \
rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \

View File

@ -411,8 +411,8 @@ extern int zil_check_log_chain(const char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog, uint64_t synced_txg);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern int zil_suspend(const char *osname, void **cookiep);
extern void zil_resume(void *cookie);
extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp);
extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);

View File

@ -585,6 +585,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@ -608,6 +610,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@ -673,42 +677,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg)
}
/*
* Add an entry to the list.
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the list (unless it's already on the list).
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
tn->tn_member[t] = 1;
tn->tn_next[t] = tl->tl_head[t];
tl->tl_head[t] = tn;
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
* Add an entry to the end of the list (walks list to find end).
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the end of the list, unless it's already on the list.
* (walks list to find end)
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
txg_node_t **tp;
for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
@ -720,7 +725,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
@ -771,13 +776,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
return (NULL);
}
int
boolean_t
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
return (tn->tn_member[t]);
return (tn->tn_member[t] != 0);
}
/*

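With txg_list_add(), txg_list_add_tail(), and txg_list_member() above now returning boolean_t ("was it added" / "is it a member") instead of the old "already on list" int, callers test the result directly. A minimal caller-side sketch, mirroring the zilog_dirty() conversion shown further down; the ZFS kernel environment is assumed:

/* Illustration only. */
static void
mark_dirty(dsl_pool_t *dp, zilog_t *zilog, dsl_dataset_t *ds, uint64_t txg)
{
        if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
                /* not previously on this txg's list, so it was just added */
                dmu_buf_add_ref(ds->ds_dbuf, zilog);
        }
}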
View File

@ -20,8 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -73,6 +72,7 @@
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
@ -743,7 +743,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
zfsvfs_t *zfsvfs;
avl_index_t where;
char from[MAXNAMELEN], to[MAXNAMELEN];
char real[MAXNAMELEN];
char real[MAXNAMELEN], fsname[MAXNAMELEN];
int err;
zfsvfs = sdvp->v_vfsp->vfs_data;
@ -762,12 +762,14 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
ZFS_EXIT(zfsvfs);
dmu_objset_name(zfsvfs->z_os, fsname);
err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
if (!err)
if (err == 0)
err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
if (!err)
if (err == 0)
err = zfs_secpolicy_rename_perms(from, to, cr);
if (err)
if (err != 0)
return (err);
/*
@ -787,7 +789,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
return (ENOENT);
}
err = dmu_objset_rename(from, to, 0);
err = dsl_dataset_rename_snapshot(fsname, snm, tnm, 0);
if (err == 0)
zfsctl_rename_snap(sdp, sep, tnm);
@ -829,9 +831,9 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
ZFS_EXIT(zfsvfs);
err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
if (!err)
if (err == 0)
err = zfs_secpolicy_destroy_perms(snapname, cr);
if (err)
if (err != 0)
return (err);
mutex_enter(&sdp->sd_lock);
@ -841,13 +843,10 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
if (sep) {
avl_remove(&sdp->sd_snaps, sep);
err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
if (err) {
avl_index_t where;
if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)
avl_insert(&sdp->sd_snaps, sep, where);
} else
err = dmu_objset_destroy(snapname, B_FALSE);
if (err != 0)
avl_add(&sdp->sd_snaps, sep);
else
err = dsl_destroy_snapshot(snapname, B_FALSE);
} else {
err = ENOENT;
}
@ -880,13 +879,12 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
*vpp = NULL;
err = zfs_secpolicy_snapshot_perms(name, cr);
if (err)
if (err != 0)
return (err);
if (err == 0) {
err = dmu_objset_snapshot(name, dirname, NULL, NULL,
B_FALSE, B_FALSE, -1);
if (err)
err = dmu_objset_snapshot_one(name, dirname);
if (err != 0)
return (err);
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
}
@ -994,7 +992,7 @@ zfsctl_snapdir_lookup(ap)
*vpp = sep->se_root;
VN_HOLD(*vpp);
err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
if (err) {
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
} else if (*vpp == sep->se_root) {
@ -1021,7 +1019,7 @@ zfsctl_snapdir_lookup(ap)
* The requested snapshot is not currently mounted, look it up.
*/
err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
if (err) {
if (err != 0) {
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
/*
@ -1074,8 +1072,20 @@ zfsctl_snapdir_lookup(ap)
}
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
#ifdef illumos
/*
* If we had an error, drop our hold on the vnode and
* zfsctl_snapshot_inactive() will clean up.
*/
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
}
#else
if (err != 0)
*vpp = NULL;
#endif
return (err);
}
@ -1133,8 +1143,10 @@ zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
ZFS_ENTER(zfsvfs);
cookie = *offp;
dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
&cookie, &case_conflict);
dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
if (error) {
ZFS_EXIT(zfsvfs);
if (error == ENOENT) {

File diff suppressed because it is too large

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@ -395,8 +396,10 @@ zfs_register_callbacks(vfs_t *vfsp)
boolean_t do_setuid = B_FALSE;
boolean_t exec = B_FALSE;
boolean_t do_exec = B_FALSE;
#ifdef illumos
boolean_t devices = B_FALSE;
boolean_t do_devices = B_FALSE;
#endif
boolean_t xattr = B_FALSE;
boolean_t do_xattr = B_FALSE;
boolean_t atime = B_FALSE;
@ -492,25 +495,33 @@ zfs_register_callbacks(vfs_t *vfsp)
* overboard...
*/
ds = dmu_objset_ds(os);
error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
error = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"xattr", xattr_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"recordsize", blksz_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"readonly", readonly_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
#ifdef illumos
error = error ? error : dsl_prop_register(ds,
"setuid", setuid_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
#endif
error = error ? error : dsl_prop_register(ds,
"exec", exec_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclmode", acl_mode_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
zfsvfs);
error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
@ -538,27 +549,37 @@ zfs_register_callbacks(vfs_t *vfsp)
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
atime_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
xattr_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
blksz_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
readonly_changed_cb, zfsvfs);
#ifdef illumos
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
devices_changed_cb, zfsvfs);
#endif
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
snapdir_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE),
acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
acl_inherit_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
vscan_changed_cb, zfsvfs);
return (error);
}
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
int error = 0;
/*
* Is it a valid type of object to track?
*/
@ -615,7 +636,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
*groupp = BSWAP_64(*groupp);
}
}
return (error);
return (0);
}
static void
@ -967,7 +988,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
rrw_init(&zfsvfs->z_teardown_lock);
rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@ -1414,8 +1435,9 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname)
char *str = NULL;
if (l_to_str_internal(mnt_sl, &str) == 0 &&
dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
dsl_prop_set_string(osname,
zfs_prop_to_name(ZFS_PROP_MLSLABEL),
ZPROP_SRC_LOCAL, str) == 0)
retv = 0;
if (str != NULL)
kmem_free(str, strlen(str) + 1);
@ -1875,7 +1897,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}
@ -2408,9 +2430,8 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
sa_register_update_callback(os, zfs_sa_upgrade);
}
spa_history_log_internal(LOG_DS_UPGRADE,
dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
zfsvfs->z_version, newvers, dmu_objset_id(os));
spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
"from %llu to %llu", zfsvfs->z_version, newvers);
dmu_tx_commit(tx);

View File

@ -246,7 +246,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
return (error);
@ -343,7 +343,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
if (error)
if (error != 0)
break;
for (lrp = lrbuf; lrp < end; lrp += reclen) {
@ -478,7 +478,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
if (dsl_dataset_is_snapshot(ds))
panic("dirtying snapshot!");
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, zilog);
}
@ -637,8 +637,8 @@ zil_claim(const char *osname, void *txarg)
objset_t *os;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@ -651,7 +651,7 @@ zil_claim(const char *osname, void *txarg)
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log);
dsl_dataset_dirty(dmu_objset_ds(os), tx);
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@ -676,7 +676,7 @@ zil_claim(const char *osname, void *txarg)
}
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@ -696,7 +696,7 @@ zil_check_log_chain(const char *osname, void *tx)
ASSERT(tx == NULL);
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@ -984,7 +984,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
/* pass the old blkptr in order to spread log blocks across devs */
error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
USE_SLOG(zilog));
if (!error) {
if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
@ -1095,7 +1095,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
}
if (error) {
if (error != 0) {
ASSERT(error == ENOENT || error == EEXIST ||
error == EALREADY);
return (lwb);
@ -1719,6 +1719,9 @@ zil_free(zilog_t *zilog)
{
zilog->zl_stop_sync = 1;
ASSERT0(zilog->zl_suspend);
ASSERT0(zilog->zl_suspending);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
@ -1814,32 +1817,100 @@ zil_close(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
}
static char *suspend_tag = "zil suspending";
/*
* Suspend an intent log. While in suspended mode, we still honor
* synchronous semantics, but we rely on txg_wait_synced() to do it.
* We suspend the log briefly when taking a snapshot so that the snapshot
* contains all the data it's supposed to, and has an empty intent log.
* On old version pools, we suspend the log briefly when taking a
* snapshot so that it will have an empty intent log.
*
* Long holds are not really intended to be used the way we do here --
* held for such a short time. A concurrent caller of dsl_dataset_long_held()
* could fail. Therefore we take pains to only put a long hold if it is
* actually necessary. Fortunately, it will only be necessary if the
* objset is currently mounted (or the ZVOL equivalent). In that case it
* will already have a long hold, so we are not really making things any worse.
*
* Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
* zvol_state_t), and use their mechanism to prevent their hold from being
* dropped (e.g. VFS_HOLD()). However, that would be even more pain for
* very little gain.
*
* if cookiep == NULL, this does both the suspend & resume.
* Otherwise, it returns with the dataset "long held", and the cookie
* should be passed into zil_resume().
*/
int
zil_suspend(zilog_t *zilog)
zil_suspend(const char *osname, void **cookiep)
{
const zil_header_t *zh = zilog->zl_header;
objset_t *os;
zilog_t *zilog;
const zil_header_t *zh;
int error;
error = dmu_objset_hold(osname, suspend_tag, &os);
if (error != 0)
return (error);
zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
zh = zilog->zl_header;
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (EBUSY);
}
if (zilog->zl_suspend++ != 0) {
/*
* Don't put a long hold in the cases where we can avoid it. This
* is when there is no cookie so we are doing a suspend & resume
* (i.e. called from zil_vdev_offline()), and there's nothing to do
* for the suspend because it's already suspended, or there's no ZIL.
*/
if (cookiep == NULL && !zilog->zl_suspending &&
(zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (0);
}
dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
zilog->zl_suspend++;
if (zilog->zl_suspend > 1) {
/*
* Someone else already began a suspend.
* Someone else is already suspending it.
* Just wait for them to finish.
*/
while (zilog->zl_suspending)
cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
/*
* If there is no pointer to an on-disk block, this ZIL must not
* be active (e.g. filesystem not mounted), so there's nothing
* to clean up.
*/
if (BP_IS_HOLE(&zh->zh_log)) {
ASSERT(cookiep != NULL); /* fast path already handled */
*cookiep = os;
mutex_exit(&zilog->zl_lock);
return (0);
}
zilog->zl_suspending = B_TRUE;
mutex_exit(&zilog->zl_lock);
@ -1852,16 +1923,25 @@ zil_suspend(zilog_t *zilog)
cv_broadcast(&zilog->zl_cv_suspend);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
void
zil_resume(zilog_t *zilog)
zil_resume(void *cookie)
{
objset_t *os = cookie;
zilog_t *zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
ASSERT(zilog->zl_suspend != 0);
zilog->zl_suspend--;
mutex_exit(&zilog->zl_lock);
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
}
typedef struct zil_replay_arg {
@ -1934,7 +2014,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
error = zil_read_log_data(zilog, (lr_write_t *)lr,
zr->zr_lr + reclen);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
@ -1955,7 +2035,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
* is updated if we are in replay mode.
*/
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);
if (error) {
if (error != 0) {
/*
* The DMU's dnode layer doesn't see removes until the txg
* commits, so a subsequent claim can spuriously fail with
@ -1965,7 +2045,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
*/
txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
return (0);
@ -2039,19 +2119,10 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx)
int
zil_vdev_offline(const char *osname, void *arg)
{
objset_t *os;
zilog_t *zilog;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error)
return (error);
zilog = dmu_objset_zil(os);
if (zil_suspend(zilog) != 0)
error = EEXIST;
else
zil_resume(zilog);
dmu_objset_rele(os, FTAG);
return (error);
error = zil_suspend(osname, NULL);
if (error != 0)
return (EEXIST);
return (0);
}

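The reworked zil_suspend()/zil_resume() above operate on an objset name and a cookie rather than a zilog_t. A rough sketch of the two calling styles, assuming the ZFS kernel environment; the dataset name is made up:

/* Illustration only. */
static int
zil_suspend_example(void)
{
        void *cookie;
        int error;

        /* style 1: hold the log suspended across some work */
        error = zil_suspend("tank/fs", &cookie);
        if (error != 0)
                return (error);
        /* ... the dataset is long-held and its intent log is empty ... */
        zil_resume(cookie);

        /*
         * style 2: cookiep == NULL suspends and resumes in one call,
         * as the simplified zil_vdev_offline() above now does
         */
        return (zil_suspend("tank/fs", NULL));
}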
Some files were not shown because too many files have changed in this diff