mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-30 16:51:41 +00:00
3835 zfs need not store 2 copies of all metadata
illumos/illumos-gate@edf345e6b8
This commit is contained in:
parent
c338968df5
commit
bec2af9b62
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/vendor-sys/illumos/dist/; revision=266766
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
@ -198,7 +198,18 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t redundant_metadata_table[] = {
|
||||
{ "all", ZFS_REDUNDANT_METADATA_ALL },
|
||||
{ "most", ZFS_REDUNDANT_METADATA_MOST },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/* inherit index properties */
|
||||
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
|
||||
ZFS_REDUNDANT_METADATA_ALL,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"all | most", "REDUND_MD",
|
||||
redundant_metadata_table);
|
||||
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"standard | always | disabled", "SYNC",
|
||||
|
@ -22,7 +22,7 @@
|
||||
.\"
|
||||
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
|
||||
.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
|
||||
.\" Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2014 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
.\" Copyright 2013 Nexenta Systems, Inc. All Rights Reserved.
|
||||
.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
|
||||
@ -1253,6 +1253,37 @@ This property can also be referred to by its shortened column name,
|
||||
\fBrecsize\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fBredundant_metadata\fR=\fBall\fR | \fBmost\fR\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
Controls what types of metadata are stored redundantly. ZFS stores an
|
||||
extra copy of metadata, so that if a single block is corrupted, the
|
||||
amount of user data lost is limited. This extra copy is in addition to
|
||||
any redundancy provided at the pool level (e.g. by mirroring or RAID-Z),
|
||||
and is in addition to an extra copy specified by the \fBcopies\fR
|
||||
property (up to a total of 3 copies). For example, if the pool is
|
||||
mirrored, \fBcopies\fR=2, and \fBredundant_metadata\fR=most, then ZFS
|
||||
stores 6 copies of most metadata, and 4 copies of data and some
|
||||
metadata.
|
||||
.sp
|
||||
When set to \fBall\fR, ZFS stores an extra copy of all metadata. If a
|
||||
single on-disk block is corrupt, at worst a single block of user data
|
||||
(which is \fBrecordsize\fR bytes long) can be lost.
|
||||
.sp
|
||||
When set to \fBmost\fR, ZFS stores an extra copy of most types of
|
||||
metadata. This can improve performance of random writes, because less
|
||||
metadata must be written. In practice, at worst about 100 blocks (of
|
||||
\fBrecordsize\fR bytes each) of user data can be lost if a single
|
||||
on-disk block is corrupt. The exact behavior of which metadata blocks
|
||||
are stored redundantly may change in future releases.
|
||||
.sp
|
||||
The default value is \fBall\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
|
||||
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
|
||||
@ -1558,6 +1558,12 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
|
||||
int zfs_mdcomp_disable = 0;
|
||||
|
||||
/*
|
||||
* When the "redundant_metadata" property is set to "most", only indirect
|
||||
* blocks of this level and higher will have an additional ditto block.
|
||||
*/
|
||||
int zfs_redundant_metadata_most_ditto_level = 2;
|
||||
|
||||
void
|
||||
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
{
|
||||
@ -1597,6 +1603,13 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
if (zio_checksum_table[checksum].ci_correctable < 1 ||
|
||||
zio_checksum_table[checksum].ci_eck)
|
||||
checksum = ZIO_CHECKSUM_FLETCHER_4;
|
||||
|
||||
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
|
||||
(os->os_redundant_metadata ==
|
||||
ZFS_REDUNDANT_METADATA_MOST &&
|
||||
(level >= zfs_redundant_metadata_most_ditto_level ||
|
||||
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
|
||||
copies++;
|
||||
} else if (wp & WP_NOFILL) {
|
||||
ASSERT(level == 0);
|
||||
|
||||
@ -1644,7 +1657,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
zp->zp_compress = compress;
|
||||
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
|
||||
zp->zp_level = level;
|
||||
zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa));
|
||||
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
|
||||
zp->zp_dedup = dedup;
|
||||
zp->zp_dedup_verify = dedup && dedup_verify;
|
||||
zp->zp_nopwrite = nopwrite;
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
@ -115,13 +115,13 @@ dmu_objset_id(objset_t *os)
|
||||
return (ds ? ds->ds_object : 0);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_sync_type_t
|
||||
dmu_objset_syncprop(objset_t *os)
|
||||
{
|
||||
return (os->os_sync);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_logbias_op_t
|
||||
dmu_objset_logbias(objset_t *os)
|
||||
{
|
||||
return (os->os_logbias);
|
||||
@ -229,6 +229,20 @@ sync_changed_cb(void *arg, uint64_t newval)
|
||||
zil_set_sync(os->os_zil, newval);
|
||||
}
|
||||
|
||||
static void
|
||||
redundant_metadata_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
objset_t *os = arg;
|
||||
|
||||
/*
|
||||
* Inheritance and range checking should have been done by now.
|
||||
*/
|
||||
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
|
||||
newval == ZFS_REDUNDANT_METADATA_MOST);
|
||||
|
||||
os->os_redundant_metadata = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
logbias_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
@ -364,6 +378,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
zfs_prop_to_name(ZFS_PROP_SYNC),
|
||||
sync_changed_cb, os);
|
||||
}
|
||||
if (err == 0) {
|
||||
err = dsl_prop_register(ds,
|
||||
zfs_prop_to_name(
|
||||
ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os);
|
||||
}
|
||||
}
|
||||
if (err != 0) {
|
||||
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
|
||||
@ -377,9 +397,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
os->os_compress = ZIO_COMPRESS_LZJB;
|
||||
os->os_copies = spa_max_replication(spa);
|
||||
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
|
||||
os->os_dedup_verify = 0;
|
||||
os->os_logbias = 0;
|
||||
os->os_sync = 0;
|
||||
os->os_dedup_verify = B_FALSE;
|
||||
os->os_logbias = ZFS_LOGBIAS_LATENCY;
|
||||
os->os_sync = ZFS_SYNC_STANDARD;
|
||||
os->os_primary_cache = ZFS_CACHE_ALL;
|
||||
os->os_secondary_cache = ZFS_CACHE_ALL;
|
||||
}
|
||||
@ -622,6 +642,9 @@ dmu_objset_evict(objset_t *os)
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_SYNC),
|
||||
sync_changed_cb, os));
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os));
|
||||
}
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
* Copyright 2013 DEY Storage Systems, Inc.
|
||||
@ -737,8 +737,8 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
|
||||
extern void dmu_objset_name(objset_t *os, char *buf);
|
||||
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
|
||||
extern uint64_t dmu_objset_id(objset_t *os);
|
||||
extern uint64_t dmu_objset_syncprop(objset_t *os);
|
||||
extern uint64_t dmu_objset_logbias(objset_t *os);
|
||||
extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
|
||||
extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
|
||||
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
|
||||
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
@ -85,15 +85,16 @@ struct objset {
|
||||
zilog_t *os_zil;
|
||||
|
||||
/* can change, under dsl_dir's locks: */
|
||||
uint8_t os_checksum;
|
||||
uint8_t os_compress;
|
||||
enum zio_checksum os_checksum;
|
||||
enum zio_compress os_compress;
|
||||
uint8_t os_copies;
|
||||
uint8_t os_dedup_checksum;
|
||||
uint8_t os_dedup_verify;
|
||||
uint8_t os_logbias;
|
||||
uint8_t os_primary_cache;
|
||||
uint8_t os_secondary_cache;
|
||||
uint8_t os_sync;
|
||||
enum zio_checksum os_dedup_checksum;
|
||||
boolean_t os_dedup_verify;
|
||||
zfs_logbias_op_t os_logbias;
|
||||
zfs_cache_type_t os_primary_cache;
|
||||
zfs_cache_type_t os_secondary_cache;
|
||||
zfs_sync_type_t os_sync;
|
||||
zfs_redundant_metadata_type_t os_redundant_metadata;
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
@ -146,6 +146,7 @@ typedef enum {
|
||||
ZFS_PROP_SNAPSHOT_LIMIT,
|
||||
ZFS_PROP_FILESYSTEM_COUNT,
|
||||
ZFS_PROP_SNAPSHOT_COUNT,
|
||||
ZFS_PROP_REDUNDANT_METADATA,
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
@ -339,6 +340,10 @@ typedef enum {
|
||||
ZFS_SYNC_DISABLED = 2
|
||||
} zfs_sync_type_t;
|
||||
|
||||
/*
 * Settings of the "redundant_metadata" property.
 *
 * The numeric values are written out explicitly, in the same way as
 * zfs_sync_type_t above: they are handed around as plain uint64_t
 * property values (see redundant_metadata_changed_cb()), so adding or
 * reordering names here must never silently renumber an existing one.
 */
typedef enum {
	/* keep an extra copy of all metadata (the default) */
	ZFS_REDUNDANT_METADATA_ALL = 0,
	/* keep an extra copy of most, but not all, types of metadata */
	ZFS_REDUNDANT_METADATA_MOST = 1
} zfs_redundant_metadata_type_t;
|
||||
|
||||
/*
|
||||
* On-disk version number.
|
||||
|
Loading…
Reference in New Issue
Block a user