1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-11-28 08:02:54 +00:00

Increase limit of redaction list by using spill block

Currently redaction bookmarks and their associated redaction lists
have a relatively low limit of 36 redaction snapshots. This is imposed
by the number of snapshot GUIDs that fit in the bonus buffer of the
redaction list object. While this is more than enough for most use
cases, there are some limited cases where larger numbers would be
useful to support.

We tweak the redaction list creation code to use a spill block if
the number of redaction snapshots is above the amount that would fit
in the bonus buffer. We also make a small change to allow spill blocks
to be use for types of data besides SA. In order to fully leverage
this logic, we also change the redaction code to use vmem_alloc, to
handle extremely large allocations if needed. Finally, small tweaks
were made to the zfs commands and the test suite.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #15018
This commit is contained in:
Paul Dagnelie 2023-08-26 11:34:43 -07:00 committed by GitHub
parent 11326f8eb1
commit bee9cfb813
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 128 additions and 34 deletions

View File

@ -5293,8 +5293,18 @@ dump_one_objset(const char *dsname, void *arg)
avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
if (dbn->dbn_phys.zbm_redaction_obj != 0)
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
global_feature_count[
SPA_FEATURE_REDACTION_BOOKMARKS]++;
objset_t *mos = os->os_spa->spa_meta_objset;
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
if (rl->dn_have_spill) {
global_feature_count[
SPA_FEATURE_REDACTION_LIST_SPILL]++;
}
}
if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
}
@ -8135,6 +8145,7 @@ dump_zpool(spa_t *spa)
for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
global_feature_count[f] = UINT64_MAX;
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
global_feature_count[SPA_FEATURE_REDACTION_LIST_SPILL] = 0;
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
global_feature_count[SPA_FEATURE_LIVELIST] = 0;

View File

@ -3978,6 +3978,10 @@ zfs_do_redact(int argc, char **argv)
(void) fprintf(stderr, gettext("potentially invalid redaction "
"snapshot; full dataset names required\n"));
break;
case ESRCH:
(void) fprintf(stderr, gettext("attempted to resume redaction "
" with a mismatched redaction list\n"));
break;
default:
(void) fprintf(stderr, gettext("internal error: %s\n"),
strerror(errno));

View File

@ -72,6 +72,7 @@ typedef struct redaction_list_phys {
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
dmu_buf_t *rl_bonus;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;

View File

@ -80,6 +80,7 @@ typedef enum spa_feature {
SPA_FEATURE_BLAKE3,
SPA_FEATURE_BLOCK_CLONING,
SPA_FEATURE_AVZ_V2,
SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURES
} spa_feature_t;

View File

@ -596,7 +596,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='128' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2184' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2240' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -5809,7 +5809,8 @@
<enumerator name='SPA_FEATURE_BLAKE3' value='36'/>
<enumerator name='SPA_FEATURE_BLOCK_CLONING' value='37'/>
<enumerator name='SPA_FEATURE_AVZ_V2' value='38'/>
<enumerator name='SPA_FEATURES' value='39'/>
<enumerator name='SPA_FEATURE_REDACTION_LIST_SPILL' value='39'/>
<enumerator name='SPA_FEATURES' value='40'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<qualified-type-def type-id='22cce67b' const='yes' id='d2816df0'/>
@ -8706,8 +8707,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17472' id='dd432c71'>
<subrange length='39' type-id='7359adad' id='ae4a9561'/>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='17920' id='dd432c71'>
<subrange length='40' type-id='7359adad' id='ae4a9561'/>
</array-type-def>
<enum-decl name='zfeature_flags' id='6db816a4'>
<underlying-type type-id='9cac1fee'/>

View File

@ -947,6 +947,18 @@ once all filesystems that have ever had their
property set to
.Sy zstd
are destroyed.
.
.feature com.delphix redaction_list_spill no redaction_bookmarks
This feature enables the redaction list created by zfs redact to store
many more entries.
It becomes
.Sy active
when a redaction list is created with more than 36 entries,
and returns to being
.Sy enabled
when no long redaction lists remain in the pool.
For more information about redacted sends, see
.Xr zfs-send 8 .
.El
.
.Sh SEE ALSO

View File

@ -737,6 +737,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
{
static const spa_feature_t redact_list_spill_deps[] = {
SPA_FEATURE_REDACTION_BOOKMARKS,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_REDACTION_LIST_SPILL,
"com.delphix:redaction_list_spill", "redaction_list_spill",
"Support for increased number of redaction_snapshot "
"arguments in zfs redact.", 0, ZFEATURE_TYPE_BOOLEAN,
redact_list_spill_deps, sfeatures);
}
zfs_mod_list_supported_free(sfeatures);
}

View File

@ -746,7 +746,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
bqueue_enqueue(q, record, sizeof (*record));
return (0);
}
redact_nodes = kmem_zalloc(num_threads *
redact_nodes = vmem_zalloc(num_threads *
sizeof (*redact_nodes), KM_SLEEP);
avl_create(&start_tree, redact_node_compare_start,
@ -820,7 +820,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
avl_destroy(&start_tree);
avl_destroy(&end_tree);
kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
vmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
if (current_record != NULL)
bqueue_enqueue(q, current_record, sizeof (*current_record));
return (err);
@ -1030,7 +1030,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
numsnaps = fnvlist_num_pairs(redactnvl);
if (numsnaps > 0)
args = kmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
args = vmem_zalloc(numsnaps * sizeof (*args), KM_SLEEP);
nvpair_t *pair = NULL;
for (int i = 0; i < numsnaps; i++) {
@ -1079,7 +1079,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
kmem_free(newredactbook,
sizeof (char) * ZFS_MAX_DATASET_NAME_LEN);
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
return (SET_ERROR(ENAMETOOLONG));
}
err = dsl_bookmark_lookup(dp, newredactbook, NULL, &bookmark);
@ -1119,7 +1119,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
} else {
uint64_t *guids = NULL;
if (numsnaps > 0) {
guids = kmem_zalloc(numsnaps * sizeof (uint64_t),
guids = vmem_zalloc(numsnaps * sizeof (uint64_t),
KM_SLEEP);
}
for (int i = 0; i < numsnaps; i++) {
@ -1131,10 +1131,9 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
dp = NULL;
err = dsl_bookmark_create_redacted(newredactbook, snapname,
numsnaps, guids, FTAG, &new_rl);
kmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0) {
vmem_free(guids, numsnaps * sizeof (uint64_t));
if (err != 0)
goto out;
}
}
for (int i = 0; i < numsnaps; i++) {
@ -1188,7 +1187,7 @@ dmu_redact_snap(const char *snapname, nvlist_t *redactnvl,
}
if (args != NULL)
kmem_free(args, numsnaps * sizeof (*args));
vmem_free(args, numsnaps * sizeof (*args));
if (dp != NULL)
dsl_pool_rele(dp, FTAG);
if (ds != NULL) {

View File

@ -720,6 +720,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype == DMU_OTN_UINT64_METADATA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));

View File

@ -34,6 +34,7 @@
#include <sys/dsl_bookmark.h>
#include <zfs_namecheck.h>
#include <sys/dmu_send.h>
#include <sys/dbuf.h>
static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
@ -459,25 +460,42 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
if (redaction_list != NULL || bookmark_redacted) {
redaction_list_t *local_rl;
boolean_t spill = B_FALSE;
if (bookmark_redacted) {
redact_snaps = dsredactsnaps;
num_redact_snaps = dsnumsnaps;
}
int bonuslen = sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t);
if (bonuslen > dmu_bonus_max())
spill = B_TRUE;
dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
DMU_OTN_UINT64_METADATA, sizeof (redaction_list_phys_t) +
num_redact_snaps * sizeof (uint64_t), tx);
DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
if (spill) {
spa_feature_incr(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}
VERIFY0(dsl_redaction_list_hold_obj(dp,
dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
dsl_redaction_list_long_hold(dp, local_rl, tag);
ASSERT3U((local_rl)->rl_dbuf->db_size, >=,
sizeof (redaction_list_phys_t) + num_redact_snaps *
sizeof (uint64_t));
dmu_buf_will_dirty(local_rl->rl_dbuf, tx);
if (!spill) {
ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
dmu_buf_will_dirty(local_rl->rl_bonus, tx);
} else {
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
local_rl->rl_dbuf = db;
}
memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
sizeof (uint64_t) * num_redact_snaps);
local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
@ -636,11 +654,15 @@ dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
SPA_FEATURE_REDACTION_BOOKMARKS))
return (SET_ERROR(ENOTSUP));
/*
* If the list of redact snaps will not fit in the bonus buffer with
* the furthest reached object and offset, fail.
* If the list of redact snaps will not fit in the bonus buffer (or
* spill block, with the REDACTION_LIST_SPILL feature) with the
* furthest reached object and offset, fail.
*/
if (dbcra->dbcra_numsnaps > (dmu_bonus_max() -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t))
uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
dmu_bonus_max()) -
sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
if (dbcra->dbcra_numsnaps > snaplimit)
return (SET_ERROR(E2BIG));
if (dsl_bookmark_create_nvl_validate_pair(
@ -1040,6 +1062,14 @@ dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
}
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL, tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
@ -1213,7 +1243,9 @@ redaction_list_evict_sync(void *rlu)
void
dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
{
dmu_buf_rele(rl->rl_dbuf, tag);
if (rl->rl_bonus != rl->rl_dbuf)
dmu_buf_rele(rl->rl_dbuf, tag);
dmu_buf_rele(rl->rl_bonus, tag);
}
int
@ -1221,7 +1253,7 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t **rlp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
dmu_buf_t *dbuf, *spill_dbuf;
redaction_list_t *rl;
int err;
@ -1236,13 +1268,18 @@ dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
redaction_list_t *winner = NULL;
rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
rl->rl_dbuf = dbuf;
rl->rl_bonus = dbuf;
if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
rl->rl_dbuf = spill_dbuf;
} else {
rl->rl_dbuf = dbuf;
}
rl->rl_object = rlobj;
rl->rl_phys = dbuf->db_data;
rl->rl_phys = rl->rl_dbuf->db_data;
rl->rl_mos = dp->dp_meta_objset;
zfs_refcount_create(&rl->rl_longholds);
dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
&rl->rl_dbuf);
&rl->rl_bonus);
if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
kmem_free(rl, sizeof (*rl));
rl = winner;

View File

@ -1125,6 +1125,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
dnode_t *rl;
VERIFY0(dnode_hold(mos,
dbn->dbn_phys.zbm_redaction_obj, FTAG,
&rl));
if (rl->dn_have_spill) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_LIST_SPILL,
tx);
}
dnode_rele(rl, FTAG);
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),

View File

@ -86,6 +86,7 @@ typeset -a properties=(
"feature@log_spacemap"
"feature@device_rebuild"
"feature@draid"
"feature@redaction_list_spill"
)
if is_linux || is_freebsd; then

View File

@ -27,7 +27,6 @@
# second (the last block in the file) is common to them all.
# 2. Verify a redacted stream with a reasonable redaction list length can
# be correctly processed.
# 3. Verify that if the list is too long, the send fails gracefully.
#
typeset ds_name="many_clones"
@ -56,13 +55,18 @@ for i in {1..64}; do
log_must zfs snapshot ${clone}$i@snap
done
# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
# The limit isn't necessarily 64 snapshots. The maximum number of snapshots in
# the redacted list is determined in dsl_bookmark_create_redacted_check().
log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
log_must zfs redact $sendfs@snap book1 $clone{1..64}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE8"
log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap
rls_value="$(zpool get -H -o value feature@redaction_list_spill $POOL)"
if [ "$rls_value" = "active" ]; then
log_note "redaction_list_spill feature active"
else
log_fail "redaction_list_spill feature not active"
fi
log_pass "Redacted send can deal with a large redaction list."