zfs: merge openzfs/zfs@804414aad
Notable upstream pull request merges:
#15024 Add missed DMU_PROJECTUSED_OBJECT prefetch
#15029 Do not request data L1 buffers on scan prefetch
#15036 FreeBSD: catch up to __FreeBSD_version 1400093
#15039 Fix raw receive with different indirect block size
#15047 FreeBSD: Fix build on stable/13 after 1302506
#15049 Fix the ZFS checksum error histograms with larger record sizes
#15052 Reduce bloat in ereport.fs.zfs.checksum events
#15056 Avoid extra snprintf() in dsl_deadlist_merge()
#15061 Ignore pool ashift property during vdev attachment
#15063 Don't panic if setting vdev properties is unsupported for this vdev type
#15067 spa_min_alloc should be GCD, not min
#15071 Add explicit prefetches to bpobj_iterate()
#15072 Adjust prefetch parameters
#15076 Refactor dmu_prefetch()
#15079 set autotrim default to 'off' everywhere
#15080 ZIL: Fix config lock deadlock
#15088 metaslab: tuneable to better control force ganging
#15096 Avoid waiting in dmu_sync_late_arrival()
#15097 BRT should return EOPNOTSUPP
#15103 Remove zl_issuer_lock from zil_suspend()
#15107 Remove fastwrite mechanism
#15113 libzfs: sendrecv: send_progress_thread: handle SIGINFO/SIGUSR1
#15122 ZIL: Second attempt to reduce scope of zl_issuer_lock
#15129 zpool_vdev_remove() should handle EALREADY error return
#15132 ZIL: Replay blocks without next block pointer
#15148 zfs_clone_range should return descriptive error codes
#15153 ZIL: Avoid dbuf_read() before dmu_sync()
#15172 copy_file_range: fix fallback when source create on same txg
#15180 Update outdated assertion from zio_write_compress
Obtained from: OpenZFS
OpenZFS commit: 804414aad2
This commit is contained in commit 315ee00fa9.
@@ -3042,6 +3042,7 @@ _prebuild_libs=	${_kerberos5_lib_libasn1} \
	lib/libpam/libpam lib/libthr \
	${_lib_libradius} lib/libsbuf lib/libtacplus \
	lib/libgeom \
+	${_lib_librt} \
	${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
	${_cddl_lib_libuutil} \
	${_cddl_lib_libavl} \
@@ -3097,6 +3098,7 @@ lib/libpjdlog__L: lib/libutil__L
lib/libcasper__L: lib/libnv__L
lib/liblzma__L: lib/libmd__L lib/libthr__L
lib/libzstd__L: lib/libthr__L
+lib/librt__L: lib/libthr__L

_generic_libs=	${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib}
.if ${MK_IPFILTER} != "no"
@@ -3122,6 +3124,7 @@ cddl/lib/libnvpair__L: cddl/lib/libspl__L
cddl/lib/libuutil__L: cddl/lib/libavl__L cddl/lib/libspl__L

.if ${MK_ZFS} != "no"
+_lib_librt=	lib/librt
_cddl_lib_libicp= cddl/lib/libicp
_cddl_lib_libicp_rescue= cddl/lib/libicp_rescue
_cddl_lib_libtpool= cddl/lib/libtpool
@@ -3136,7 +3139,7 @@ cddl/lib/libzutil__L: cddl/lib/libavl__L lib/libgeom__L lib/msun__L cddl/lib/lib

cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libspl__L cddl/lib/libzutil__L

-cddl/lib/libzfs__L: cddl/lib/libzfs_core__L lib/msun__L lib/libutil__L
+cddl/lib/libzfs__L: cddl/lib/libzfs_core__L lib/msun__L lib/libutil__L lib/librt__L
cddl/lib/libzfs__L: lib/libthr__L lib/libmd__L lib/libz__L cddl/lib/libumem__L
cddl/lib/libzfs__L: cddl/lib/libuutil__L cddl/lib/libavl__L lib/libgeom__L
cddl/lib/libzfs__L: cddl/lib/libnvpair__L cddl/lib/libzutil__L

@@ -19,6 +19,7 @@ LIBADD= \
	md \
	nvpair \
	pthread \
+	rt \
	umem \
	util \
	uutil \

@@ -15,6 +15,7 @@ DIRDEPS = \
	lib/libexpat \
	lib/libgeom \
	lib/libmd \
+	lib/librt \
	lib/libthr \
	lib/libutil \
	lib/libz \

@@ -143,7 +143,7 @@ CRUNCH_PROGS_usr.sbin+= zdb

CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma
.if ${MK_ZFS} != "no"
-CRUNCH_LIBS+= -lavl -lpthread -luutil -lumem -ltpool -lspl
+CRUNCH_LIBS+= -lavl -lpthread -luutil -lumem -ltpool -lspl -lrt
CRUNCH_LIBS_zfs+= ${LIBBE} \
	${LIBZPOOL} \
	${LIBZFS} \

@@ -39,6 +39,7 @@ DIRDEPS = \
	lib/libmd \
	lib/libmt \
	lib/libnv \
+	lib/librt \
	lib/libsbuf \
	lib/libthr \
	lib/libufs \

@@ -413,7 +413,7 @@ _DP_fifolog= z
_DP_ipf=	kvm
_DP_tpool=	spl
_DP_uutil=	avl spl
-_DP_zfs=	md pthread umem util uutil m avl bsdxml crypto geom nvpair \
+_DP_zfs=	md pthread rt umem util uutil m avl bsdxml crypto geom nvpair \
		z zfs_core zutil
_DP_zfsbootenv=	zfs nvpair
_DP_zfs_core=	nvpair spl zutil

@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.2.0
-Release: rc1
+Version: 2.2.99
+Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
-Linux-Maximum: 6.3
+Linux-Maximum: 6.4
Linux-Minimum: 3.10

@@ -79,6 +79,7 @@
#include <sys/dsl_crypt.h>
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <sys/brt.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

@@ -5342,12 +5343,20 @@ static const char *zdb_ot_extname[] = {
#define	ZB_TOTAL	DN_MAX_LEVELS
#define	SPA_MAX_FOR_16M	(SPA_MAXBLOCKSHIFT+1)

typedef struct zdb_brt_entry {
	dva_t		zbre_dva;
	uint64_t	zbre_refcount;
	avl_node_t	zbre_node;
} zdb_brt_entry_t;

typedef struct zdb_cb {
	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
	uint64_t	zcb_removing_size;
	uint64_t	zcb_checkpoint_size;
	uint64_t	zcb_dedup_asize;
	uint64_t	zcb_dedup_blocks;
	uint64_t	zcb_clone_asize;
	uint64_t	zcb_clone_blocks;
	uint64_t	zcb_psize_count[SPA_MAX_FOR_16M];
	uint64_t	zcb_lsize_count[SPA_MAX_FOR_16M];
	uint64_t	zcb_asize_count[SPA_MAX_FOR_16M];
@@ -5368,6 +5377,8 @@ typedef struct zdb_cb {
	int		zcb_haderrors;
	spa_t		*zcb_spa;
	uint32_t	**zcb_vd_obsolete_counts;
	avl_tree_t	zcb_brt;
	boolean_t	zcb_brt_is_active;
} zdb_cb_t;

/* test if two DVA offsets from same vdev are within the same metaslab */
@@ -5662,6 +5673,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
	zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp);
	zcb->zcb_asize_total += BP_GET_ASIZE(bp);

	if (zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) {
		/*
		 * Cloned blocks are special. We need to count them, so we can
		 * later uncount them when reporting leaked space, and we must
		 * only claim them once.
		 *
		 * To do this, we keep our own in-memory BRT. For each block
		 * we haven't seen before, we look it up in the real BRT and
		 * if it's there, we note it and its refcount then proceed as
		 * normal. If we see the block again, we count it as a clone
		 * and then give it no further consideration.
		 */
		zdb_brt_entry_t zbre_search, *zbre;
		avl_index_t where;

		zbre_search.zbre_dva = bp->blk_dva[0];
		zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
		if (zbre != NULL) {
			zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
			zcb->zcb_clone_blocks++;

			zbre->zbre_refcount--;
			if (zbre->zbre_refcount == 0) {
				avl_remove(&zcb->zcb_brt, zbre);
				umem_free(zbre, sizeof (zdb_brt_entry_t));
			}
			return;
		}

		uint64_t crefcnt = brt_entry_get_refcount(zcb->zcb_spa, bp);
		if (crefcnt > 0) {
			zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
			    UMEM_NOFAIL);
			zbre->zbre_dva = bp->blk_dva[0];
			zbre->zbre_refcount = crefcnt;
			avl_insert(&zcb->zcb_brt, zbre, where);
		}
	}

	if (dump_opt['L'])
		return;

@@ -6664,6 +6714,20 @@ deleted_livelists_dump_mos(spa_t *spa)
	iterate_deleted_livelists(spa, dump_livelist_cb, NULL);
}

static int
zdb_brt_entry_compare(const void *zcn1, const void *zcn2)
{
	const dva_t *dva1 = &((const zdb_brt_entry_t *)zcn1)->zbre_dva;
	const dva_t *dva2 = &((const zdb_brt_entry_t *)zcn2)->zbre_dva;
	int cmp;

	cmp = TREE_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
	if (cmp == 0)
		cmp = TREE_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2));

	return (cmp);
}

static int
dump_block_stats(spa_t *spa)
{
@@ -6678,6 +6742,13 @@ dump_block_stats(spa_t *spa)

	zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);

	if (spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
		avl_create(&zcb->zcb_brt, zdb_brt_entry_compare,
		    sizeof (zdb_brt_entry_t),
		    offsetof(zdb_brt_entry_t, zbre_node));
		zcb->zcb_brt_is_active = B_TRUE;
	}

	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
	    (dump_opt['c'] == 1) ? "metadata " : "",
@@ -6779,7 +6850,8 @@ dump_block_stats(spa_t *spa)
	    metaslab_class_get_alloc(spa_special_class(spa)) +
	    metaslab_class_get_alloc(spa_dedup_class(spa)) +
	    get_unflushed_alloc_space(spa);
-	total_found = tzb->zb_asize - zcb->zcb_dedup_asize +
+	total_found =
+	    tzb->zb_asize - zcb->zcb_dedup_asize - zcb->zcb_clone_asize +
	    zcb->zcb_removing_size + zcb->zcb_checkpoint_size;

	if (total_found == total_alloc && !dump_opt['L']) {
@@ -6820,6 +6892,9 @@ dump_block_stats(spa_t *spa)
	    "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
	    (u_longlong_t)zcb->zcb_dedup_blocks,
	    (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
	(void) printf("\t%-16s %14llu count: %6llu\n",
	    "bp cloned:", (u_longlong_t)zcb->zcb_clone_asize,
	    (u_longlong_t)zcb->zcb_clone_blocks);
	(void) printf("\t%-16s %14llu used: %5.2f%%\n", "Normal class:",
	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);

@@ -607,8 +607,6 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
	 */
	if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
	    strcmp(dp->dd_compare, path) != 0) {
-		zed_log_msg(LOG_INFO, " %s: no match (%s != vdev %s)",
-		    __func__, dp->dd_compare, path);
		return;
	}
	if (dp->dd_new_vdev_guid != 0 && dp->dd_new_vdev_guid != guid) {

@@ -416,6 +416,11 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
	    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
		return;

	if (vdev_guid == 0) {
		fmd_hdl_debug(hdl, "Got a zero GUID");
		return;
	}

	if (spare) {
		int nspares = find_and_remove_spares(zhdl, vdev_guid);
		fmd_hdl_debug(hdl, "%d spares removed", nspares);

sys/contrib/openzfs/cmd/zed/zed.d/statechange-slot_off.sh (new executable file, 61 lines)
@@ -0,0 +1,61 @@
#!/bin/sh
#
# Turn off disk's enclosure slot if it becomes FAULTED.
#
# Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos
# as they flip between FAULTED and ONLINE. If
# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets
# FAULTED, then power down the slot via sysfs:
#
#	/sys/class/enclosure/<enclosure>/<slot>/power_status
#
# We assume the user will be responsible for turning the slot back on again.
#
# Note that this script requires that your enclosure be supported by the
# Linux SCSI Enclosure services (SES) driver. The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
# Exit codes:
#  0: slot successfully powered off
#  1: enclosure not available
#  2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled
#  3: vdev was not FAULTED
#  4: The enclosure sysfs path passed from ZFS does not exist
#  5: Enclosure slot didn't actually turn off after we told it to

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

if [ ! -d /sys/class/enclosure ] ; then
	# No JBOD enclosure or NVMe slots
	exit 1
fi

if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then
	exit 2
fi

if [ "$ZEVENT_VDEV_STATE_STR" != "FAULTED" ] ; then
	exit 3
fi

if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then
	exit 4
fi

echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"

# Wait for sysfs to report that the slot is off. It can take ~400ms on some
# enclosures.
for i in $(seq 1 20) ; do
	if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then
		break
	fi
	sleep 0.1
done

if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" != "off" ] ; then
	exit 5
fi

zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"
@@ -142,3 +142,8 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
# Disabled by default, 1 to enable and 0 to disable.
#ZED_SYSLOG_DISPLAY_GUIDS=1

##
# Power off the drive's slot in the enclosure if it becomes FAULTED. This can
# help silence misbehaving drives. This assumes your drive enclosure fully
# supports slot power control via sysfs.
#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1

@@ -2412,7 +2412,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
	int error;

	ASSERT3P(lwb, !=, NULL);
-	ASSERT3P(zio, !=, NULL);
	ASSERT3U(size, !=, 0);

	ztest_object_lock(zd, object, RL_READER);
@@ -2446,6 +2445,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
		    DMU_READ_NO_PREFETCH);
		ASSERT0(error);
	} else {
+		ASSERT3P(zio, !=, NULL);
		size = doi.doi_data_block_size;
		if (ISP2(size)) {
			offset = P2ALIGN(offset, size);

@@ -4,6 +4,7 @@
# Not following: a was not specified as input (see shellcheck -x). [SC1091]
# Prefer putting braces around variable references even when not strictly required. [SC2250]
# Consider invoking this command separately to avoid masking its return value (or use '|| true' to ignore). [SC2312]
+# Command appears to be unreachable. Check usage (or ignore if invoked indirectly). [SC2317]
# In POSIX sh, 'local' is undefined. [SC2039] # older ShellCheck versions
# In POSIX sh, 'local' is undefined. [SC3043] # newer ShellCheck versions

@@ -18,7 +19,7 @@ PHONY += shellcheck
_STGT = $(subst ^,/,$(subst shellcheck-here-,,$@))
shellcheck-here-%:
if HAVE_SHELLCHECK
-	shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
+	shellcheck --format=gcc --enable=all --exclude=SC1090,SC1091,SC2039,SC2250,SC2312,SC2317,SC3043 $$([ -n "$(SHELLCHECK_SHELL)" ] && echo "--shell=$(SHELLCHECK_SHELL)") "$$([ -e "$(_STGT)" ] || echo "$(srcdir)/")$(_STGT)"
else
	@echo "skipping shellcheck of" $(_STGT) "because shellcheck is not installed"
endif

@ -16,12 +16,63 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change,
|
||||
dnl # blkdev_get_by_path() takes 4 args
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
|
||||
ZFS_LINUX_TEST_SRC([blkdev_get_by_path_4arg], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct block_device *bdev __attribute__ ((unused)) = NULL;
|
||||
const char *path = "path";
|
||||
fmode_t mode = 0;
|
||||
void *holder = NULL;
|
||||
struct blk_holder_ops h;
|
||||
|
||||
bdev = blkdev_get_by_path(path, mode, holder, &h);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
|
||||
AC_MSG_CHECKING([whether blkdev_get_by_path() exists])
|
||||
AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 4 args])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_get_by_path_4arg], [
|
||||
AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH_4ARG, 1,
|
||||
[blkdev_get_by_path() exists and takes 4 args])
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change
|
||||
dnl # blk_mode_t was added as a type to supercede some places where fmode_t
|
||||
dnl # is used
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T], [
|
||||
ZFS_LINUX_TEST_SRC([blk_mode_t], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
blk_mode_t m __attribute((unused)) = (blk_mode_t)0;
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
|
||||
AC_MSG_CHECKING([whether blk_mode_t is defined])
|
||||
ZFS_LINUX_TEST_RESULT([blk_mode_t], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLK_MODE_T, 1, [blk_mode_t is defined])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
@ -41,12 +92,35 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change.
|
||||
dnl # blkdev_put() takes (void* holder) as arg 2
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [
|
||||
ZFS_LINUX_TEST_SRC([blkdev_put_holder], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct block_device *bdev = NULL;
|
||||
void *holder = NULL;
|
||||
|
||||
blkdev_put(bdev, holder);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
|
||||
AC_MSG_CHECKING([whether blkdev_put() exists])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_put], [
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_put()])
|
||||
AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
|
||||
ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1,
|
||||
[blkdev_put() accepts void* as arg 2])
|
||||
], [
|
||||
ZFS_LINUX_TEST_ERROR([blkdev_put()])
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
@ -103,6 +177,33 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change
|
||||
dnl # disk_check_media_change() was added
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
|
||||
ZFS_LINUX_TEST_SRC([disk_check_media_change], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct block_device *bdev = NULL;
|
||||
bool error;
|
||||
|
||||
error = disk_check_media_change(bdev->bd_disk);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
|
||||
AC_MSG_CHECKING([whether disk_check_media_change() exists])
|
||||
ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
|
||||
[disk_check_media_change() exists])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # bdev_kobj() is introduced from 5.12
|
||||
dnl #
|
||||
@ -443,9 +544,34 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 6.5.x API change
|
||||
dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
|
||||
ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
|
||||
#include <linux/blkdev.h>
|
||||
],[
|
||||
blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
|
||||
AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
|
||||
ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
|
||||
AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1, [BLK_STS_RESV_CONFLICT is defined])
|
||||
AC_MSG_RESULT(yes)
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV
|
||||
@ -458,6 +584,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
|
||||
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||
@ -476,4 +605,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
|
||||
ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
|
||||
ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
|
||||
ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
|
||||
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
|
||||
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
|
||||
ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
|
||||
])
|
||||
|
@ -49,12 +49,42 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
|
||||
], [], [])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # 5.9.x API change
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [
|
||||
ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
void blk_release(struct gendisk *g) {
|
||||
(void) g;
|
||||
return;
|
||||
}
|
||||
|
||||
static const struct block_device_operations
|
||||
bops __attribute__ ((unused)) = {
|
||||
.open = NULL,
|
||||
.release = blk_release,
|
||||
.ioctl = NULL,
|
||||
.compat_ioctl = NULL,
|
||||
};
|
||||
], [], [])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
|
||||
AC_MSG_CHECKING([whether bops->release() is void])
|
||||
AC_MSG_CHECKING([whether bops->release() is void and takes 2 args])
|
||||
ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [
|
||||
AC_MSG_RESULT(yes)
|
||||
],[
|
||||
ZFS_LINUX_TEST_ERROR([bops->release()])
|
||||
AC_MSG_RESULT(no)
|
||||
AC_MSG_CHECKING([whether bops->release() is void and takes 1 arg])
|
||||
ZFS_LINUX_TEST_RESULT([block_device_operations_release_void_1arg], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [1],
|
||||
[Define if release() in block_device_operations takes 1 arg])
|
||||
],[
|
||||
ZFS_LINUX_TEST_ERROR([bops->release()])
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
@ -92,6 +122,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [
|
||||
ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
|
||||
ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
||||
ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
|
||||
ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
|
||||
])
|
||||
|
||||
|
sys/contrib/openzfs/config/kernel-filemap-splice-read.m4 (new file, 25 lines)
@@ -0,0 +1,25 @@
AC_DEFUN([ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ], [
	dnl #
	dnl # Kernel 6.5 - generic_file_splice_read was removed in favor
	dnl # of copy_splice_read for the .splice_read member of the
	dnl # file_operations struct.
	dnl #
	ZFS_LINUX_TEST_SRC([has_copy_splice_read], [
		#include <linux/fs.h>

		struct file_operations fops __attribute__((unused)) = {
			.splice_read = copy_splice_read,
		};
	],[])
])

AC_DEFUN([ZFS_AC_KERNEL_COPY_SPLICE_READ], [
	AC_MSG_CHECKING([whether copy_splice_read() exists])
	ZFS_LINUX_TEST_RESULT([has_copy_splice_read], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_COPY_SPLICE_READ, 1,
			[copy_splice_read exists])
	],[
		AC_MSG_RESULT(no)
	])
])
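For context, a minimal consumer-side sketch (not part of this diff) of how a HAVE_COPY_SPLICE_READ result like the one defined above is typically used to pick the .splice_read handler; the example_fops name is illustrative only:

#include <linux/fs.h>

/* Illustrative only: select the splice_read handler based on the configure result. */
static const struct file_operations example_fops = {
#ifdef HAVE_COPY_SPLICE_READ
	.splice_read	= copy_splice_read,		/* Linux 6.5+ */
#else
	.splice_read	= generic_file_splice_read,	/* older kernels */
#endif
};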
sys/contrib/openzfs/config/kernel-register_sysctl_table.m4 (new file, 27 lines)
@@ -0,0 +1,27 @@
dnl #
dnl # Linux 6.5 removes register_sysctl_table
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
		#include <linux/sysctl.h>

		static struct ctl_table dummy_table[] = {
			{}
		};

	],[
		struct ctl_table_header *h
		    __attribute((unused)) = register_sysctl_table(dummy_table);
	])
])

AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
	AC_MSG_CHECKING([whether register_sysctl_table exists])
	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
		AC_MSG_RESULT([yes])
		AC_DEFINE(HAVE_REGISTER_SYSCTL_TABLE, 1,
			[register_sysctl_table exists])
	],[
		AC_MSG_RESULT([no])
	])
])
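A plausible consumer pattern for the HAVE_REGISTER_SYSCTL_TABLE result (a sketch under assumptions, not the upstream change; the "kernel/spl" path and function name are illustrative): fall back to register_sysctl(), which takes an explicit path, on 6.5+ kernels where register_sysctl_table() has been removed.

#include <linux/sysctl.h>

static struct ctl_table_header *
example_register_tables(struct ctl_table *table)
{
#ifdef HAVE_REGISTER_SYSCTL_TABLE
	return (register_sysctl_table(table));
#else
	/* Path-based registration is the replacement API on Linux 6.5+. */
	return (register_sysctl("kernel/spl", table));
#endif
}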
50
sys/contrib/openzfs/config/kernel-vfs-extended-file_range.m4
Normal file
50
sys/contrib/openzfs/config/kernel-vfs-extended-file_range.m4
Normal file
@ -0,0 +1,50 @@
|
||||
dnl #
|
||||
dnl # EL7 have backported copy_file_range and clone_file_range and
|
||||
dnl # added them to an "extended" file_operations struct.
|
||||
dnl #
|
||||
dnl # We're testing for both functions in one here, because they will only
|
||||
dnl # ever appear together and we don't want to match a similar method in
|
||||
dnl # some future vendor kernel.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_file_operations_extend], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static ssize_t test_copy_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
size_t len, unsigned int flags) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len; (void) flags;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int test_clone_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
u64 len) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static const struct file_operations_extend
|
||||
fops __attribute__ ((unused)) = {
|
||||
.kabi_fops = {},
|
||||
.copy_file_range = test_copy_file_range,
|
||||
.clone_file_range = test_clone_file_range,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND], [
|
||||
AC_MSG_CHECKING([whether file_operations_extend takes \
|
||||
.copy_file_range() and .clone_file_range()])
|
||||
ZFS_LINUX_TEST_RESULT([vfs_file_operations_extend], [
|
||||
AC_MSG_RESULT([yes])
|
||||
AC_DEFINE(HAVE_VFS_FILE_OPERATIONS_EXTEND, 1,
|
||||
[file_operations_extend takes .copy_file_range()
|
||||
and .clone_file_range()])
|
||||
],[
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
])
|
164
sys/contrib/openzfs/config/kernel-vfs-file_range.m4
Normal file
164
sys/contrib/openzfs/config/kernel-vfs-file_range.m4
Normal file
@ -0,0 +1,164 @@
|
||||
dnl #
|
||||
dnl # The *_file_range APIs have a long history:
|
||||
dnl #
|
||||
dnl # 2.6.29: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE ioctl introduced
|
||||
dnl # 3.12: BTRFS_IOC_FILE_EXTENT_SAME ioctl introduced
|
||||
dnl #
|
||||
dnl # 4.5: copy_file_range() syscall introduced, added to VFS
|
||||
dnl # 4.5: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE renamed to FICLONE ands
|
||||
dnl # FICLONERANGE, added to VFS as clone_file_range()
|
||||
dnl # 4.5: BTRFS_IOC_FILE_EXTENT_SAME renamed to FIDEDUPERANGE, added to VFS
|
||||
dnl # as dedupe_file_range()
|
||||
dnl #
|
||||
dnl # 4.20: VFS clone_file_range() and dedupe_file_range() replaced by
|
||||
dnl # remap_file_range()
|
||||
dnl #
|
||||
dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
|
||||
dnl # generic_copy_file_range() added to support it
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static ssize_t test_copy_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
size_t len, unsigned int flags) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len; (void) flags;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static const struct file_operations
|
||||
fops __attribute__ ((unused)) = {
|
||||
.copy_file_range = test_copy_file_range,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE], [
|
||||
AC_MSG_CHECKING([whether fops->copy_file_range() is available])
|
||||
ZFS_LINUX_TEST_RESULT([vfs_copy_file_range], [
|
||||
AC_MSG_RESULT([yes])
|
||||
AC_DEFINE(HAVE_VFS_COPY_FILE_RANGE, 1,
|
||||
[fops->copy_file_range() is available])
|
||||
],[
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([generic_copy_file_range], [
|
||||
#include <linux/fs.h>
|
||||
], [
|
||||
struct file *src_file __attribute__ ((unused)) = NULL;
|
||||
loff_t src_off __attribute__ ((unused)) = 0;
|
||||
struct file *dst_file __attribute__ ((unused)) = NULL;
|
||||
loff_t dst_off __attribute__ ((unused)) = 0;
|
||||
size_t len __attribute__ ((unused)) = 0;
|
||||
unsigned int flags __attribute__ ((unused)) = 0;
|
||||
generic_copy_file_range(src_file, src_off, dst_file, dst_off,
|
||||
len, flags);
|
||||
])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
|
||||
AC_MSG_CHECKING([whether generic_copy_file_range() is available])
|
||||
ZFS_LINUX_TEST_RESULT_SYMBOL([generic_copy_file_range],
|
||||
[generic_copy_file_range], [fs/read_write.c], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_VFS_GENERIC_COPY_FILE_RANGE, 1,
|
||||
[generic_copy_file_range() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static int test_clone_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
u64 len) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static const struct file_operations
|
||||
fops __attribute__ ((unused)) = {
|
||||
.clone_file_range = test_clone_file_range,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE], [
|
||||
AC_MSG_CHECKING([whether fops->clone_file_range() is available])
|
||||
ZFS_LINUX_TEST_RESULT([vfs_clone_file_range], [
|
||||
AC_MSG_RESULT([yes])
|
||||
AC_DEFINE(HAVE_VFS_CLONE_FILE_RANGE, 1,
|
||||
[fops->clone_file_range() is available])
|
||||
],[
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_dedupe_file_range], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static int test_dedupe_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
u64 len) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static const struct file_operations
|
||||
fops __attribute__ ((unused)) = {
|
||||
.dedupe_file_range = test_dedupe_file_range,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE], [
|
||||
AC_MSG_CHECKING([whether fops->dedupe_file_range() is available])
|
||||
ZFS_LINUX_TEST_RESULT([vfs_dedupe_file_range], [
|
||||
AC_MSG_RESULT([yes])
|
||||
AC_DEFINE(HAVE_VFS_DEDUPE_FILE_RANGE, 1,
|
||||
[fops->dedupe_file_range() is available])
|
||||
],[
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_remap_file_range], [
|
||||
#include <linux/fs.h>
|
||||
|
||||
static loff_t test_remap_file_range(struct file *src_file,
|
||||
loff_t src_off, struct file *dst_file, loff_t dst_off,
|
||||
loff_t len, unsigned int flags) {
|
||||
(void) src_file; (void) src_off;
|
||||
(void) dst_file; (void) dst_off;
|
||||
(void) len; (void) flags;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static const struct file_operations
|
||||
fops __attribute__ ((unused)) = {
|
||||
.remap_file_range = test_remap_file_range,
|
||||
};
|
||||
],[])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE], [
|
||||
AC_MSG_CHECKING([whether fops->remap_file_range() is available])
|
||||
ZFS_LINUX_TEST_RESULT([vfs_remap_file_range], [
|
||||
AC_MSG_RESULT([yes])
|
||||
AC_DEFINE(HAVE_VFS_REMAP_FILE_RANGE, 1,
|
||||
[fops->remap_file_range() is available])
|
||||
],[
|
||||
AC_MSG_RESULT([no])
|
||||
])
|
||||
])
|
@ -6,8 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uio.h>
|
||||
],[
|
||||
int type __attribute__ ((unused)) =
|
||||
ITER_IOVEC | ITER_KVEC | ITER_BVEC | ITER_PIPE;
|
||||
int type __attribute__ ((unused)) = ITER_KVEC;
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iov_iter_advance], [
|
||||
@ -93,6 +92,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
|
||||
struct iov_iter iter = { 0 };
|
||||
__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iter_iov], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uio.h>
|
||||
],[
|
||||
struct iov_iter iter = { 0 };
|
||||
__attribute__((unused)) const struct iovec *iov = iter_iov(&iter);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
|
||||
@ -201,4 +208,19 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
|
||||
AC_DEFINE(HAVE_VFS_IOV_ITER, 1,
|
||||
[All required iov_iter interfaces are available])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Kernel 6.5 introduces the iter_iov() function that returns the
|
||||
dnl # __iov member of an iov_iter*. The iov member was renamed to this
|
||||
dnl # __iov member, and is intended to be accessed via the helper
|
||||
dnl # function now.
|
||||
dnl #
|
||||
AC_MSG_CHECKING([whether iter_iov() is available])
|
||||
ZFS_LINUX_TEST_RESULT([iter_iov], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_ITER_IOV, 1,
|
||||
[iter_iov() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
@ -116,6 +116,12 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
|
||||
ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
|
||||
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND
|
||||
ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
|
||||
ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
|
||||
ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
|
||||
@ -154,6 +160,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_FILEMAP
|
||||
ZFS_AC_KERNEL_SRC_WRITEPAGE_T
|
||||
ZFS_AC_KERNEL_SRC_RECLAIMED
|
||||
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
|
||||
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
@ -249,6 +257,12 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_VFS_RW_ITERATE
|
||||
ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
|
||||
ZFS_AC_KERNEL_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND
|
||||
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
|
||||
ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
|
||||
ZFS_AC_KERNEL_MAKE_REQUEST_FN
|
||||
@ -287,6 +301,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_FILEMAP
|
||||
ZFS_AC_KERNEL_WRITEPAGE_T
|
||||
ZFS_AC_KERNEL_RECLAIMED
|
||||
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
|
||||
ZFS_AC_KERNEL_COPY_SPLICE_READ
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
|
@ -1,3 +1,9 @@
|
||||
openzfs-linux (2.2.99-1) unstable; urgency=low
|
||||
|
||||
* OpenZFS 2.2 is tagged.
|
||||
|
||||
-- Umer Saleem <usaleem@ixsystems.com> Wed, 12 Jul 2022 15:00:00 -0400
|
||||
|
||||
openzfs-linux (2.1.99-1) unstable; urgency=low
|
||||
|
||||
* Integrate minimally modified Debian packaging from ZFS on Linux
|
||||
|
@ -1,10 +1,8 @@
|
||||
sbin/zinject
|
||||
sbin/ztest
|
||||
usr/bin/raidz_test
|
||||
usr/share/man/man1/raidz_test.1
|
||||
usr/share/man/man1/test-runner.1
|
||||
usr/share/man/man1/ztest.1
|
||||
usr/share/man/man8/zinject.8
|
||||
usr/share/zfs/common.sh
|
||||
usr/share/zfs/runfiles/
|
||||
usr/share/zfs/test-runner
|
||||
|
@ -27,6 +27,7 @@ sbin/zfs
|
||||
sbin/zfs_ids_to_path
|
||||
sbin/zgenhostid
|
||||
sbin/zhack
|
||||
sbin/zinject
|
||||
sbin/zpool
|
||||
sbin/zstream
|
||||
sbin/zstreamdump
|
||||
@ -92,6 +93,7 @@ usr/share/man/man8/zfs_ids_to_path.8
|
||||
usr/share/man/man7/zfsconcepts.7
|
||||
usr/share/man/man7/zfsprops.7
|
||||
usr/share/man/man8/zgenhostid.8
|
||||
usr/share/man/man8/zinject.8
|
||||
usr/share/man/man8/zpool-add.8
|
||||
usr/share/man/man8/zpool-attach.8
|
||||
usr/share/man/man8/zpool-checkpoint.8
|
||||
|
@@ -12,11 +12,12 @@ ExecStart=/bin/sh -c '
    decode_root_args || exit 0; \
    [ "$root" = "zfs:AUTO" ] && root="$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"; \
    rootflags="$(getarg rootflags=)"; \
-   case ",$rootflags," in \
-       *,zfsutil,*) ;; \
-       ,,) rootflags=zfsutil ;; \
-       *) rootflags="zfsutil,$rootflags" ;; \
-   esac; \
+   [ "$(@sbindir@/zfs get -H -o value mountpoint "$root")" = legacy ] || \
+       case ",$rootflags," in \
+           *,zfsutil,*) ;; \
+           ,,) rootflags=zfsutil ;; \
+           *) rootflags="zfsutil,$rootflags" ;; \
+       esac; \
    exec systemctl set-environment BOOTFS="$root" BOOTFSFLAGS="$rootflags"'

[Install]

@@ -2,7 +2,7 @@
Description=Rollback bootfs just before it is mounted
Requisite=zfs-import.target
After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
-Before=dracut-mount.service
+Before=dracut-mount.service sysroot.mount
DefaultDependencies=no
ConditionKernelCommandLine=bootfs.rollback
ConditionEnvironment=BOOTFS

@@ -36,7 +36,11 @@ struct xucred;
typedef struct flock	flock64_t;
typedef struct vnode	vnode_t;
typedef struct vattr	vattr_t;
-#define	vtype_t	__enum_uint8(vtype)
+#if __FreeBSD_version < 1400093
+typedef enum vtype vtype_t;
+#else
+#define	vtype_t	__enum_uint8(vtype)
+#endif

#include <sys/types.h>
#include <sys/queue.h>

@ -181,7 +181,11 @@ bi_status_to_errno(blk_status_t status)
|
||||
return (ENOLINK);
|
||||
case BLK_STS_TARGET:
|
||||
return (EREMOTEIO);
|
||||
#ifdef HAVE_BLK_STS_RESV_CONFLICT
|
||||
case BLK_STS_RESV_CONFLICT:
|
||||
#else
|
||||
case BLK_STS_NEXUS:
|
||||
#endif
|
||||
return (EBADE);
|
||||
case BLK_STS_MEDIUM:
|
||||
return (ENODATA);
|
||||
@ -215,7 +219,11 @@ errno_to_bi_status(int error)
|
||||
case EREMOTEIO:
|
||||
return (BLK_STS_TARGET);
|
||||
case EBADE:
|
||||
#ifdef HAVE_BLK_STS_RESV_CONFLICT
|
||||
return (BLK_STS_RESV_CONFLICT);
|
||||
#else
|
||||
return (BLK_STS_NEXUS);
|
||||
#endif
|
||||
case ENODATA:
|
||||
return (BLK_STS_MEDIUM);
|
||||
case EILSEQ:
|
||||
@ -337,6 +345,9 @@ zfs_check_media_change(struct block_device *bdev)
|
||||
return (0);
|
||||
}
|
||||
#define vdev_bdev_reread_part(bdev) zfs_check_media_change(bdev)
|
||||
#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
|
||||
#define vdev_bdev_reread_part(bdev) disk_check_media_change(bdev->bd_disk)
|
||||
#define zfs_check_media_change(bdev) disk_check_media_change(bdev->bd_disk)
|
||||
#else
|
||||
/*
|
||||
* This is encountered if check_disk_change() and bdev_check_media_change()
|
||||
@ -387,6 +398,12 @@ vdev_lookup_bdev(const char *path, dev_t *dev)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(HAVE_BLK_MODE_T)
|
||||
#define blk_mode_is_open_write(flag) ((flag) & BLK_OPEN_WRITE)
|
||||
#else
|
||||
#define blk_mode_is_open_write(flag) ((flag) & FMODE_WRITE)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Kernels without bio_set_op_attrs use bi_rw for the bio flags.
|
||||
*/
|
||||
|
@ -198,6 +198,14 @@ extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
|
||||
spl_kmem_cache_create(name, size, align, ctor, dtor, rclm, priv, vmp, fl)
|
||||
#define kmem_cache_set_move(skc, move) spl_kmem_cache_set_move(skc, move)
|
||||
#define kmem_cache_destroy(skc) spl_kmem_cache_destroy(skc)
|
||||
/*
|
||||
* This is necessary to be compatible with other kernel modules
|
||||
* or in-tree filesystem that may define kmem_cache_alloc,
|
||||
* like bcachefs does it now.
|
||||
*/
|
||||
#ifdef kmem_cache_alloc
|
||||
#undef kmem_cache_alloc
|
||||
#endif
|
||||
#define kmem_cache_alloc(skc, flags) spl_kmem_cache_alloc(skc, flags)
|
||||
#define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj)
|
||||
#define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc)
|
||||
|
@@ -38,7 +38,7 @@ typedef unsigned long ulong_t;
typedef unsigned long long u_longlong_t;
typedef long long longlong_t;

-typedef unsigned long intptr_t;
+typedef long intptr_t;
typedef unsigned long long rlim64_t;

typedef struct task_struct kthread_t;

@@ -173,4 +173,16 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
}
#endif

#if defined(HAVE_ITER_IOV)
#define	zfs_uio_iter_iov(iter)	iter_iov((iter))
#else
#define	zfs_uio_iter_iov(iter)	(iter)->iov
#endif

#if defined(HAVE_IOV_ITER_TYPE)
#define	zfs_uio_iov_iter_type(iter)	iov_iter_type((iter))
#else
#define	zfs_uio_iov_iter_type(iter)	(iter)->type
#endif

#endif /* SPL_UIO_H */

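A minimal usage sketch of the new compatibility macro (illustrative helper, not part of the diff): the same call site resolves to iter_iov() on Linux 6.5+ and to direct ->iov access on older kernels, so callers never touch the renamed __iov member directly.

#include <linux/uio.h>

static inline const struct iovec *
example_first_iov(struct iov_iter *iter)
{
	/* Expands to iter_iov(iter) on 6.5+, (iter)->iov on older kernels. */
	return (zfs_uio_iter_iov(iter));
}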
@ -52,7 +52,11 @@ extern const struct inode_operations zpl_special_inode_operations;
|
||||
|
||||
/* zpl_file.c */
|
||||
extern const struct address_space_operations zpl_address_space_operations;
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
extern const struct file_operations_extend zpl_file_operations;
|
||||
#else
|
||||
extern const struct file_operations zpl_file_operations;
|
||||
#endif
|
||||
extern const struct file_operations zpl_dir_file_operations;
|
||||
|
||||
/* zpl_super.c */
|
||||
@ -180,6 +184,55 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
|
||||
}
|
||||
#endif /* HAVE_VFS_ITERATE */
|
||||
|
||||
|
||||
/* zpl_file_range.c */
|
||||
|
||||
/* handlers for file_operations of the same name */
|
||||
extern ssize_t zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags);
|
||||
extern loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags);
|
||||
extern int zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len);
|
||||
extern int zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len);
|
||||
|
||||
/* compat for FICLONE/FICLONERANGE/FIDEDUPERANGE ioctls */
|
||||
typedef struct {
|
||||
int64_t fcr_src_fd;
|
||||
uint64_t fcr_src_offset;
|
||||
uint64_t fcr_src_length;
|
||||
uint64_t fcr_dest_offset;
|
||||
} zfs_ioc_compat_file_clone_range_t;
|
||||
|
||||
typedef struct {
|
||||
int64_t fdri_dest_fd;
|
||||
uint64_t fdri_dest_offset;
|
||||
uint64_t fdri_bytes_deduped;
|
||||
int32_t fdri_status;
|
||||
uint32_t fdri_reserved;
|
||||
} zfs_ioc_compat_dedupe_range_info_t;
|
||||
|
||||
typedef struct {
|
||||
uint64_t fdr_src_offset;
|
||||
uint64_t fdr_src_length;
|
||||
uint16_t fdr_dest_count;
|
||||
uint16_t fdr_reserved1;
|
||||
uint32_t fdr_reserved2;
|
||||
zfs_ioc_compat_dedupe_range_info_t fdr_info[];
|
||||
} zfs_ioc_compat_dedupe_range_t;
|
||||
|
||||
#define ZFS_IOC_COMPAT_FICLONE _IOW(0x94, 9, int)
|
||||
#define ZFS_IOC_COMPAT_FICLONERANGE \
|
||||
_IOW(0x94, 13, zfs_ioc_compat_file_clone_range_t)
|
||||
#define ZFS_IOC_COMPAT_FIDEDUPERANGE \
|
||||
_IOWR(0x94, 54, zfs_ioc_compat_dedupe_range_t)
|
||||
|
||||
extern long zpl_ioctl_ficlone(struct file *filp, void *arg);
|
||||
extern long zpl_ioctl_ficlonerange(struct file *filp, void *arg);
|
||||
extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
|
||||
|
||||
|
||||
#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
|
||||
#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip)
|
||||
#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
|
||||
|
@@ -60,7 +60,7 @@ typedef struct bpobj {
	kmutex_t	bpo_lock;
	objset_t	*bpo_os;
	uint64_t	bpo_object;
-	int		bpo_epb;
+	uint32_t	bpo_epb;
	uint8_t		bpo_havecomp;
	uint8_t		bpo_havesubobj;
	uint8_t		bpo_havefreed;

@@ -36,6 +36,7 @@ extern "C" {
#endif

extern boolean_t brt_entry_decref(spa_t *spa, const blkptr_t *bp);
+extern uint64_t brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp);

extern uint64_t brt_get_dspace(spa_t *spa);
extern uint64_t brt_get_used(spa_t *spa);

@@ -572,11 +572,15 @@ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t length, int read, const void *tag, int *numbufsp,
    dmu_buf_t ***dbpp);
+int dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
+    const void *tag, dmu_buf_t **dbp);
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
    const void *tag, dmu_buf_t **dbp, int flags);
int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
    uint64_t length, boolean_t read, const void *tag, int *numbufsp,
    dmu_buf_t ***dbpp, uint32_t flags);
+int dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
+    dmu_buf_t **dbp);
/*
 * Add a reference to a dmu buffer that has already been held via
 * dmu_buf_hold() in the current context.
@@ -885,6 +889,7 @@ extern uint_t zfs_max_recordsize;
 */
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
    uint64_t len, enum zio_priority pri);
+void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);

typedef struct dmu_object_info {
	/* All sizes are in bytes unless otherwise indicated. */

@@ -247,8 +247,6 @@ typedef struct dmu_sendstatus {

void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
-int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
-    const void *, dmu_buf_t **);

#ifdef __cplusplus
}

@@ -36,8 +36,6 @@
extern "C" {
#endif

-extern uint64_t zfetch_array_rd_sz;

struct dnode;	/* so we can reference dnode */

typedef struct zfetch {

@@ -102,8 +102,6 @@ extern "C" {
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP	"zio_timestamp"
#define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA	"zio_delta"
#define	FM_EREPORT_PAYLOAD_ZFS_PREV_STATE	"prev_state"
-#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED	"cksum_expected"
-#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL	"cksum_actual"
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO	"cksum_algorithm"
#define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP	"cksum_byteswap"
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
@@ -112,8 +110,6 @@ extern "C" {
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS	"bad_range_clears"
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS	"bad_set_bits"
#define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS	"bad_cleared_bits"
-#define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
-#define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
#define	FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME	"snapshot_name"
#define	FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME	"device_name"
#define	FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME	"raw_name"

@@ -80,7 +80,6 @@ uint64_t metaslab_largest_allocatable(metaslab_t *);
#define	METASLAB_ASYNC_ALLOC		0x8
#define	METASLAB_DONT_THROTTLE		0x10
#define	METASLAB_MUST_RESERVE		0x20
-#define	METASLAB_FASTWRITE		0x40
#define	METASLAB_ZIL			0x80

int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
@@ -96,8 +95,6 @@ void metaslab_unalloc_dva(spa_t *, const dva_t *, uint64_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
int metaslab_claim_impl(vdev_t *, uint64_t, uint64_t, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
-void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
-void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);

void metaslab_stat_init(void);
void metaslab_stat_fini(void);

@@ -313,7 +313,7 @@ struct metaslab_group {
 * Each metaslab maintains a set of in-core trees to track metaslab
 * operations. The in-core free tree (ms_allocatable) contains the list of
 * free segments which are eligible for allocation. As blocks are
- * allocated, the allocated segment are removed from the ms_allocatable and
+ * allocated, the allocated segments are removed from the ms_allocatable and
 * added to a per txg allocation tree (ms_allocating). As blocks are
 * freed, they are added to the free tree (ms_freeing). These trees
 * allow us to process all allocations and frees in syncing context
@@ -366,9 +366,9 @@ struct metaslab_group {
struct metaslab {
	/*
	 * This is the main lock of the metaslab and its purpose is to
-	 * coordinate our allocations and frees [e.g metaslab_block_alloc(),
+	 * coordinate our allocations and frees [e.g., metaslab_block_alloc(),
	 * metaslab_free_concrete(), ..etc] with our various syncing
-	 * procedures [e.g. metaslab_sync(), metaslab_sync_done(), ..etc].
+	 * procedures [e.g., metaslab_sync(), metaslab_sync_done(), ..etc].
	 *
	 * The lock is also used during some miscellaneous operations like
	 * using the metaslab's histogram for the metaslab group's histogram

@@ -250,6 +250,7 @@ struct spa {
	uint64_t	spa_min_ashift;		/* of vdevs in normal class */
	uint64_t	spa_max_ashift;		/* of vdevs in normal class */
	uint64_t	spa_min_alloc;		/* of vdevs in normal class */
+	uint64_t	spa_gcd_alloc;		/* of vdevs in normal class */
	uint64_t	spa_config_guid;	/* config pool guid */
	uint64_t	spa_load_guid;		/* spa_load initialized guid */
	uint64_t	spa_last_synced_guid;	/* last synced guid */

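The new spa_gcd_alloc field tracks the greatest common divisor of the normal-class vdevs' allocation sizes (PR #15067, "spa_min_alloc should be GCD, not min"). A sketch of how such a value could be folded in as vdevs join the pool; the helper name and call site are assumptions for illustration, only the field itself comes from this change.

/* Euclid's algorithm; gcd(x, 0) == x. */
static uint64_t
example_alloc_gcd(uint64_t a, uint64_t b)
{
	while (b != 0) {
		uint64_t t = a % b;
		a = b;
		b = t;
	}
	return (a);
}

/*
 * e.g., when a vdev with minimum allocation size 'alloc' is added:
 *	spa->spa_gcd_alloc = example_alloc_gcd(spa->spa_gcd_alloc, alloc);
 */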
@@ -266,7 +266,6 @@ struct vdev {
	metaslab_group_t *vdev_mg;	/* metaslab group */
	metaslab_group_t *vdev_log_mg;	/* embedded slog metaslab group */
	metaslab_t	**vdev_ms;	/* metaslab array */
-	uint64_t	vdev_pending_fastwrite; /* allocated fastwrites */
	txg_list_t	vdev_ms_list;	/* per-txg dirty metaslab lists */
	txg_list_t	vdev_dtl_list;	/* per-txg dirty DTL lists */
	txg_node_t	vdev_txg_node;	/* per-txg dirty vdev linkage */
@@ -420,6 +419,7 @@ struct vdev {
	boolean_t	vdev_copy_uberblocks;	/* post expand copy uberblocks */
	boolean_t	vdev_resilver_deferred; /* resilver deferred */
	boolean_t	vdev_kobj_flag;		/* kobj event record */
+	boolean_t	vdev_attaching;		/* vdev attach ashift handling */
	vdev_queue_t	vdev_queue;	/* I/O deadline schedule queue */
	spa_aux_vdev_t	*vdev_aux;	/* for l2cache and spares vdevs */
	zio_t		*vdev_probe_zio; /* root of current probe */

@@ -38,14 +38,22 @@ extern "C" {
/*
 * Possible states for a given lwb structure.
 *
- * An lwb will start out in the "closed" state, and then transition to
- * the "opened" state via a call to zil_lwb_write_open(). When
- * transitioning from "closed" to "opened" the zilog's "zl_issuer_lock"
- * must be held.
+ * An lwb will start out in the "new" state, and transition to the "opened"
+ * state via a call to zil_lwb_write_open() on first itx assignment. When
+ * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be
+ * held.
 *
- * After the lwb is "opened", it can transition into the "issued" state
- * via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
- * be held when making this transition.
+ * After the lwb is "opened", it can be assigned number of itxs and transition
+ * into the "closed" state via zil_lwb_write_close() when full or on timeout.
+ * When transitioning from "opened" to "closed" the zilog's "zl_issuer_lock"
+ * must be held. New lwb allocation also takes "zl_lock" to protect the list.
+ *
+ * After the lwb is "closed", it can transition into the "ready" state via
+ * zil_lwb_write_issue(). "zl_lock" must be held when making this transition.
+ * Since it is done by the same thread, "zl_issuer_lock" is not needed.
+ *
+ * When lwb in "ready" state receives its block pointer, it can transition to
+ * "issued". "zl_lock" must be held when making this transition.
 *
 * After the lwb's write zio completes, it transitions into the "write
 * done" state via zil_lwb_write_done(); and then into the "flush done"
@@ -62,17 +70,20 @@ extern "C" {
 *
 * Additionally, correctness when reading an lwb's state is often
 * achieved by exploiting the fact that these state transitions occur in
- * this specific order; i.e. "closed" to "opened" to "issued" to "done".
+ * this specific order; i.e. "new" to "opened" to "closed" to "ready" to
+ * "issued" to "write_done" and finally "flush_done".
 *
- * Thus, if an lwb is in the "closed" or "opened" state, holding the
+ * Thus, if an lwb is in the "new" or "opened" state, holding the
 * "zl_issuer_lock" will prevent a concurrent thread from transitioning
- * that lwb to the "issued" state. Likewise, if an lwb is already in the
- * "issued" state, holding the "zl_lock" will prevent a concurrent
- * thread from transitioning that lwb to the "write done" state.
+ * that lwb to the "closed" state. Likewise, if an lwb is already in the
+ * "ready" state, holding the "zl_lock" will prevent a concurrent thread
+ * from transitioning that lwb to the "issued" state.
 */
typedef enum {
-	LWB_STATE_CLOSED,
+	LWB_STATE_NEW,
	LWB_STATE_OPENED,
+	LWB_STATE_CLOSED,
+	LWB_STATE_READY,
	LWB_STATE_ISSUED,
	LWB_STATE_WRITE_DONE,
	LWB_STATE_FLUSH_DONE,
@@ -91,18 +102,21 @@ typedef enum {
typedef struct lwb {
	zilog_t		*lwb_zilog;	/* back pointer to log struct */
	blkptr_t	lwb_blk;	/* on disk address of this log blk */
	boolean_t	lwb_fastwrite;	/* is blk marked for fastwrite? */
	boolean_t	lwb_slim;	/* log block has slim format */
	boolean_t	lwb_slog;	/* lwb_blk is on SLOG device */
	boolean_t	lwb_indirect;	/* do not postpone zil_lwb_commit() */
	int		lwb_error;	/* log block allocation error */
	int		lwb_nmax;	/* max bytes in the buffer */
	int		lwb_nused;	/* # used bytes in buffer */
	int		lwb_nfilled;	/* # filled bytes in buffer */
	int		lwb_sz;		/* size of block and buffer */
	lwb_state_t	lwb_state;	/* the state of this lwb */
	char		*lwb_buf;	/* log write buffer */
	zio_t		*lwb_child_zio;	/* parent zio for children */
	zio_t		*lwb_write_zio;	/* zio for the lwb buffer */
	zio_t		*lwb_root_zio;	/* root zio for lwb write and flushes */
	hrtime_t	lwb_issued_timestamp; /* when was the lwb issued? */
	uint64_t	lwb_issued_txg;	/* the txg when the write is issued */
	uint64_t	lwb_alloc_txg;	/* the txg when lwb_blk is allocated */
	uint64_t	lwb_max_txg;	/* highest txg in this lwb */
	list_node_t	lwb_node;	/* zilog->zl_lwb_list linkage */
	list_node_t	lwb_issue_node;	/* linkage of lwbs ready for issue */

@ -222,7 +222,6 @@ typedef uint64_t zio_flag_t;
|
||||
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
|
||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||
#define ZIO_FLAG_FASTWRITE (1ULL << 31)
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0
|
||||
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
|
||||
|
@ -94,8 +94,6 @@ typedef const struct zio_checksum_info {
|
||||
} zio_checksum_info_t;
|
||||
|
||||
typedef struct zio_bad_cksum {
|
||||
zio_cksum_t zbc_expected;
|
||||
zio_cksum_t zbc_actual;
|
||||
const char *zbc_checksum_name;
|
||||
uint8_t zbc_byteswapped;
|
||||
uint8_t zbc_injected;
|
||||
|
@ -57,7 +57,7 @@ libzfs_la_LIBADD = \
|
||||
libzutil.la \
|
||||
libuutil.la
|
||||
|
||||
libzfs_la_LIBADD += -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
|
||||
libzfs_la_LIBADD += -lrt -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
|
||||
|
||||
libzfs_la_LDFLAGS = -pthread
|
||||
|
||||
|
@ -3926,6 +3926,12 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
|
||||
|
||||
switch (errno) {
|
||||
|
||||
case EALREADY:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"removal for this vdev is already in progress."));
|
||||
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
|
||||
break;
|
||||
|
||||
case EINVAL:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"invalid config; all top-level vdevs must "
|
||||
|
@ -928,6 +928,39 @@ zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static volatile boolean_t send_progress_thread_signal_duetotimer;
|
||||
static void
|
||||
send_progress_thread_act(int sig, siginfo_t *info, void *ucontext)
|
||||
{
|
||||
(void) sig, (void) ucontext;
|
||||
send_progress_thread_signal_duetotimer = info->si_code == SI_TIMER;
|
||||
}
|
||||
|
||||
struct timer_desirability {
|
||||
timer_t timer;
|
||||
boolean_t desired;
|
||||
};
|
||||
static void
|
||||
timer_delete_cleanup(void *timer)
|
||||
{
|
||||
struct timer_desirability *td = timer;
|
||||
if (td->desired)
|
||||
timer_delete(td->timer);
|
||||
}
|
||||
|
||||
#ifdef SIGINFO
|
||||
#define SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO sigaddset(&new, SIGINFO)
|
||||
#else
|
||||
#define SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO
|
||||
#endif
|
||||
#define SEND_PROGRESS_THREAD_PARENT_BLOCK(old) { \
|
||||
sigset_t new; \
|
||||
sigemptyset(&new); \
|
||||
sigaddset(&new, SIGUSR1); \
|
||||
SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO; \
|
||||
pthread_sigmask(SIG_BLOCK, &new, old); \
|
||||
}
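The macro above, together with the SIGUSR1/SIGINFO handler and the POSIX timer set up in send_progress_thread(), implements a simple once-per-second ticker. A standalone, hedged userland sketch of that pattern follows (names and the fixed loop count are invented); timer_create()/timer_delete() are also what pull in the new -lrt dependency seen in the libzfs link changes.

#include <signal.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t from_timer;

static void
tick(int sig, siginfo_t *si, void *uc)
{
	(void) sig, (void) uc;
	/* SI_TIMER means the wakeup came from the timer, not a user signal. */
	from_timer = (si->si_code == SI_TIMER);
}

static void
progress_ticker_demo(void)
{
	struct sigaction sa = { .sa_sigaction = tick, .sa_flags = SA_SIGINFO };
	struct sigevent ev = { .sigev_notify = SIGEV_SIGNAL,
	    .sigev_signo = SIGUSR1 };
	struct itimerspec its = { .it_value = { .tv_sec = 1 },
	    .it_interval = { .tv_sec = 1 } };
	timer_t t;

	(void) sigaction(SIGUSR1, &sa, NULL);
	if (timer_create(CLOCK_MONOTONIC, &ev, &t) != 0)
		return;
	(void) timer_settime(t, 0, &its, NULL);

	for (int i = 0; i < 5; i++) {
		pause();	/* wakes once per second, or on SIGUSR1 */
		/* report progress here; from_timer tells the wake reason */
	}
	(void) timer_delete(t);
}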
|
||||
|
||||
static void *
|
||||
send_progress_thread(void *arg)
|
||||
{
|
||||
@ -941,6 +974,26 @@ send_progress_thread(void *arg)
|
||||
struct tm tm;
|
||||
int err;
|
||||
|
||||
const struct sigaction signal_action =
|
||||
{.sa_sigaction = send_progress_thread_act, .sa_flags = SA_SIGINFO};
|
||||
struct sigevent timer_cfg =
|
||||
{.sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGUSR1};
|
||||
const struct itimerspec timer_time =
|
||||
{.it_value = {.tv_sec = 1}, .it_interval = {.tv_sec = 1}};
|
||||
struct timer_desirability timer = {};
|
||||
|
||||
sigaction(SIGUSR1, &signal_action, NULL);
|
||||
#ifdef SIGINFO
|
||||
sigaction(SIGINFO, &signal_action, NULL);
|
||||
#endif
|
||||
|
||||
if ((timer.desired = pa->pa_progress || pa->pa_astitle)) {
|
||||
if (timer_create(CLOCK_MONOTONIC, &timer_cfg, &timer.timer))
|
||||
return ((void *)(uintptr_t)errno);
|
||||
(void) timer_settime(timer.timer, 0, &timer_time, NULL);
|
||||
}
|
||||
pthread_cleanup_push(timer_delete_cleanup, &timer);
|
||||
|
||||
if (!pa->pa_parsable && pa->pa_progress) {
|
||||
(void) fprintf(stderr,
|
||||
"TIME %s %sSNAPSHOT %s\n",
|
||||
@ -953,12 +1006,12 @@ send_progress_thread(void *arg)
|
||||
* Print the progress from ZFS_IOC_SEND_PROGRESS every second.
|
||||
*/
|
||||
for (;;) {
|
||||
(void) sleep(1);
|
||||
pause();
|
||||
if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
|
||||
&blocks)) != 0) {
|
||||
if (err == EINTR || err == ENOENT)
|
||||
return ((void *)0);
|
||||
return ((void *)(uintptr_t)err);
|
||||
err = 0;
|
||||
pthread_exit(((void *)(uintptr_t)err));
|
||||
}
|
||||
|
||||
(void) time(&t);
|
||||
@ -991,21 +1044,25 @@ send_progress_thread(void *arg)
|
||||
(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
|
||||
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
||||
(u_longlong_t)bytes, zhp->zfs_name);
|
||||
} else if (pa->pa_progress) {
|
||||
} else if (pa->pa_progress ||
|
||||
!send_progress_thread_signal_duetotimer) {
|
||||
zfs_nicebytes(bytes, buf, sizeof (buf));
|
||||
(void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
|
||||
tm.tm_hour, tm.tm_min, tm.tm_sec,
|
||||
buf, zhp->zfs_name);
|
||||
}
|
||||
}
|
||||
pthread_cleanup_pop(B_TRUE);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
send_progress_thread_exit(libzfs_handle_t *hdl, pthread_t ptid)
|
||||
send_progress_thread_exit(
|
||||
libzfs_handle_t *hdl, pthread_t ptid, sigset_t *oldmask)
|
||||
{
|
||||
void *status = NULL;
|
||||
(void) pthread_cancel(ptid);
|
||||
(void) pthread_join(ptid, &status);
|
||||
pthread_sigmask(SIG_SETMASK, oldmask, NULL);
|
||||
int error = (int)(uintptr_t)status;
|
||||
if (error != 0 && status != PTHREAD_CANCELED)
|
||||
return (zfs_standard_error(hdl, error,
|
||||
@ -1199,7 +1256,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||
* If progress reporting is requested, spawn a new thread to
|
||||
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||
*/
|
||||
if (sdd->progress || sdd->progressastitle) {
|
||||
sigset_t oldmask;
|
||||
{
|
||||
pa.pa_zhp = zhp;
|
||||
pa.pa_fd = sdd->outfd;
|
||||
pa.pa_parsable = sdd->parsable;
|
||||
@ -1214,13 +1272,13 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||
zfs_close(zhp);
|
||||
return (err);
|
||||
}
|
||||
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||
}
|
||||
|
||||
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
|
||||
fromorigin, sdd->outfd, flags, sdd->debugnv);
|
||||
|
||||
if ((sdd->progress || sdd->progressastitle) &&
|
||||
send_progress_thread_exit(zhp->zfs_hdl, tid))
|
||||
if (send_progress_thread_exit(zhp->zfs_hdl, tid, &oldmask))
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -1562,8 +1620,9 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||
progress_arg_t pa = { 0 };
|
||||
int err = 0;
|
||||
pthread_t ptid;
|
||||
sigset_t oldmask;
|
||||
|
||||
if (flags->progress || flags->progressastitle) {
|
||||
{
|
||||
pa.pa_zhp = zhp;
|
||||
pa.pa_fd = fd;
|
||||
pa.pa_parsable = flags->parsable;
|
||||
@ -1577,6 +1636,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||
return (zfs_error(zhp->zfs_hdl,
|
||||
EZFS_THREADCREATEFAILED, errbuf));
|
||||
}
|
||||
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||
}
|
||||
|
||||
err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
|
||||
@ -1584,8 +1644,7 @@ estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
|
||||
redactbook, fd, &size);
|
||||
*sizep = size;
|
||||
|
||||
if ((flags->progress || flags->progressastitle) &&
|
||||
send_progress_thread_exit(zhp->zfs_hdl, ptid))
|
||||
if (send_progress_thread_exit(zhp->zfs_hdl, ptid, &oldmask))
|
||||
return (-1);
|
||||
|
||||
if (!flags->progress && !flags->parsable)
|
||||
@ -1876,11 +1935,12 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||
if (!flags->dryrun) {
|
||||
progress_arg_t pa = { 0 };
|
||||
pthread_t tid;
|
||||
sigset_t oldmask;
|
||||
/*
|
||||
* If progress reporting is requested, spawn a new thread to
|
||||
* poll ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||
*/
|
||||
if (flags->progress || flags->progressastitle) {
|
||||
{
|
||||
pa.pa_zhp = zhp;
|
||||
pa.pa_fd = outfd;
|
||||
pa.pa_parsable = flags->parsable;
|
||||
@ -1898,6 +1958,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||
zfs_close(zhp);
|
||||
return (error);
|
||||
}
|
||||
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||
}
|
||||
|
||||
error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
|
||||
@ -1905,8 +1966,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||
if (redact_book != NULL)
|
||||
free(redact_book);
|
||||
|
||||
if ((flags->progressastitle || flags->progress) &&
|
||||
send_progress_thread_exit(hdl, tid)) {
|
||||
if (send_progress_thread_exit(hdl, tid, &oldmask)) {
|
||||
zfs_close(zhp);
|
||||
return (-1);
|
||||
}
|
||||
@ -2691,7 +2751,8 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
|
||||
* If progress reporting is requested, spawn a new thread to poll
|
||||
* ZFS_IOC_SEND_PROGRESS at a regular interval.
|
||||
*/
|
||||
if (flags->progress || flags->progressastitle) {
|
||||
sigset_t oldmask;
|
||||
{
|
||||
pa.pa_zhp = zhp;
|
||||
pa.pa_fd = fd;
|
||||
pa.pa_parsable = flags->parsable;
|
||||
@ -2708,13 +2769,13 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
|
||||
return (zfs_error(zhp->zfs_hdl,
|
||||
EZFS_THREADCREATEFAILED, errbuf));
|
||||
}
|
||||
SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
|
||||
}
|
||||
|
||||
err = lzc_send_redacted(name, from, fd,
|
||||
lzc_flags_from_sendflags(flags), redactbook);
|
||||
|
||||
if ((flags->progress || flags->progressastitle) &&
|
||||
send_progress_thread_exit(hdl, ptid))
|
||||
if (send_progress_thread_exit(hdl, ptid, &oldmask))
|
||||
return (-1);
|
||||
|
||||
if (err == 0 && (flags->props || flags->holds || flags->backup)) {
|
||||
|
@ -15,7 +15,7 @@
|
||||
.\" own identifying information:
|
||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.Dd January 10, 2023
|
||||
.Dd July 21, 2023
|
||||
.Dt ZFS 4
|
||||
.Os
|
||||
.
|
||||
@ -239,6 +239,11 @@ relative to the pool.
|
||||
Make some blocks above a certain size be gang blocks.
|
||||
This option is used by the test suite to facilitate testing.
|
||||
.
|
||||
.It Sy metaslab_force_ganging_pct Ns = Ns Sy 3 Ns % Pq uint
|
||||
For blocks that could be forced to be a gang block (due to
|
||||
.Sy metaslab_force_ganging ) ,
|
||||
force this many of them to be gang blocks.
|
||||
.
|
||||
.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
|
||||
Default DDT ZAP data block size as a power of 2. Note that changing this after
|
||||
creating a DDT on the pool will not affect existing DDTs, only newly created
|
||||
@ -519,9 +524,6 @@ However, this is limited by
|
||||
Maximum micro ZAP size.
|
||||
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
||||
.
|
||||
.It Sy zfetch_array_rd_sz Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq u64
|
||||
If prefetching is enabled, disable prefetching for reads larger than this size.
|
||||
.
|
||||
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
||||
Min bytes to prefetch per stream.
|
||||
Prefetch distance starts from the demand access size and quickly grows to
|
||||
|
@ -29,7 +29,7 @@
|
||||
.\" Copyright 2018 Nexenta Systems, Inc.
|
||||
.\" Copyright 2019 Joyent, Inc.
|
||||
.\"
|
||||
.Dd January 12, 2023
|
||||
.Dd July 27, 2023
|
||||
.Dt ZFS-SEND 8
|
||||
.Os
|
||||
.
|
||||
@ -297,6 +297,12 @@ This flag can only be used in conjunction with
|
||||
.It Fl v , -verbose
|
||||
Print verbose information about the stream package generated.
|
||||
This information includes a per-second report of how much data has been sent.
|
||||
The same report can be requested by sending
|
||||
.Dv SIGINFO
|
||||
or
|
||||
.Dv SIGUSR1 ,
|
||||
regardless of
|
||||
.Fl v .
|
||||
.Pp
|
||||
The format of the stream is committed.
|
||||
You will be able to receive your streams on future versions of ZFS.
|
||||
@ -433,6 +439,12 @@ and the verbose output goes to standard error
|
||||
.It Fl v , -verbose
|
||||
Print verbose information about the stream package generated.
|
||||
This information includes a per-second report of how much data has been sent.
|
||||
The same report can be requested by sending
|
||||
.Dv SIGINFO
|
||||
or
|
||||
.Dv SIGUSR1 ,
|
||||
regardless of
|
||||
.Fl v .
|
||||
.El
|
||||
.It Xo
|
||||
.Nm zfs
|
||||
@ -669,6 +681,10 @@ ones on the source, and are ready to be used, while the parent snapshot on the
|
||||
target contains none of the username and password data present on the source,
|
||||
because it was removed by the redacted send operation.
|
||||
.
|
||||
.Sh SIGNALS
|
||||
See
|
||||
.Fl v .
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
.\" These are, respectively, examples 12, 13 from zfs.8
|
||||
.\" Make sure to update them bidirectionally
|
||||
|
@ -26,7 +26,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd May 27, 2021
|
||||
.Dd July 11, 2023
|
||||
.Dt ZPOOL-EVENTS 8
|
||||
.Os
|
||||
.
|
||||
@ -305,10 +305,6 @@ The time when a given I/O request was submitted.
|
||||
The time required to service a given I/O request.
|
||||
.It Sy prev_state
|
||||
The previous state of the vdev.
|
||||
.It Sy cksum_expected
|
||||
The expected checksum value for the block.
|
||||
.It Sy cksum_actual
|
||||
The actual checksum value for an errant block.
|
||||
.It Sy cksum_algorithm
|
||||
Checksum algorithm used.
|
||||
See
|
||||
@ -362,23 +358,6 @@ Like
|
||||
but contains
|
||||
.Pq Ar good data No & ~( Ns Ar bad data ) ;
|
||||
that is, the bits set in the good data which are cleared in the bad data.
|
||||
.It Sy bad_set_histogram
|
||||
If this field exists, it is an array of counters.
|
||||
Each entry counts bits set in a particular bit of a big-endian uint64 type.
|
||||
The first entry counts bits
|
||||
set in the high-order bit of the first byte, the 9th byte, etc, and the last
|
||||
entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
|
||||
This information is useful for observing a stuck bit in a parallel data path,
|
||||
such as IDE or parallel SCSI.
|
||||
.It Sy bad_cleared_histogram
|
||||
If this field exists, it is an array of counters.
|
||||
Each entry counts bit clears in a particular bit of a big-endian uint64 type.
|
||||
The first entry counts bits
|
||||
clears of the high-order bit of the first byte, the 9th byte, etc, and the
|
||||
last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
|
||||
etc.
|
||||
This information is useful for observing a stuck bit in a parallel data
|
||||
path, such as IDE or parallel SCSI.
|
||||
.El
|
||||
.
|
||||
.Sh I/O STAGES
|
||||
|
@ -461,6 +461,7 @@ ZFS_OBJS_OS := \
|
||||
zpl_ctldir.o \
|
||||
zpl_export.o \
|
||||
zpl_file.o \
|
||||
zpl_file_range.o \
|
||||
zpl_inode.o \
|
||||
zpl_super.o \
|
||||
zpl_xattr.o \
|
||||
|
@ -1869,10 +1869,8 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
|
||||
|
||||
ASSERT3S(outcount, <=, bufsize);
|
||||
|
||||
/* Prefetch znode */
|
||||
if (prefetch)
|
||||
dmu_prefetch(os, objnum, 0, 0, 0,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
|
||||
|
||||
/*
|
||||
* Move to the next entry, fill in the previous offset.
|
||||
@ -6268,7 +6266,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||
goto bad_write_fallback;
|
||||
}
|
||||
} else {
|
||||
#if __FreeBSD_version >= 1400086
|
||||
#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
|
||||
__FreeBSD_version >= 1400086
|
||||
vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
|
||||
LK_EXCLUSIVE);
|
||||
#else
|
||||
@ -6294,7 +6293,8 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||
|
||||
error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
|
||||
ap->a_outoffp, &len, ap->a_outcred);
|
||||
if (error == EXDEV)
|
||||
if (error == EXDEV || error == EAGAIN || error == EINVAL ||
|
||||
error == EOPNOTSUPP)
|
||||
goto bad_locked_fallback;
|
||||
*ap->a_lenp = (size_t)len;
|
||||
out_locked:
|
||||
|
@ -624,6 +624,7 @@ static struct ctl_table spl_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dohostid,
|
||||
},
|
||||
#ifdef HAVE_REGISTER_SYSCTL_TABLE
|
||||
{
|
||||
.procname = "kmem",
|
||||
.mode = 0555,
|
||||
@ -634,9 +635,11 @@ static struct ctl_table spl_table[] = {
|
||||
.mode = 0555,
|
||||
.child = spl_kstat_table,
|
||||
},
|
||||
#endif
|
||||
{},
|
||||
};
|
||||
|
||||
#ifdef HAVE_REGISTER_SYSCTL_TABLE
|
||||
static struct ctl_table spl_dir[] = {
|
||||
{
|
||||
.procname = "spl",
|
||||
@ -648,21 +651,38 @@ static struct ctl_table spl_dir[] = {
|
||||
|
||||
static struct ctl_table spl_root[] = {
|
||||
{
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = spl_dir,
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = spl_dir,
|
||||
},
|
||||
{}
|
||||
};
|
||||
#endif
|
||||
|
||||
int
|
||||
spl_proc_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
#ifdef HAVE_REGISTER_SYSCTL_TABLE
|
||||
spl_header = register_sysctl_table(spl_root);
|
||||
if (spl_header == NULL)
|
||||
return (-EUNATCH);
|
||||
#else
|
||||
spl_header = register_sysctl("kernel/spl", spl_table);
|
||||
if (spl_header == NULL)
|
||||
return (-EUNATCH);
|
||||
|
||||
if (register_sysctl("kernel/spl/kmem", spl_kmem_table) == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (register_sysctl("kernel/spl/kstat", spl_kstat_table) == NULL) {
|
||||
rc = -EUNATCH;
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
proc_spl = proc_mkdir("spl", NULL);
|
||||
if (proc_spl == NULL) {
|
||||
|
@ -80,9 +80,22 @@ typedef struct dio_request {
|
||||
|
||||
static unsigned int zfs_vdev_failfast_mask = 1;
|
||||
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
static blk_mode_t
|
||||
#else
|
||||
static fmode_t
|
||||
#endif
|
||||
vdev_bdev_mode(spa_mode_t spa_mode)
|
||||
{
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
blk_mode_t mode = 0;
|
||||
|
||||
if (spa_mode & SPA_MODE_READ)
|
||||
mode |= BLK_OPEN_READ;
|
||||
|
||||
if (spa_mode & SPA_MODE_WRITE)
|
||||
mode |= BLK_OPEN_WRITE;
|
||||
#else
|
||||
fmode_t mode = 0;
|
||||
|
||||
if (spa_mode & SPA_MODE_READ)
|
||||
@ -90,6 +103,7 @@ vdev_bdev_mode(spa_mode_t spa_mode)
|
||||
|
||||
if (spa_mode & SPA_MODE_WRITE)
|
||||
mode |= FMODE_WRITE;
|
||||
#endif
|
||||
|
||||
return (mode);
|
||||
}
|
||||
@ -197,12 +211,47 @@ vdev_disk_kobj_evt_post(vdev_t *v)
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
|
||||
/*
|
||||
* Define a dummy struct blk_holder_ops for kernel versions
|
||||
* prior to 6.5.
|
||||
*/
|
||||
struct blk_holder_ops {};
|
||||
#endif
|
||||
|
||||
static struct block_device *
|
||||
vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
|
||||
const struct blk_holder_ops *hops)
|
||||
{
|
||||
#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
|
||||
return (blkdev_get_by_path(path,
|
||||
vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops));
|
||||
#else
|
||||
return (blkdev_get_by_path(path,
|
||||
vdev_bdev_mode(mode) | FMODE_EXCL, holder));
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
|
||||
{
|
||||
#ifdef HAVE_BLKDEV_PUT_HOLDER
|
||||
return (blkdev_put(bdev, holder));
|
||||
#else
|
||||
return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *logical_ashift, uint64_t *physical_ashift)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
|
||||
#else
|
||||
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
|
||||
#endif
|
||||
hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
|
||||
vdev_disk_t *vd;
|
||||
|
||||
@ -252,15 +301,15 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
reread_part = B_TRUE;
|
||||
}
|
||||
|
||||
blkdev_put(bdev, mode | FMODE_EXCL);
|
||||
vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
|
||||
}
|
||||
|
||||
if (reread_part) {
|
||||
bdev = blkdev_get_by_path(disk_name, mode | FMODE_EXCL,
|
||||
zfs_vdev_holder);
|
||||
bdev = vdev_blkdev_get_by_path(disk_name, mode,
|
||||
zfs_vdev_holder, NULL);
|
||||
if (!IS_ERR(bdev)) {
|
||||
int error = vdev_bdev_reread_part(bdev);
|
||||
blkdev_put(bdev, mode | FMODE_EXCL);
|
||||
vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
|
||||
if (error == 0) {
|
||||
timeout = MSEC2NSEC(
|
||||
zfs_vdev_open_timeout_ms * 2);
|
||||
@ -305,8 +354,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||
hrtime_t start = gethrtime();
|
||||
bdev = ERR_PTR(-ENXIO);
|
||||
while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
|
||||
bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
|
||||
zfs_vdev_holder);
|
||||
bdev = vdev_blkdev_get_by_path(v->vdev_path, mode,
|
||||
zfs_vdev_holder, NULL);
|
||||
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
|
||||
/*
|
||||
* There is no point of waiting since device is removed
|
||||
@ -382,8 +431,8 @@ vdev_disk_close(vdev_t *v)
|
||||
return;
|
||||
|
||||
if (vd->vd_bdev != NULL) {
|
||||
blkdev_put(vd->vd_bdev,
|
||||
vdev_bdev_mode(spa_mode(v->vdev_spa)) | FMODE_EXCL);
|
||||
vdev_blkdev_put(vd->vd_bdev, spa_mode(v->vdev_spa),
|
||||
zfs_vdev_holder);
|
||||
}
|
||||
|
||||
rw_destroy(&vd->vd_lock);
|
||||
|
@ -478,17 +478,19 @@ zfsctl_is_snapdir(struct inode *ip)
|
||||
*/
|
||||
static struct inode *
|
||||
zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
|
||||
const struct file_operations *fops, const struct inode_operations *ops)
|
||||
const struct file_operations *fops, const struct inode_operations *ops,
|
||||
uint64_t creation)
|
||||
{
|
||||
inode_timespec_t now;
|
||||
struct inode *ip;
|
||||
znode_t *zp;
|
||||
inode_timespec_t now = {.tv_sec = creation};
|
||||
|
||||
ip = new_inode(zfsvfs->z_sb);
|
||||
if (ip == NULL)
|
||||
return (NULL);
|
||||
|
||||
now = current_time(ip);
|
||||
if (!creation)
|
||||
now = current_time(ip);
|
||||
zp = ITOZ(ip);
|
||||
ASSERT3P(zp->z_dirlocks, ==, NULL);
|
||||
ASSERT3P(zp->z_acl_cached, ==, NULL);
|
||||
@ -552,14 +554,28 @@ zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
|
||||
const struct file_operations *fops, const struct inode_operations *ops)
|
||||
{
|
||||
struct inode *ip = NULL;
|
||||
uint64_t creation = 0;
|
||||
dsl_dataset_t *snap_ds;
|
||||
dsl_pool_t *pool;
|
||||
|
||||
while (ip == NULL) {
|
||||
ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
|
||||
if (ip)
|
||||
break;
|
||||
|
||||
if (id <= ZFSCTL_INO_SNAPDIRS && !creation) {
|
||||
pool = dmu_objset_pool(zfsvfs->z_os);
|
||||
dsl_pool_config_enter(pool, FTAG);
|
||||
if (!dsl_dataset_hold_obj(pool,
|
||||
ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) {
|
||||
creation = dsl_get_creation(snap_ds);
|
||||
dsl_dataset_rele(snap_ds, FTAG);
|
||||
}
|
||||
dsl_pool_config_exit(pool, FTAG);
|
||||
}
|
||||
|
||||
/* May fail due to concurrent zfsctl_inode_alloc() */
|
||||
ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
|
||||
ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation);
|
||||
}
|
||||
|
||||
return (ip);
|
||||
@ -581,7 +597,7 @@ zfsctl_create(zfsvfs_t *zfsvfs)
|
||||
ASSERT(zfsvfs->z_ctldir == NULL);
|
||||
|
||||
zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
|
||||
&zpl_fops_root, &zpl_ops_root);
|
||||
&zpl_fops_root, &zpl_ops_root, 0);
|
||||
if (zfsvfs->z_ctldir == NULL)
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
|
@ -1662,6 +1662,7 @@ zfs_umount(struct super_block *sb)
|
||||
}
|
||||
|
||||
zfsvfs_free(zfsvfs);
|
||||
sb->s_fs_info = NULL;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2091,6 +2092,9 @@ zfs_init(void)
|
||||
zfs_znode_init();
|
||||
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
|
||||
register_filesystem(&zpl_fs_type);
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
register_fo_extend(&zpl_file_operations);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
@ -2101,6 +2105,9 @@ zfs_fini(void)
|
||||
*/
|
||||
taskq_wait(system_delay_taskq);
|
||||
taskq_wait(system_taskq);
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
unregister_fo_extend(&zpl_file_operations);
|
||||
#endif
|
||||
unregister_filesystem(&zpl_fs_type);
|
||||
zfs_znode_fini();
|
||||
zfsctl_fini();
|
||||
|
@ -186,7 +186,7 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
|
||||
return (error);
|
||||
|
||||
/* Honor ZFS_APPENDONLY file attribute */
|
||||
if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
|
||||
if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
|
||||
((flag & O_APPEND) == 0)) {
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
return (SET_ERROR(EPERM));
|
||||
@ -1610,11 +1610,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
|
||||
if (done)
|
||||
break;
|
||||
|
||||
/* Prefetch znode */
|
||||
if (prefetch) {
|
||||
dmu_prefetch(os, objnum, 0, 0, 0,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
}
|
||||
if (prefetch)
|
||||
dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
|
||||
|
||||
/*
|
||||
* Move to the next entry, fill in the previous offset.
|
||||
|
@ -415,7 +415,11 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
|
||||
switch (ip->i_mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
ip->i_op = &zpl_inode_operations;
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
ip->i_fop = &zpl_file_operations.kabi_fops;
|
||||
#else
|
||||
ip->i_fop = &zpl_file_operations;
|
||||
#endif
|
||||
ip->i_mapping->a_ops = &zpl_address_space_operations;
|
||||
break;
|
||||
|
||||
@ -455,7 +459,11 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
|
||||
/* Assume the inode is a file and attempt to continue */
|
||||
ip->i_mode = S_IFREG | 0644;
|
||||
ip->i_op = &zpl_inode_operations;
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
ip->i_fop = &zpl_file_operations.kabi_fops;
|
||||
#else
|
||||
ip->i_fop = &zpl_file_operations;
|
||||
#endif
|
||||
ip->i_mapping->a_ops = &zpl_address_space_operations;
|
||||
break;
|
||||
}
|
||||
|
@ -42,7 +42,7 @@
|
||||
static int
|
||||
zpl_common_open(struct inode *ip, struct file *filp)
|
||||
{
|
||||
if (filp->f_mode & FMODE_WRITE)
|
||||
if (blk_mode_is_open_write(filp->f_mode))
|
||||
return (-EACCES);
|
||||
|
||||
return (generic_file_open(ip, filp));
|
||||
|
@ -301,15 +301,10 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
|
||||
#if defined(HAVE_VFS_IOV_ITER)
|
||||
zfs_uio_iov_iter_init(uio, to, pos, count, skip);
|
||||
#else
|
||||
#ifdef HAVE_IOV_ITER_TYPE
|
||||
zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
|
||||
iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
|
||||
zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
|
||||
zfs_uio_iov_iter_type(to) & ITER_KVEC ?
|
||||
UIO_SYSSPACE : UIO_USERSPACE,
|
||||
count, skip);
|
||||
#else
|
||||
zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
|
||||
to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
|
||||
count, skip);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1257,6 +1252,12 @@ zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
return (zpl_ioctl_getdosflags(filp, (void *)arg));
|
||||
case ZFS_IOC_SETDOSFLAGS:
|
||||
return (zpl_ioctl_setdosflags(filp, (void *)arg));
|
||||
case ZFS_IOC_COMPAT_FICLONE:
|
||||
return (zpl_ioctl_ficlone(filp, (void *)arg));
|
||||
case ZFS_IOC_COMPAT_FICLONERANGE:
|
||||
return (zpl_ioctl_ficlonerange(filp, (void *)arg));
|
||||
case ZFS_IOC_COMPAT_FIDEDUPERANGE:
|
||||
return (zpl_ioctl_fideduperange(filp, (void *)arg));
|
||||
default:
|
||||
return (-ENOTTY);
|
||||
}
|
||||
@ -1283,7 +1284,6 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
}
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
|
||||
const struct address_space_operations zpl_address_space_operations = {
|
||||
#ifdef HAVE_VFS_READPAGES
|
||||
.readpages = zpl_readpages,
|
||||
@ -1306,7 +1306,12 @@ const struct address_space_operations zpl_address_space_operations = {
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
const struct file_operations_extend zpl_file_operations = {
|
||||
.kabi_fops = {
|
||||
#else
|
||||
const struct file_operations zpl_file_operations = {
|
||||
#endif
|
||||
.open = zpl_open,
|
||||
.release = zpl_release,
|
||||
.llseek = zpl_llseek,
|
||||
@ -1318,7 +1323,11 @@ const struct file_operations zpl_file_operations = {
|
||||
.read_iter = zpl_iter_read,
|
||||
.write_iter = zpl_iter_write,
|
||||
#ifdef HAVE_VFS_IOV_ITER
|
||||
#ifdef HAVE_COPY_SPLICE_READ
|
||||
.splice_read = copy_splice_read,
|
||||
#else
|
||||
.splice_read = generic_file_splice_read,
|
||||
#endif
|
||||
.splice_write = iter_file_splice_write,
|
||||
#endif
|
||||
#else
|
||||
@ -1333,6 +1342,18 @@ const struct file_operations zpl_file_operations = {
|
||||
.aio_fsync = zpl_aio_fsync,
|
||||
#endif
|
||||
.fallocate = zpl_fallocate,
|
||||
#ifdef HAVE_VFS_COPY_FILE_RANGE
|
||||
.copy_file_range = zpl_copy_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_CLONE_FILE_RANGE
|
||||
.clone_file_range = zpl_clone_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_REMAP_FILE_RANGE
|
||||
.remap_file_range = zpl_remap_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
|
||||
.dedupe_file_range = zpl_dedupe_file_range,
|
||||
#endif
|
||||
#ifdef HAVE_FILE_FADVISE
|
||||
.fadvise = zpl_fadvise,
|
||||
#endif
|
||||
@ -1340,6 +1361,11 @@ const struct file_operations zpl_file_operations = {
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = zpl_compat_ioctl,
|
||||
#endif
|
||||
#ifdef HAVE_VFS_FILE_OPERATIONS_EXTEND
|
||||
}, /* kabi_fops */
|
||||
.copy_file_range = zpl_copy_file_range,
|
||||
.clone_file_range = zpl_clone_file_range,
|
||||
#endif
|
||||
};
|
||||
|
||||
const struct file_operations zpl_dir_file_operations = {
|
||||
|
272
sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
Normal file
@ -0,0 +1,272 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
#include <linux/compat.h>
|
||||
#endif
|
||||
#include <linux/fs.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/*
|
||||
* Clone part of a file via block cloning.
|
||||
*
|
||||
* Note that we are not required to update file offsets; the kernel will take
|
||||
* care of that depending on how it was called.
|
||||
*/
|
||||
static ssize_t
|
||||
__zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, size_t len)
|
||||
{
|
||||
struct inode *src_i = file_inode(src_file);
|
||||
struct inode *dst_i = file_inode(dst_file);
|
||||
uint64_t src_off_o = (uint64_t)src_off;
|
||||
uint64_t dst_off_o = (uint64_t)dst_off;
|
||||
uint64_t len_o = (uint64_t)len;
|
||||
cred_t *cr = CRED();
|
||||
fstrans_cookie_t cookie;
|
||||
int err;
|
||||
|
||||
if (!spa_feature_is_enabled(
|
||||
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
||||
return (-EOPNOTSUPP);
|
||||
|
||||
if (src_i != dst_i)
|
||||
spl_inode_lock_shared(src_i);
|
||||
spl_inode_lock(dst_i);
|
||||
|
||||
crhold(cr);
|
||||
cookie = spl_fstrans_mark();
|
||||
|
||||
err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
|
||||
&dst_off_o, &len_o, cr);
|
||||
|
||||
spl_fstrans_unmark(cookie);
|
||||
crfree(cr);
|
||||
|
||||
spl_inode_unlock(dst_i);
|
||||
if (src_i != dst_i)
|
||||
spl_inode_unlock_shared(src_i);
|
||||
|
||||
if (err < 0)
|
||||
return (err);
|
||||
|
||||
return ((ssize_t)len_o);
|
||||
}
|
||||
|
||||
#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
|
||||
defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
|
||||
/*
|
||||
* Entry point for copy_file_range(). Copy len bytes from src_off in src_file
|
||||
* to dst_off in dst_file. We are permitted to do this however we like, so we
|
||||
* try to just clone the blocks, and if we can't support it, fall back to the
|
||||
* kernel's generic byte copy function.
|
||||
*/
|
||||
ssize_t
|
||||
zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
if (flags != 0)
|
||||
return (-EINVAL);
|
||||
|
||||
/* Try to do it via zfs_clone_range() */
|
||||
ret = __zpl_clone_file_range(src_file, src_off,
|
||||
dst_file, dst_off, len);
|
||||
|
||||
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
|
||||
/*
|
||||
* Since Linux 5.3 the filesystem driver is responsible for executing
|
||||
* an appropriate fallback, and a generic fallback function is provided.
|
||||
*/
|
||||
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
|
||||
ret == -EAGAIN)
|
||||
ret = generic_copy_file_range(src_file, src_off, dst_file,
|
||||
dst_off, len, flags);
|
||||
#else
|
||||
/*
|
||||
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
|
||||
* to the kernel that it should fall back to a content copy.
|
||||
*/
|
||||
if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
|
||||
ret = -EOPNOTSUPP;
|
||||
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
|
||||
|
||||
return (ret);
|
||||
}
|
||||
#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
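For reference, a hedged userland view of the behaviour above (illustration only; the helper name is invented): because the driver either clones or arranges a byte-copy fallback, a plain copy_file_range(2) loop works the same either way, and only whether the destination shares blocks with the source differs.

#define _GNU_SOURCE
#include <sys/types.h>
#include <unistd.h>

/* Copy (or clone, when the pool supports it) len bytes between two files. */
static int
copy_range_demo(int src_fd, int dst_fd, off_t len)
{
	while (len > 0) {
		ssize_t n = copy_file_range(src_fd, NULL, dst_fd, NULL,
		    (size_t)len, 0);
		if (n <= 0)
			return (-1);	/* caller can inspect errno */
		len -= n;
	}
	return (0);
}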
|
||||
|
||||
#ifdef HAVE_VFS_REMAP_FILE_RANGE
|
||||
/*
|
||||
* Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
|
||||
*
|
||||
* FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
|
||||
* that they must clone - they cannot fall back to copying. FICLONE is exactly
|
||||
* FICLONERANGE, for the entire file. We don't need to try to tell them apart;
|
||||
* the kernel will sort that out for us.
|
||||
*
|
||||
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
|
||||
* range in both files and if they're the same, arrange for them to be backed
|
||||
* by the same storage.
|
||||
*/
|
||||
loff_t
|
||||
zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
|
||||
{
|
||||
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
|
||||
return (-EINVAL);
|
||||
|
||||
/*
|
||||
* REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
|
||||
* range if we want. It's designed for filesystems that make data past
|
||||
* EOF available, and don't want it to be visible in both files. ZFS
|
||||
* doesn't do that, so we just turn the flag off.
|
||||
*/
|
||||
flags &= ~REMAP_FILE_CAN_SHORTEN;
|
||||
|
||||
if (flags & REMAP_FILE_DEDUP)
|
||||
/* No support for dedup yet */
|
||||
return (-EOPNOTSUPP);
|
||||
|
||||
/* Zero length means to clone everything to the end of the file */
|
||||
if (len == 0)
|
||||
len = i_size_read(file_inode(src_file)) - src_off;
|
||||
|
||||
return (__zpl_clone_file_range(src_file, src_off,
|
||||
dst_file, dst_off, len));
|
||||
}
|
||||
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
|
||||
|
||||
#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
|
||||
defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
|
||||
/*
|
||||
* Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
|
||||
*/
|
||||
int
|
||||
zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len)
|
||||
{
|
||||
/* Zero length means to clone everything to the end of the file */
|
||||
if (len == 0)
|
||||
len = i_size_read(file_inode(src_file)) - src_off;
|
||||
|
||||
return (__zpl_clone_file_range(src_file, src_off,
|
||||
dst_file, dst_off, len));
|
||||
}
|
||||
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
|
||||
|
||||
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
|
||||
/*
|
||||
* Entry point for FIDEDUPERANGE, before Linux 4.20.
|
||||
*/
|
||||
int
|
||||
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
|
||||
struct file *dst_file, loff_t dst_off, uint64_t len)
|
||||
{
|
||||
/* No support for dedup yet */
|
||||
return (-EOPNOTSUPP);
|
||||
}
|
||||
#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
|
||||
|
||||
/* Entry point for FICLONE, before Linux 4.5. */
|
||||
long
|
||||
zpl_ioctl_ficlone(struct file *dst_file, void *arg)
|
||||
{
|
||||
unsigned long sfd = (unsigned long)arg;
|
||||
|
||||
struct file *src_file = fget(sfd);
|
||||
if (src_file == NULL)
|
||||
return (-EBADF);
|
||||
|
||||
if (dst_file->f_op != src_file->f_op)
|
||||
return (-EXDEV);
|
||||
|
||||
size_t len = i_size_read(file_inode(src_file));
|
||||
|
||||
ssize_t ret =
|
||||
__zpl_clone_file_range(src_file, 0, dst_file, 0, len);
|
||||
|
||||
fput(src_file);
|
||||
|
||||
if (ret < 0) {
|
||||
if (ret == -EOPNOTSUPP)
|
||||
return (-ENOTTY);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
if (ret != len)
|
||||
return (-EINVAL);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Entry point for FICLONERANGE, before Linux 4.5. */
|
||||
long
|
||||
zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
|
||||
{
|
||||
zfs_ioc_compat_file_clone_range_t fcr;
|
||||
|
||||
if (copy_from_user(&fcr, arg, sizeof (fcr)))
|
||||
return (-EFAULT);
|
||||
|
||||
struct file *src_file = fget(fcr.fcr_src_fd);
|
||||
if (src_file == NULL)
|
||||
return (-EBADF);
|
||||
|
||||
if (dst_file->f_op != src_file->f_op)
|
||||
return (-EXDEV);
|
||||
|
||||
size_t len = fcr.fcr_src_length;
|
||||
if (len == 0)
|
||||
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
|
||||
|
||||
ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
|
||||
dst_file, fcr.fcr_dest_offset, len);
|
||||
|
||||
fput(src_file);
|
||||
|
||||
if (ret < 0) {
|
||||
if (ret == -EOPNOTSUPP)
|
||||
return (-ENOTTY);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
if (ret != len)
|
||||
return (-EINVAL);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Entry point for FIDEDUPERANGE, before Linux 4.5. */
|
||||
long
|
||||
zpl_ioctl_fideduperange(struct file *filp, void *arg)
|
||||
{
|
||||
(void) arg;
|
||||
|
||||
/* No support for dedup yet */
|
||||
return (-ENOTTY);
|
||||
}
|
@ -277,8 +277,6 @@ zpl_test_super(struct super_block *s, void *data)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = s->s_fs_info;
|
||||
objset_t *os = data;
|
||||
int match;
|
||||
|
||||
/*
|
||||
* If the os doesn't match the z_os in the super_block, assume it is
|
||||
* not a match. Matching would imply a multimount of a dataset. It is
|
||||
@ -286,19 +284,7 @@ zpl_test_super(struct super_block *s, void *data)
|
||||
* that changes the z_os, e.g., rollback, where the match will be
|
||||
* missed, but in that case the user will get an EBUSY.
|
||||
*/
|
||||
if (zfsvfs == NULL || os != zfsvfs->z_os)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If they do match, recheck with the lock held to prevent mounting the
|
||||
* wrong dataset since z_os can be stale when the teardown lock is held.
|
||||
*/
|
||||
if (zpl_enter(zfsvfs, FTAG) != 0)
|
||||
return (0);
|
||||
match = (os == zfsvfs->z_os);
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
|
||||
return (match);
|
||||
return (zfsvfs != NULL && os == zfsvfs->z_os);
|
||||
}
|
||||
|
||||
static struct super_block *
|
||||
@ -324,12 +310,35 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
|
||||
|
||||
s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
|
||||
|
||||
/*
|
||||
* Recheck with the lock held to prevent mounting the wrong dataset
|
||||
* since z_os can be stale when the teardown lock is held.
|
||||
*
|
||||
* We can't do this in zpl_test_super since it's under spinlock and
|
||||
* also s_umount lock is not held there so it would race with
|
||||
* zfs_umount and zfsvfs can be freed.
|
||||
*/
|
||||
if (!IS_ERR(s) && s->s_fs_info != NULL) {
|
||||
zfsvfs_t *zfsvfs = s->s_fs_info;
|
||||
if (zpl_enter(zfsvfs, FTAG) == 0) {
|
||||
if (os != zfsvfs->z_os)
|
||||
err = -SET_ERROR(EBUSY);
|
||||
zpl_exit(zfsvfs, FTAG);
|
||||
} else {
|
||||
err = -SET_ERROR(EBUSY);
|
||||
}
|
||||
}
|
||||
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
|
||||
dsl_dataset_rele(dmu_objset_ds(os), FTAG);
|
||||
|
||||
if (IS_ERR(s))
|
||||
return (ERR_CAST(s));
|
||||
|
||||
if (err) {
|
||||
deactivate_locked_super(s);
|
||||
return (ERR_PTR(err));
|
||||
}
|
||||
|
||||
if (s->s_root == NULL) {
|
||||
err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
|
||||
if (err) {
|
||||
|
@ -671,7 +671,11 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
}
|
||||
|
||||
static int
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
zvol_open(struct gendisk *disk, blk_mode_t flag)
|
||||
#else
|
||||
zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
#endif
|
||||
{
|
||||
zvol_state_t *zv;
|
||||
int error = 0;
|
||||
@ -686,10 +690,14 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
/*
|
||||
* Obtain a copy of private_data under the zvol_state_lock to make
|
||||
* sure that either the result of zvol free code path setting
|
||||
* bdev->bd_disk->private_data to NULL is observed, or zvol_os_free()
|
||||
* disk->private_data to NULL is observed, or zvol_os_free()
|
||||
* is not called on this zv because of the positive zv_open_count.
|
||||
*/
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
zv = disk->private_data;
|
||||
#else
|
||||
zv = bdev->bd_disk->private_data;
|
||||
#endif
|
||||
if (zv == NULL) {
|
||||
rw_exit(&zvol_state_lock);
|
||||
return (SET_ERROR(-ENXIO));
|
||||
@ -769,14 +777,15 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
}
|
||||
}
|
||||
|
||||
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
||||
error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
|
||||
|
||||
if (drop_namespace)
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
|
||||
if (error == 0) {
|
||||
if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if ((blk_mode_is_open_write(flag)) &&
|
||||
(zv->zv_flags & ZVOL_RDONLY)) {
|
||||
if (zv->zv_open_count == 0)
|
||||
zvol_last_close(zv);
|
||||
|
||||
@ -791,14 +800,25 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
if (error == 0)
|
||||
#ifdef HAVE_BLK_MODE_T
|
||||
disk_check_media_change(disk);
|
||||
#else
|
||||
zfs_check_media_change(bdev);
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
zvol_release(struct gendisk *disk, fmode_t mode)
|
||||
#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
|
||||
zvol_release(struct gendisk *disk)
|
||||
#else
|
||||
zvol_release(struct gendisk *disk, fmode_t unused)
|
||||
#endif
|
||||
{
|
||||
#if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
|
||||
(void) unused;
|
||||
#endif
|
||||
zvol_state_t *zv;
|
||||
boolean_t drop_suspend = B_TRUE;
|
||||
|
||||
|
@ -284,7 +284,17 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
|
||||
dmu_buf_t *dbuf = NULL;
|
||||
bpobj_t *bpo = bpi->bpi_bpo;
|
||||
|
||||
for (int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= start; i--) {
|
||||
int64_t i = bpo->bpo_phys->bpo_num_blkptrs - 1;
|
||||
uint64_t pe = P2ALIGN_TYPED(i, bpo->bpo_epb, uint64_t) *
|
||||
sizeof (blkptr_t);
|
||||
uint64_t ps = start * sizeof (blkptr_t);
|
||||
uint64_t pb = MAX((pe > dmu_prefetch_max) ? pe - dmu_prefetch_max : 0,
|
||||
ps);
|
||||
if (pe > pb) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, pb, pe - pb,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
for (; i >= start; i--) {
|
||||
uint64_t offset = i * sizeof (blkptr_t);
|
||||
uint64_t blkoff = P2PHASE(i, bpo->bpo_epb);
|
||||
|
||||
@ -292,9 +302,16 @@ bpobj_iterate_blkptrs(bpobj_info_t *bpi, bpobj_itor_t func, void *arg,
|
||||
if (dbuf)
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
|
||||
offset, FTAG, &dbuf, 0);
|
||||
offset, FTAG, &dbuf, DMU_READ_NO_PREFETCH);
|
||||
if (err)
|
||||
break;
|
||||
pe = pb;
|
||||
pb = MAX((dbuf->db_offset > dmu_prefetch_max) ?
|
||||
dbuf->db_offset - dmu_prefetch_max : 0, ps);
|
||||
if (pe > pb) {
|
||||
dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
|
||||
pb, pe - pb, ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT3U(offset, >=, dbuf->db_offset);
|
||||
@ -466,22 +483,30 @@ bpobj_iterate_impl(bpobj_t *initial_bpo, bpobj_itor_t func, void *arg,
|
||||
int64_t i = bpi->bpi_unprocessed_subobjs - 1;
|
||||
uint64_t offset = i * sizeof (uint64_t);
|
||||
|
||||
uint64_t obj_from_sublist;
|
||||
uint64_t subobj;
|
||||
err = dmu_read(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
|
||||
offset, sizeof (uint64_t), &obj_from_sublist,
|
||||
DMU_READ_PREFETCH);
|
||||
offset, sizeof (uint64_t), &subobj,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
if (err)
|
||||
break;
|
||||
bpobj_t *sublist = kmem_alloc(sizeof (bpobj_t),
|
||||
|
||||
bpobj_t *subbpo = kmem_alloc(sizeof (bpobj_t),
|
||||
KM_SLEEP);
|
||||
|
||||
err = bpobj_open(sublist, bpo->bpo_os,
|
||||
obj_from_sublist);
|
||||
if (err)
|
||||
err = bpobj_open(subbpo, bpo->bpo_os, subobj);
|
||||
if (err) {
|
||||
kmem_free(subbpo, sizeof (bpobj_t));
|
||||
break;
|
||||
}
|
||||
|
||||
list_insert_head(&stack, bpi_alloc(sublist, bpi, i));
|
||||
mutex_enter(&sublist->bpo_lock);
|
||||
if (subbpo->bpo_havesubobj &&
|
||||
subbpo->bpo_phys->bpo_subobjs != 0) {
|
||||
dmu_prefetch(subbpo->bpo_os,
|
||||
subbpo->bpo_phys->bpo_subobjs, 0, 0, 0,
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
||||
list_insert_head(&stack, bpi_alloc(subbpo, bpi, i));
|
||||
mutex_enter(&subbpo->bpo_lock);
|
||||
bpi->bpi_unprocessed_subobjs--;
|
||||
}
|
||||
}
|
||||
|
@ -174,7 +174,7 @@
|
||||
* size_t len, unsigned int flags);
|
||||
*
|
||||
* Even though offsets and length represent bytes, they have to be
|
||||
* block-aligned or we will return the EXDEV error so the upper layer can
|
||||
* block-aligned or we will return an error so the upper layer can
|
||||
* fallback to the generic mechanism that will just copy the data.
|
||||
* Using copy_file_range(2) will call OS-independent zfs_clone_range() function.
|
||||
* This function was implemented based on zfs_write(), but instead of writing
|
||||
@ -192,9 +192,9 @@
|
||||
* Some special cases to consider and how we address them:
|
||||
* - The block we want to clone may have been created within the same
|
||||
* transaction group that we are trying to clone. Such block has no BP
|
||||
* allocated yet, so cannot be immediately cloned. We return EXDEV.
|
||||
* allocated yet, so cannot be immediately cloned. We return EAGAIN.
|
||||
* - The block we want to clone may have been modified within the same
|
||||
* transaction group. We return EXDEV.
|
||||
* transaction group. We return EAGAIN.
|
||||
* - A block may be cloned multiple times during one transaction group (that's
|
||||
* why pending list is actually a tree and not an append-only list - this
|
||||
* way we can figure out faster if this block is cloned for the first time
|
||||
@ -680,7 +680,7 @@ brt_vdev_realloc(brt_t *brt, brt_vdev_t *brtvd)
|
||||
size = (vdev_get_min_asize(vd) - 1) / brt->brt_rangesize + 1;
|
||||
spa_config_exit(brt->brt_spa, SCL_VDEV, FTAG);
|
||||
|
||||
entcount = kmem_zalloc(sizeof (entcount[0]) * size, KM_SLEEP);
|
||||
entcount = vmem_zalloc(sizeof (entcount[0]) * size, KM_SLEEP);
|
||||
nblocks = BRT_RANGESIZE_TO_NBLOCKS(size);
|
||||
bitmap = kmem_zalloc(BT_SIZEOFMAP(nblocks), KM_SLEEP);
|
||||
|
||||
@ -709,7 +709,7 @@ brt_vdev_realloc(brt_t *brt, brt_vdev_t *brtvd)
|
||||
sizeof (entcount[0]) * MIN(size, brtvd->bv_size));
|
||||
memcpy(bitmap, brtvd->bv_bitmap, MIN(BT_SIZEOFMAP(nblocks),
|
||||
BT_SIZEOFMAP(brtvd->bv_nblocks)));
|
||||
kmem_free(brtvd->bv_entcount,
|
||||
vmem_free(brtvd->bv_entcount,
|
||||
sizeof (entcount[0]) * brtvd->bv_size);
|
||||
kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(brtvd->bv_nblocks));
|
||||
}
|
||||
@ -792,7 +792,7 @@ brt_vdev_dealloc(brt_t *brt, brt_vdev_t *brtvd)
|
||||
ASSERT(RW_WRITE_HELD(&brt->brt_lock));
|
||||
ASSERT(brtvd->bv_initiated);
|
||||
|
||||
kmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size);
|
||||
vmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size);
|
||||
brtvd->bv_entcount = NULL;
|
||||
kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(brtvd->bv_nblocks));
|
||||
brtvd->bv_bitmap = NULL;
|
||||
@ -1544,6 +1544,37 @@ brt_entry_decref(spa_t *spa, const blkptr_t *bp)
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
brt_entry_get_refcount(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
brt_t *brt = spa->spa_brt;
|
||||
brt_vdev_t *brtvd;
|
||||
brt_entry_t bre_search, *bre;
|
||||
uint64_t vdevid, refcnt;
|
||||
int error;
|
||||
|
||||
brt_entry_fill(bp, &bre_search, &vdevid);
|
||||
|
||||
brt_rlock(brt);
|
||||
|
||||
brtvd = brt_vdev(brt, vdevid);
|
||||
ASSERT(brtvd != NULL);
|
||||
|
||||
bre = avl_find(&brtvd->bv_tree, &bre_search, NULL);
|
||||
if (bre == NULL) {
|
||||
error = brt_entry_lookup(brt, brtvd, &bre_search);
|
||||
ASSERT(error == 0 || error == ENOENT);
|
||||
if (error == ENOENT)
|
||||
refcnt = 0;
|
||||
else
|
||||
refcnt = bre_search.bre_refcount;
|
||||
} else
|
||||
refcnt = bre->bre_refcount;
|
||||
|
||||
brt_unlock(brt);
|
||||
return (refcnt);
|
||||
}
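A small hedged sketch of how the new accessor might be used (the wrapper below is invented for illustration): a nonzero BRT refcount indicates the block is still referenced by at least one clone.

/* Illustration only: hypothetical predicate built on brt_entry_get_refcount(). */
static boolean_t
example_bp_is_cloned(spa_t *spa, const blkptr_t *bp)
{
	return (brt_entry_get_refcount(spa, bp) > 0);
}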
|
||||
|
||||
static void
|
||||
brt_prefetch(brt_t *brt, const blkptr_t *bp)
|
||||
{
|
||||
|
@ -2701,7 +2701,7 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
*/
|
||||
mutex_enter(&db->db_mtx);
|
||||
VERIFY(!dbuf_undirty(db, tx));
|
||||
ASSERT(list_head(&db->db_dirty_records) == NULL);
|
||||
ASSERT0(dbuf_find_dirty_eq(db, tx->tx_txg));
|
||||
if (db->db_buf != NULL) {
|
||||
arc_buf_destroy(db->db_buf, db);
|
||||
db->db_buf = NULL;
|
||||
@ -4457,6 +4457,15 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
||||
} else if (db->db_state == DB_FILL) {
|
||||
/* This buffer was freed and is now being re-filled */
|
||||
ASSERT(db->db.db_data != dr->dt.dl.dr_data);
|
||||
} else if (db->db_state == DB_READ) {
|
||||
/*
|
||||
* This buffer has a clone we need to write, and an in-flight
|
||||
* read on the BP we're about to clone. It's safe to issue the
|
||||
* write here because the read has already been issued and the
|
||||
* contents won't change.
|
||||
*/
|
||||
ASSERT(dr->dt.dl.dr_brtwrite &&
|
||||
dr->dt.dl.dr_override_state == DR_OVERRIDDEN);
|
||||
} else {
|
||||
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL);
|
||||
}
|
||||
|
@ -89,7 +89,11 @@ static int zfs_dmu_offset_next_sync = 1;
|
||||
* helps to limit the amount of memory that can be used by prefetching.
|
||||
* Larger objects should be prefetched a bit at a time.
|
||||
*/
|
||||
#ifdef _ILP32
|
||||
uint_t dmu_prefetch_max = 8 * 1024 * 1024;
|
||||
#else
|
||||
uint_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
|
||||
#endif
|
||||
|
||||
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||
{DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
|
||||
@ -161,7 +165,7 @@ dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
|
||||
{ zfs_acl_byteswap, "acl" }
|
||||
};
|
||||
|
||||
static int
|
||||
int
|
||||
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
const void *tag, dmu_buf_t **dbp)
|
||||
{
|
||||
@ -181,6 +185,7 @@ dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
*dbp = &db->db;
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||
const void *tag, dmu_buf_t **dbp)
|
||||
@ -552,8 +557,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
|
||||
ZIO_FLAG_CANFAIL);
|
||||
blkid = dbuf_whichblock(dn, 0, offset);
|
||||
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
|
||||
length <= zfetch_array_rd_sz) {
|
||||
if ((flags & DMU_READ_NO_PREFETCH) == 0) {
|
||||
/*
|
||||
* Prepare the zfetch before initiating the demand reads, so
|
||||
* that if multiple threads block on same indirect block, we
|
||||
@ -691,74 +695,93 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, const void *tag)
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue prefetch i/os for the given blocks. If level is greater than 0, the
|
||||
* Issue prefetch I/Os for the given blocks. If level is greater than 0, the
|
||||
* indirect blocks prefetched will be those that point to the blocks containing
|
||||
* the data starting at offset, and continuing to offset + len.
|
||||
* the data starting at offset, and continuing to offset + len. If the range
|
||||
* is too long, prefetch the first dmu_prefetch_max bytes as requested, while
|
||||
* for the rest only a higher level, also fitting within dmu_prefetch_max. It
|
||||
* should primarily help random reads, since for long sequential reads there is
|
||||
* a speculative prefetcher.
|
||||
*
|
||||
* Note that if the indirect blocks above the blocks being prefetched are not
|
||||
* in cache, they will be asynchronously read in.
|
||||
* in cache, they will be asynchronously read in. Dnode read by dnode_hold()
|
||||
* is currently synchronous.
|
||||
*/
|
||||
void
|
||||
dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
||||
uint64_t len, zio_priority_t pri)
|
||||
{
|
||||
dnode_t *dn;
|
||||
uint64_t blkid;
|
||||
int nblks, err;
|
||||
int64_t level2 = level;
|
||||
uint64_t start, end, start2, end2;
|
||||
|
||||
if (len == 0) { /* they're interested in the bonus buffer */
|
||||
dn = DMU_META_DNODE(os);
|
||||
|
||||
if (object == 0 || object >= DN_MAX_OBJECT)
|
||||
return;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
blkid = dbuf_whichblock(dn, level,
|
||||
object * sizeof (dnode_phys_t));
|
||||
dbuf_prefetch(dn, level, blkid, pri, 0);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
if (dmu_prefetch_max == 0 || len == 0) {
|
||||
dmu_prefetch_dnode(os, object, pri);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* See comment before the definition of dmu_prefetch_max.
|
||||
*/
|
||||
len = MIN(len, dmu_prefetch_max);
|
||||
|
||||
/*
|
||||
* XXX - Note, if the dnode for the requested object is not
|
||||
* already cached, we will do a *synchronous* read in the
|
||||
* dnode_hold() call. The same is true for any indirects.
|
||||
*/
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err != 0)
|
||||
if (dnode_hold(os, object, FTAG, &dn) != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* offset + len - 1 is the last byte we want to prefetch for, and offset
|
||||
* is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the
|
||||
* last block we want to prefetch, and dbuf_whichblock(dn, level,
|
||||
* offset) is the first. Then the number we need to prefetch is the
|
||||
* last - first + 1.
|
||||
* Depending on len we may do two prefetches: blocks [start, end) at
|
||||
* level, and following blocks [start2, end2) at higher level2.
|
||||
*/
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
if (level > 0 || dn->dn_datablkshift != 0) {
|
||||
nblks = dbuf_whichblock(dn, level, offset + len - 1) -
|
||||
dbuf_whichblock(dn, level, offset) + 1;
|
||||
if (dn->dn_datablkshift != 0) {
|
||||
/*
|
||||
* The object has multiple blocks. Calculate the full range
|
||||
* of blocks [start, end2) and then split it into two parts,
|
||||
* so that the first [start, end) fits into dmu_prefetch_max.
|
||||
*/
|
||||
start = dbuf_whichblock(dn, level, offset);
|
||||
end2 = dbuf_whichblock(dn, level, offset + len - 1) + 1;
|
||||
uint8_t ibs = dn->dn_indblkshift;
|
||||
uint8_t bs = (level == 0) ? dn->dn_datablkshift : ibs;
|
||||
uint_t limit = P2ROUNDUP(dmu_prefetch_max, 1 << bs) >> bs;
|
||||
start2 = end = MIN(end2, start + limit);
|
||||
|
||||
/*
|
||||
* Find level2 where [start2, end2) fits into dmu_prefetch_max.
|
||||
*/
|
||||
uint8_t ibps = ibs - SPA_BLKPTRSHIFT;
|
||||
limit = P2ROUNDUP(dmu_prefetch_max, 1 << ibs) >> ibs;
|
||||
do {
|
||||
level2++;
|
||||
start2 = P2ROUNDUP(start2, 1 << ibps) >> ibps;
|
||||
end2 = P2ROUNDUP(end2, 1 << ibps) >> ibps;
|
||||
} while (end2 - start2 > limit);
|
||||
} else {
|
||||
nblks = (offset < dn->dn_datablksz);
|
||||
/* There is only one block. Prefetch it or nothing. */
|
||||
start = start2 = end2 = 0;
|
||||
end = start + (level == 0 && offset < dn->dn_datablksz);
|
||||
}
|
||||
|
||||
if (nblks != 0) {
|
||||
blkid = dbuf_whichblock(dn, level, offset);
|
||||
for (int i = 0; i < nblks; i++)
|
||||
dbuf_prefetch(dn, level, blkid + i, pri, 0);
|
||||
}
|
||||
for (uint64_t i = start; i < end; i++)
|
||||
dbuf_prefetch(dn, level, i, pri, 0);
|
||||
for (uint64_t i = start2; i < end2; i++)
|
||||
dbuf_prefetch(dn, level2, i, pri, 0);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
}
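(Illustrative only, not part of the merged diff.) The refactored dmu_prefetch() above prefetches the first dmu_prefetch_max bytes at the requested level and promotes the remainder to ever-higher indirect levels until the block count fits the same budget. A minimal standalone sketch of that arithmetic, using assumed values (128K blocks, 128-byte block pointers, an 8MB budget) rather than anything read from a pool:

#include <stdio.h>
#include <stdint.h>

/* Round x up to a multiple of the power-of-two align (what P2ROUNDUP does). */
static uint64_t
roundup_p2(uint64_t x, uint64_t align)
{
	return ((x + align - 1) & ~(align - 1));
}

int
main(void)
{
	uint64_t prefetch_max = 8 << 20;	/* assumed 8MB budget */
	unsigned bs = 17, ibs = 17, bpshift = 7; /* assumed 128K blocks, 128B blkptrs */
	uint64_t offset = 0, len = 1ULL << 30;	/* a 1GB prefetch request */

	uint64_t start = offset >> bs;
	uint64_t end2 = ((offset + len - 1) >> bs) + 1;
	uint64_t limit = roundup_p2(prefetch_max, 1ULL << bs) >> bs;
	uint64_t end = (end2 < start + limit) ? end2 : start + limit;
	uint64_t start2 = end;

	unsigned ibps = ibs - bpshift;		/* block pointers per indirect, as a shift */
	int level2 = 0;
	limit = roundup_p2(prefetch_max, 1ULL << ibs) >> ibs;
	do {
		level2++;
		start2 = roundup_p2(start2, 1ULL << ibps) >> ibps;
		end2 = roundup_p2(end2, 1ULL << ibps) >> ibps;
	} while (end2 - start2 > limit);

	printf("level 0: [%llu, %llu)  level %d: [%llu, %llu)\n",
	    (unsigned long long)start, (unsigned long long)end, level2,
	    (unsigned long long)start2, (unsigned long long)end2);
	return (0);
}

For the 1GB request this prints 64 level-0 blocks plus a handful of level-1 indirect blocks, instead of issuing 8192 level-0 prefetches.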

/*
* Issue prefetch I/Os for the given object's dnode.
*/
void
dmu_prefetch_dnode(objset_t *os, uint64_t object, zio_priority_t pri)
{
if (object == 0 || object >= DN_MAX_OBJECT)
return;

dnode_t *dn = DMU_META_DNODE(os);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
uint64_t blkid = dbuf_whichblock(dn, 0, object * sizeof (dnode_phys_t));
dbuf_prefetch(dn, 0, blkid, pri, 0);
rw_exit(&dn->dn_struct_rwlock);
}
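(Illustrative only.) dmu_prefetch_dnode() maps an object number to the meta-dnode block that stores its dnode; with the usual 512-byte on-disk dnode and an assumed 128K meta-dnode block size that is a simple division, 256 dnodes per block:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t dnode_size = 512;		/* sizeof (dnode_phys_t) on disk */
	uint64_t metadnode_blksz = 131072;	/* assumed 128K meta-dnode blocks */
	uint64_t object = 100000;

	/* The same computation dbuf_whichblock(dn, 0, object * sizeof (dnode_phys_t)) performs. */
	uint64_t blkid = (object * dnode_size) / metadnode_blksz;
	printf("object %llu -> meta-dnode block %llu\n",
	    (unsigned long long)object, (unsigned long long)blkid);	/* block 390 */
	return (0);
}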

/*
* Get the next "chunk" of file data to free. We traverse the file from
* the end so that the file gets shorter over time (if we crashes in the
@ -1650,10 +1673,22 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
{
dmu_sync_arg_t *dsa;
dmu_tx_t *tx;
int error;

error = dbuf_read((dmu_buf_impl_t *)zgd->zgd_db, NULL,
DB_RF_CANFAIL | DB_RF_NOPREFETCH);
if (error != 0)
return (error);

tx = dmu_tx_create(os);
dmu_tx_hold_space(tx, zgd->zgd_db->db_size);
if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
/*
* This transaction does not produce any dirty data or log blocks, so
* it should not be throttled. All other cases wait for TXG sync, by
* which time the log block we are writing will be obsolete, so we can
* skip waiting and just return error here instead.
*/
if (dmu_tx_assign(tx, TXG_NOWAIT | TXG_NOTHROTTLE) != 0) {
dmu_tx_abort(tx);
/* Make zl_get_data do txg_waited_synced() */
return (SET_ERROR(EIO));
@ -1795,17 +1795,19 @@ receive_handle_existing_object(const struct receive_writer_arg *rwa,
}

/*
* The dmu does not currently support decreasing nlevels
* or changing the number of dnode slots on an object. For
* non-raw sends, this does not matter and the new object
* can just use the previous one's nlevels. For raw sends,
* however, the structure of the received dnode (including
* nlevels and dnode slots) must match that of the send
* side. Therefore, instead of using dmu_object_reclaim(),
* we must free the object completely and call
* dmu_object_claim_dnsize() instead.
* The dmu does not currently support decreasing nlevels or changing
* indirect block size if there is already one, same as changing the
* number of of dnode slots on an object. For non-raw sends this
* does not matter and the new object can just use the previous one's
* parameters. For raw sends, however, the structure of the received
* dnode (including indirects and dnode slots) must match that of the
* send side. Therefore, instead of using dmu_object_reclaim(), we
* must free the object completely and call dmu_object_claim_dnsize()
* instead.
*/
if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
if ((rwa->raw && ((doi->doi_indirection > 1 &&
indblksz != doi->doi_metadata_block_size) ||
drro->drr_nlevels < doi->doi_indirection)) ||
dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
err = dmu_free_long_object(rwa->os, drro->drr_object);
if (err != 0)
@ -52,14 +52,19 @@ static unsigned int zfetch_max_streams = 8;
static unsigned int zfetch_min_sec_reap = 1;
/* max time before stream delete */
static unsigned int zfetch_max_sec_reap = 2;
#ifdef _ILP32
/* min bytes to prefetch per stream (default 2MB) */
static unsigned int zfetch_min_distance = 2 * 1024 * 1024;
/* max bytes to prefetch per stream (default 8MB) */
unsigned int zfetch_max_distance = 8 * 1024 * 1024;
#else
/* min bytes to prefetch per stream (default 4MB) */
static unsigned int zfetch_min_distance = 4 * 1024 * 1024;
/* max bytes to prefetch per stream (default 64MB) */
unsigned int zfetch_max_distance = 64 * 1024 * 1024;
#endif
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
uint64_t zfetch_array_rd_sz = 1024 * 1024;

typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
@ -580,6 +585,3 @@ ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,

ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
"Max bytes to prefetch indirects for per stream");

ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, U64, ZMOD_RW,
"Number of bytes in a array_read");
@ -1882,7 +1882,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
||||
if (ibs == dn->dn_indblkshift)
|
||||
ibs = 0;
|
||||
|
||||
if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
|
||||
if (size == dn->dn_datablksz && ibs == 0)
|
||||
return (0);
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
@ -1905,24 +1905,25 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
||||
if (ibs && dn->dn_nlevels != 1)
|
||||
goto fail;
|
||||
|
||||
/* resize the old block */
|
||||
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
|
||||
if (err == 0) {
|
||||
dbuf_new_size(db, size, tx);
|
||||
} else if (err != ENOENT) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
dnode_setdblksz(dn, size);
|
||||
dnode_setdirty(dn, tx);
|
||||
dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size;
|
||||
if (size != dn->dn_datablksz) {
|
||||
/* resize the old block */
|
||||
err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
|
||||
if (err == 0) {
|
||||
dbuf_new_size(db, size, tx);
|
||||
} else if (err != ENOENT) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
dnode_setdblksz(dn, size);
|
||||
dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = size;
|
||||
if (db)
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
if (ibs) {
|
||||
dn->dn_indblkshift = ibs;
|
||||
dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs;
|
||||
dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
|
||||
}
|
||||
/* release after we have fixed the blocksize in the dnode */
|
||||
if (db)
|
||||
dbuf_rele(db, FTAG);
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
return (0);
|
||||
|
@ -173,8 +173,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
* in parallel. Then open them all in a second pass.
*/
dle->dle_bpobj.bpo_object = za.za_first_integer;
dmu_prefetch(dl->dl_os, dle->dle_bpobj.bpo_object,
0, 0, 0, ZIO_PRIORITY_SYNC_READ);
dmu_prefetch_dnode(dl->dl_os, dle->dle_bpobj.bpo_object,
ZIO_PRIORITY_SYNC_READ);

avl_add(&dl->dl_tree, dle);
}
@ -235,8 +235,8 @@ dsl_deadlist_load_cache(dsl_deadlist_t *dl)
* in parallel. Then open them all in a second pass.
*/
dlce->dlce_bpobj = za.za_first_integer;
dmu_prefetch(dl->dl_os, dlce->dlce_bpobj,
0, 0, 0, ZIO_PRIORITY_SYNC_READ);
dmu_prefetch_dnode(dl->dl_os, dlce->dlce_bpobj,
ZIO_PRIORITY_SYNC_READ);
avl_add(&dl->dl_cache, dlce);
}
VERIFY3U(error, ==, ENOENT);
@ -892,9 +892,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
for (zap_cursor_init(&zc, dl->dl_os, obj);
(error = zap_cursor_retrieve(&zc, za)) == 0;
zap_cursor_advance(&zc)) {
uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
dsl_deadlist_insert_bpobj(dl, za->za_first_integer,
zfs_strtonum(za->za_name, NULL), tx);
VERIFY0(zap_remove(dl->dl_os, obj, za->za_name, tx));
if (perror == 0) {
dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
zfs_strtonum(pza->za_name, NULL));
@ -2015,6 +2015,11 @@ dsl_scan_prefetch_cb(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
zb->zb_objset, DMU_META_DNODE_OBJECT);

if (OBJSET_BUF_HAS_USERUSED(buf)) {
if (OBJSET_BUF_HAS_PROJECTUSED(buf)) {
dsl_scan_prefetch_dnode(scn,
&osp->os_projectused_dnode, zb->zb_objset,
DMU_PROJECTUSED_OBJECT);
}
dsl_scan_prefetch_dnode(scn,
&osp->os_groupused_dnode, zb->zb_objset,
DMU_GROUPUSED_OBJECT);
@ -2075,10 +2080,16 @@ dsl_scan_prefetch_thread(void *arg)
zio_flags |= ZIO_FLAG_RAW;
}

/* We don't need data L1 buffer since we do not prefetch L0. */
blkptr_t *bp = &spic->spic_bp;
if (BP_GET_LEVEL(bp) == 1 && BP_GET_TYPE(bp) != DMU_OT_DNODE &&
BP_GET_TYPE(bp) != DMU_OT_OBJSET)
flags |= ARC_FLAG_NO_BUF;

/* issue the prefetch asynchronously */
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa,
&spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc,
ZIO_PRIORITY_SCRUB, zio_flags, &flags, &spic->spic_zb);
(void) arc_read(scn->scn_zio_root, spa, bp,
dsl_scan_prefetch_cb, spic->spic_spc, ZIO_PRIORITY_SCRUB,
zio_flags, &flags, &spic->spic_zb);

kmem_free(spic, sizeof (scan_prefetch_issue_ctx_t));
}
@ -58,6 +58,11 @@ static uint64_t metaslab_aliquot = 1024 * 1024;
*/
uint64_t metaslab_force_ganging = SPA_MAXBLOCKSIZE + 1;

/*
* Of blocks of size >= metaslab_force_ganging, actually gang them this often.
*/
uint_t metaslab_force_ganging_pct = 3;
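(Illustrative only.) The two tunables combine so that only allocations of at least metaslab_force_ganging bytes are candidates, and of those only about metaslab_force_ganging_pct percent are actually forced to gang. A hedged sketch of the check, with random_in_range() as a stand-in for the kernel helper:

#include <stdint.h>
#include <stdlib.h>

/* Stand-in for the kernel's random_in_range(): uniform value in [0, range). */
static uint32_t
random_in_range(uint32_t range)
{
	return ((uint32_t)rand() % range);
}

static int
force_gang(uint64_t psize, uint64_t force_ganging, uint32_t force_ganging_pct)
{
	if (psize < force_ganging || force_ganging_pct == 0)
		return (0);
	uint32_t pct = force_ganging_pct > 100 ? 100 : force_ganging_pct;
	return (random_in_range(100) < pct);	/* true roughly pct% of the time */
}

int
main(void)
{
	/* With the defaults above, roughly 3% of qualifying allocations would gang. */
	int forced = 0;
	for (int i = 0; i < 100000; i++)
		forced += force_gang(16 << 20, 16 << 20, 3);
	return (forced > 0 ? 0 : 1);
}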
|
||||
|
||||
/*
|
||||
* In pools where the log space map feature is not enabled we touch
|
||||
* multiple metaslabs (and their respective space maps) with each
|
||||
@ -1287,7 +1292,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
|
||||
|
||||
/*
|
||||
* If this metaslab group is below its qmax or it's
|
||||
* the only allocatable metasable group, then attempt
|
||||
* the only allocatable metaslab group, then attempt
|
||||
* to allocate from it.
|
||||
*/
|
||||
if (qdepth < qmax || mc->mc_alloc_groups == 1)
|
||||
@ -5096,7 +5101,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
zio_alloc_list_t *zal, int allocator)
|
||||
{
|
||||
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
|
||||
metaslab_group_t *mg, *fast_mg, *rotor;
|
||||
metaslab_group_t *mg, *rotor;
|
||||
vdev_t *vd;
|
||||
boolean_t try_hard = B_FALSE;
|
||||
|
||||
@ -5109,7 +5114,9 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
* damage can result in extremely long reconstruction times. This
|
||||
* will also test spilling from special to normal.
|
||||
*/
|
||||
if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) {
|
||||
if (psize >= metaslab_force_ganging &&
|
||||
metaslab_force_ganging_pct > 0 &&
|
||||
(random_in_range(100) < MIN(metaslab_force_ganging_pct, 100))) {
|
||||
metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG,
|
||||
allocator);
|
||||
return (SET_ERROR(ENOSPC));
|
||||
@ -5157,15 +5164,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
} else if (d != 0) {
|
||||
vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
|
||||
mg = vd->vdev_mg->mg_next;
|
||||
} else if (flags & METASLAB_FASTWRITE) {
|
||||
mg = fast_mg = mca->mca_rotor;
|
||||
|
||||
do {
|
||||
if (fast_mg->mg_vd->vdev_pending_fastwrite <
|
||||
mg->mg_vd->vdev_pending_fastwrite)
|
||||
mg = fast_mg;
|
||||
} while ((fast_mg = fast_mg->mg_next) != mca->mca_rotor);
|
||||
|
||||
} else {
|
||||
ASSERT(mca->mca_rotor != NULL);
|
||||
mg = mca->mca_rotor;
|
||||
@ -5290,7 +5288,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
mg->mg_bias = 0;
|
||||
}
|
||||
|
||||
if ((flags & METASLAB_FASTWRITE) ||
|
||||
if ((flags & METASLAB_ZIL) ||
|
||||
atomic_add_64_nv(&mca->mca_aliquot, asize) >=
|
||||
mg->mg_aliquot + mg->mg_bias) {
|
||||
mca->mca_rotor = mg->mg_next;
|
||||
@ -5303,11 +5301,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
((flags & METASLAB_GANG_HEADER) ? 1 : 0));
|
||||
DVA_SET_ASIZE(&dva[d], asize);
|
||||
|
||||
if (flags & METASLAB_FASTWRITE) {
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite,
|
||||
psize);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
next:
|
||||
@ -5943,55 +5936,6 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
|
||||
return (error);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_mark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
atomic_add_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
metaslab_fastwrite_unmark(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = BP_GET_NDVAS(bp);
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
int d;
|
||||
vdev_t *vd;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT(psize > 0);
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (d = 0; d < ndvas; d++) {
|
||||
if ((vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]))) == NULL)
|
||||
continue;
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, >=, psize);
|
||||
atomic_sub_64(&vd->vdev_pending_fastwrite, psize);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_check_free_impl_cb(uint64_t inner, vdev_t *vd, uint64_t offset,
|
||||
uint64_t size, void *arg)
|
||||
@ -6266,7 +6210,10 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, switch_threshold, INT, ZMOD_RW,
|
||||
"Segment-based metaslab selection maximum buckets before switching");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging, U64, ZMOD_RW,
|
||||
"Blocks larger than this size are forced to be gang blocks");
|
||||
"Blocks larger than this size are sometimes forced to be gang blocks");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, force_ganging_pct, UINT, ZMOD_RW,
|
||||
"Percentage of large blocks that will be forced to be gang blocks");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
|
||||
"Max distance (bytes) to search forward before using size tree");
|
||||
|
@ -1147,8 +1147,8 @@ spa_ld_log_sm_data(spa_t *spa)
/* Prefetch log spacemaps dnodes. */
for (sls = avl_first(&spa->spa_sm_logs_by_txg); sls;
sls = AVL_NEXT(&spa->spa_sm_logs_by_txg, sls)) {
dmu_prefetch(spa_meta_objset(spa), sls->sls_sm_obj,
0, 0, 0, ZIO_PRIORITY_SYNC_READ);
dmu_prefetch_dnode(spa_meta_objset(spa), sls->sls_sm_obj,
ZIO_PRIORITY_SYNC_READ);
}

uint_t pn = 0;

@ -772,6 +772,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0;
spa->spa_min_alloc = INT_MAX;
spa->spa_gcd_alloc = INT_MAX;

/* Reset cached value */
spa->spa_dedup_dspace = ~0ULL;
@ -889,9 +889,15 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_not_present);

/*
* Get the alignment requirement.
* Get the alignment requirement. Ignore pool ashift for vdev
* attach case.
*/
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
if (alloctype != VDEV_ALLOC_ATTACH) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT,
&vd->vdev_ashift);
} else {
vd->vdev_attaching = B_TRUE;
}

/*
* Retrieve the vdev creation time.
@ -1186,7 +1192,6 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)

ASSERT(tvd == tvd->vdev_top);

tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
tvd->vdev_ms_array = svd->vdev_ms_array;
tvd->vdev_ms_shift = svd->vdev_ms_shift;
tvd->vdev_ms_count = svd->vdev_ms_count;
@ -1393,6 +1398,36 @@ vdev_remove_parent(vdev_t *cvd)
vdev_free(mvd);
}

/*
* Choose GCD for spa_gcd_alloc.
*/
static uint64_t
vdev_gcd(uint64_t a, uint64_t b)
{
while (b != 0) {
uint64_t t = b;
b = a % b;
a = t;
}
return (a);
}

/*
* Set spa_min_alloc and spa_gcd_alloc.
*/
static void
vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
{
if (min_alloc < spa->spa_min_alloc)
spa->spa_min_alloc = min_alloc;
if (spa->spa_gcd_alloc == INT_MAX) {
spa->spa_gcd_alloc = min_alloc;
} else {
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
spa->spa_gcd_alloc);
}
}
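(Illustrative only.) vdev_gcd() above is the plain Euclidean algorithm; folding each top-level vdev's minimum allocation size into spa_gcd_alloc keeps it at the largest granule every vdev can honour. A small example with hypothetical per-vdev minimums:

#include <stdio.h>
#include <stdint.h>

static uint64_t
gcd(uint64_t a, uint64_t b)
{
	while (b != 0) {
		uint64_t t = b;
		b = a % b;
		a = t;
	}
	return (a);
}

int
main(void)
{
	/* Hypothetical minimum allocation sizes reported by three vdevs. */
	uint64_t min_allocs[] = { 12288, 8192, 4096 };
	uint64_t g = min_allocs[0];
	for (int i = 1; i < 3; i++)
		g = gcd(min_allocs[i], g);
	printf("spa_gcd_alloc = %llu\n", (unsigned long long)g);	/* 4096 */
	return (0);
}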

void
vdev_metaslab_group_create(vdev_t *vd)
{
@ -1445,8 +1480,7 @@ vdev_metaslab_group_create(vdev_t *vd)
spa->spa_min_ashift = vd->vdev_ashift;

uint64_t min_alloc = vdev_get_min_alloc(vd);
if (min_alloc < spa->spa_min_alloc)
spa->spa_min_alloc = min_alloc;
vdev_spa_set_alloc(spa, min_alloc);
}
}
}
|
||||
@ -1620,7 +1654,6 @@ vdev_metaslab_fini(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
ASSERT0(vd->vdev_ms_count);
|
||||
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0);
|
||||
}
|
||||
|
||||
typedef struct vdev_probe_stats {
|
||||
@ -2144,9 +2177,9 @@ vdev_open(vdev_t *vd)
|
||||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
|
||||
if (vd->vdev_top == vd) {
|
||||
if (vd->vdev_top == vd && vd->vdev_attaching == B_FALSE)
|
||||
vdev_ashift_optimize(vd);
|
||||
}
|
||||
vd->vdev_attaching = B_FALSE;
|
||||
}
|
||||
if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
|
||||
vd->vdev_ashift > ASHIFT_MAX)) {
|
||||
@ -2207,8 +2240,7 @@ vdev_open(vdev_t *vd)
|
||||
if (vd->vdev_top == vd && vd->vdev_ashift != 0 &&
|
||||
vd->vdev_islog == 0 && vd->vdev_aux == NULL) {
|
||||
uint64_t min_alloc = vdev_get_min_alloc(vd);
|
||||
if (min_alloc < spa->spa_min_alloc)
|
||||
spa->spa_min_alloc = min_alloc;
|
||||
vdev_spa_set_alloc(spa, min_alloc);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5688,6 +5720,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
nvpair_t *elem = NULL;
|
||||
uint64_t vdev_guid;
|
||||
uint64_t objid;
|
||||
nvlist_t *nvprops;
|
||||
|
||||
vdev_guid = fnvlist_lookup_uint64(nvp, ZPOOL_VDEV_PROPS_SET_VDEV);
|
||||
@ -5698,31 +5731,28 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
if (vd == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Set vdev property values in the vdev props mos object.
|
||||
*/
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
panic("unexpected vdev type");
|
||||
}
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvprops, elem)) != NULL) {
|
||||
uint64_t intval, objid = 0;
|
||||
uint64_t intval;
|
||||
const char *strval;
|
||||
vdev_prop_t prop;
|
||||
const char *propname = nvpair_name(elem);
|
||||
zprop_type_t proptype;
|
||||
|
||||
/*
|
||||
* Set vdev property values in the vdev props mos object.
|
||||
*/
|
||||
if (vd->vdev_root_zap != 0) {
|
||||
objid = vd->vdev_root_zap;
|
||||
} else if (vd->vdev_top_zap != 0) {
|
||||
objid = vd->vdev_top_zap;
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
/*
|
||||
* XXX: implement vdev_props_set_check()
|
||||
*/
|
||||
panic("vdev not root/top/leaf");
|
||||
}
|
||||
|
||||
switch (prop = vdev_name_to_prop(propname)) {
|
||||
case VDEV_PROP_USERPROP:
|
||||
if (vdev_prop_user(propname)) {
|
||||
@ -5791,6 +5821,12 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
ASSERT(vd != NULL);
|
||||
|
||||
/* Check that vdev has a zap we can use */
|
||||
if (vd->vdev_root_zap == 0 &&
|
||||
vd->vdev_top_zap == 0 &&
|
||||
vd->vdev_leaf_zap == 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
|
||||
&vdev_guid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
@ -1398,7 +1398,7 @@ vdev_indirect_checksum_error(zio_t *zio,
|
||||
vd->vdev_stat.vs_checksum_errors++;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
zio_bad_cksum_t zbc = {{{ 0 }}};
|
||||
zio_bad_cksum_t zbc = { 0 };
|
||||
abd_t *bad_abd = ic->ic_data;
|
||||
abd_t *good_abd = is->is_good_child->ic_data;
|
||||
(void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio,
|
||||
|
@ -1785,7 +1785,7 @@ vdev_raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
|
||||
static int
|
||||
raidz_checksum_verify(zio_t *zio)
|
||||
{
|
||||
zio_bad_cksum_t zbc = {{{0}}};
|
||||
zio_bad_cksum_t zbc = {0};
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
|
||||
int ret = zio_checksum_error(zio, &zbc);
|
||||
|
@ -754,10 +754,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
#define MAX_RANGES 16

typedef struct zfs_ecksum_info {
/* histograms of set and cleared bits by bit number in a 64-bit word */
uint8_t zei_histogram_set[sizeof (uint64_t) * NBBY];
uint8_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];

/* inline arrays of bits set and cleared. */
uint64_t zei_bits_set[ZFM_MAX_INLINE];
uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
@ -781,7 +777,7 @@ typedef struct zfs_ecksum_info {
} zfs_ecksum_info_t;

static void
update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
update_bad_bits(uint64_t value_arg, uint32_t *count)
{
size_t i;
size_t bits = 0;
@ -789,10 +785,8 @@ update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)

/* We store the bits in big-endian (largest-first) order */
for (i = 0; i < 64; i++) {
if (value & (1ull << i)) {
hist[63 - i]++;
if (value & (1ull << i))
++bits;
}
}
/* update the count of bits changed */
*count += bits;
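(Illustrative only.) With the per-bit histograms removed, update_bad_bits() reduces to a population count over the damaged word. A hedged equivalent using the GCC/Clang builtin:

#include <stdint.h>

/* Same effect as the loop above: add the number of set bits in value to *count. */
static void
update_bad_bits_popcount(uint64_t value, uint32_t *count)
{
	*count += (uint32_t)__builtin_popcountll(value);
}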
|
||||
@ -920,14 +914,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
||||
|
||||
if (info != NULL && info->zbc_has_cksum) {
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
|
||||
DATA_TYPE_UINT64_ARRAY,
|
||||
sizeof (info->zbc_expected) / sizeof (uint64_t),
|
||||
(uint64_t *)&info->zbc_expected,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
|
||||
DATA_TYPE_UINT64_ARRAY,
|
||||
sizeof (info->zbc_actual) / sizeof (uint64_t),
|
||||
(uint64_t *)&info->zbc_actual,
|
||||
FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
|
||||
DATA_TYPE_STRING,
|
||||
info->zbc_checksum_name,
|
||||
@ -1010,10 +996,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
||||
offset++;
|
||||
}
|
||||
|
||||
update_histogram(set, eip->zei_histogram_set,
|
||||
&eip->zei_range_sets[range]);
|
||||
update_histogram(cleared, eip->zei_histogram_cleared,
|
||||
&eip->zei_range_clears[range]);
|
||||
update_bad_bits(set, &eip->zei_range_sets[range]);
|
||||
update_bad_bits(cleared, &eip->zei_range_clears[range]);
|
||||
}
|
||||
|
||||
/* convert to byte offsets */
|
||||
@ -1049,15 +1033,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
|
||||
DATA_TYPE_UINT8_ARRAY,
|
||||
inline_size, (uint8_t *)eip->zei_bits_cleared,
|
||||
NULL);
|
||||
} else {
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
|
||||
DATA_TYPE_UINT8_ARRAY,
|
||||
NBBY * sizeof (uint64_t), eip->zei_histogram_set,
|
||||
FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
|
||||
DATA_TYPE_UINT8_ARRAY,
|
||||
NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
|
||||
NULL);
|
||||
}
|
||||
return (eip);
|
||||
}
|
||||
|
@ -839,7 +839,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||
uint64_t zp_gen;
|
||||
|
||||
ASSERT3P(lwb, !=, NULL);
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
ASSERT3U(size, !=, 0);
|
||||
|
||||
/*
|
||||
@ -889,6 +888,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||
}
|
||||
ASSERT(error == 0 || error == ENOENT);
|
||||
} else { /* indirect write */
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
/*
|
||||
* Have to lock the whole block to ensure when it's
|
||||
* written out and its checksum is being calculated
|
||||
@ -917,8 +917,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
|
||||
}
|
||||
#endif
|
||||
if (error == 0)
|
||||
error = dmu_buf_hold(os, object, offset, zgd, &db,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
error = dmu_buf_hold_noread(os, object, offset, zgd,
|
||||
&db);
|
||||
|
||||
if (error == 0) {
|
||||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
@ -1028,6 +1028,10 @@ zfs_exit_two(zfsvfs_t *zfsvfs1, zfsvfs_t *zfsvfs2, const char *tag)
*
* On success, the function return the number of bytes copied in *lenp.
* Note, it doesn't return how much bytes are left to be copied.
* On errors which are caused by any file system limitations or
* brt limitations `EINVAL` is returned. In the most cases a user
* requested bad parameters, it could be possible to clone the file but
* some parameters don't match the requirements.
*/
int
zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
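(Illustrative only.) The updated comment spells out the error contract a caller sees: EINVAL means the request itself cannot be cloned (bad offsets, lengths or block sizes), EXDEV means cloning is not possible right now but a plain copy would work, and EOPNOTSUPP means the pool feature is disabled. A hedged caller-side sketch with hypothetical helpers, not OpenZFS API:

#include <errno.h>
#include <sys/types.h>

/* Decide what to do with the result of a clone attempt. */
static ssize_t
clone_or_copy(int error, ssize_t cloned, ssize_t (*fallback_copy)(void))
{
	if (error == 0)
		return (cloned);		/* block cloning worked */
	if (error == EXDEV)
		return (fallback_copy());	/* e.g. source block born in the open txg */
	/* EINVAL and EOPNOTSUPP are final; report them to the user. */
	return ((ssize_t)-error);
}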

@ -1078,6 +1082,16 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
return (SET_ERROR(EXDEV));
}

/*
* outos and inos belongs to the same storage pool.
* see a few lines above, only one check.
*/
if (!spa_feature_is_enabled(dmu_objset_spa(outos),
SPA_FEATURE_BLOCK_CLONING)) {
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
return (SET_ERROR(EOPNOTSUPP));
}

ASSERT(!outzfsvfs->z_replay);

error = zfs_verify_zp(inzp);
@ -1088,12 +1102,6 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
return (error);
}

if (!spa_feature_is_enabled(dmu_objset_spa(outos),
SPA_FEATURE_BLOCK_CLONING)) {
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}

/*
* We don't copy source file's flags that's why we don't allow to clone
* files that are in quarantine.
@ -1167,7 +1175,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
* We cannot clone into files with different block size.
*/
if (inblksz != outzp->z_blksz && outzp->z_size > inblksz) {
error = SET_ERROR(EXDEV);
error = SET_ERROR(EINVAL);
goto unlock;
}

@ -1175,7 +1183,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
* Offsets and len must be at block boundries.
*/
if ((inoff % inblksz) != 0 || (outoff % inblksz) != 0) {
error = SET_ERROR(EXDEV);
error = SET_ERROR(EINVAL);
goto unlock;
}
/*
@ -1183,7 +1191,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
*/
if ((len % inblksz) != 0 &&
(len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
error = SET_ERROR(EXDEV);
error = SET_ERROR(EINVAL);
goto unlock;
}

@ -1212,7 +1220,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
gid = KGID_TO_SGID(ZTOGID(outzp));
projid = outzp->z_projid;

bps = kmem_alloc(sizeof (bps[0]) * maxblocks, KM_SLEEP);
bps = vmem_alloc(sizeof (bps[0]) * maxblocks, KM_SLEEP);

/*
* Clone the file in reasonable size chunks. Each chunk is cloned
@ -1238,13 +1246,11 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
&nbps);
if (error != 0) {
/*
* If we are tyring to clone a block that was created
* in the current transaction group. Return an error,
* so the caller can fallback to just copying the data.
* If we are trying to clone a block that was created
* in the current transaction group, error will be
* EAGAIN here, which we can just return to the caller
* so it can fallback if it likes.
*/
if (error == EAGAIN) {
error = SET_ERROR(EXDEV);
}
break;
}
/*
@ -1330,7 +1336,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
done += size;
}

kmem_free(bps, sizeof (bps[0]) * maxblocks);
vmem_free(bps, sizeof (bps[0]) * maxblocks);
zfs_znode_update_vfs(outzp);

unlock:
@ -1596,6 +1596,19 @@ zio_shrink(zio_t *zio, uint64_t size)
}
}

/*
* Round provided allocation size up to a value that can be allocated
* by at least some vdev(s) in the pool with minimum or no additional
* padding and without extra space usage on others
*/
static uint64_t
zio_roundup_alloc_size(spa_t *spa, uint64_t size)
{
if (size > spa->spa_min_alloc)
return (roundup(size, spa->spa_gcd_alloc));
return (spa->spa_min_alloc);
}
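(Illustrative only.) A worked example of the new rounding: with hypothetical per-vdev minimum allocation sizes of 8192 and 12288 bytes, spa_min_alloc ends up 8192 and spa_gcd_alloc 4096, so a 10000-byte allocation rounds to 12288 rather than the 16384 that rounding to spa_min_alloc used to give, while anything at or below 8192 still becomes 8192:

#include <stdio.h>
#include <stdint.h>

static uint64_t
roundup_to(uint64_t x, uint64_t mult)
{
	return (((x + mult - 1) / mult) * mult);
}

/* Mirrors the logic of zio_roundup_alloc_size() with explicit inputs. */
static uint64_t
roundup_alloc_size(uint64_t size, uint64_t min_alloc, uint64_t gcd_alloc)
{
	if (size > min_alloc)
		return (roundup_to(size, gcd_alloc));
	return (min_alloc);
}

int
main(void)
{
	uint64_t min_alloc = 8192, gcd_alloc = 4096;	/* assumed pool values */
	printf("%llu\n", (unsigned long long)roundup_alloc_size(10000, min_alloc, gcd_alloc)); /* 12288 */
	printf("%llu\n", (unsigned long long)roundup_alloc_size(5000, min_alloc, gcd_alloc));  /* 8192 */
	return (0);
}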

/*
* ==========================================================================
* Prepare to read and write logical blocks
@ -1762,8 +1775,9 @@ zio_write_compress(zio_t *zio)
compress = ZIO_COMPRESS_OFF;

/* Make sure someone doesn't change their mind on overwrites */
ASSERT(BP_IS_EMBEDDED(bp) || MIN(zp->zp_copies + BP_IS_GANG(bp),
spa_max_replication(spa)) == BP_GET_NDVAS(bp));
ASSERT(BP_IS_EMBEDDED(bp) || BP_IS_GANG(bp) ||
MIN(zp->zp_copies, spa_max_replication(spa))
== BP_GET_NDVAS(bp));
}

/* If it's a compressed write that is not raw, compress the buffer. */
@ -1802,9 +1816,8 @@ zio_write_compress(zio_t *zio)
* in that we charge for the padding used to fill out
* the last sector.
*/
ASSERT3U(spa->spa_min_alloc, >=, SPA_MINBLOCKSHIFT);
size_t rounded = (size_t)roundup(psize,
spa->spa_min_alloc);
size_t rounded = (size_t)zio_roundup_alloc_size(spa,
psize);
if (rounded >= lsize) {
compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize);
@ -1847,8 +1860,8 @@ zio_write_compress(zio_t *zio)
* take this codepath because it will change the on-disk block
* and decryption will fail.
*/
size_t rounded = MIN((size_t)roundup(psize,
spa->spa_min_alloc), lsize);
size_t rounded = MIN((size_t)zio_roundup_alloc_size(spa, psize),
lsize);

if (rounded != psize) {
abd_t *cdata = abd_alloc_linear(rounded, B_TRUE);
@ -3012,11 +3025,6 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
|
||||
*/
|
||||
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
/*
|
||||
* We didn't allocate this bp, so make sure it doesn't get unmarked.
|
||||
*/
|
||||
pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
|
||||
|
||||
zio_nowait(zio);
|
||||
|
||||
return (pio);
|
||||
@ -3604,7 +3612,6 @@ zio_dva_allocate(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
|
||||
if (zio->io_flags & ZIO_FLAG_NODATA)
|
||||
flags |= METASLAB_DONT_THROTTLE;
|
||||
if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
|
||||
@ -3764,7 +3771,7 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
|
||||
* of, so we just hash the objset ID to pick the allocator to get
|
||||
* some parallelism.
|
||||
*/
|
||||
int flags = METASLAB_FASTWRITE | METASLAB_ZIL;
|
||||
int flags = METASLAB_ZIL;
|
||||
int allocator = (uint_t)cityhash4(0, 0, 0,
|
||||
os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
|
||||
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
|
||||
@ -4460,8 +4467,8 @@ zio_ready(zio_t *zio)
|
||||
zio_t *pio, *pio_next;
|
||||
zio_link_t *zl = NULL;
|
||||
|
||||
if (zio_wait_for_children(zio, ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT,
|
||||
ZIO_WAIT_READY)) {
|
||||
if (zio_wait_for_children(zio, ZIO_CHILD_LOGICAL_BIT |
|
||||
ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT, ZIO_WAIT_READY)) {
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
@ -4919,12 +4926,6 @@ zio_done(zio_t *zio)
|
||||
zfs_ereport_free_checksum(zcr);
|
||||
}
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
|
||||
!BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) &&
|
||||
!(zio->io_flags & ZIO_FLAG_NOPWRITE)) {
|
||||
metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* It is the responsibility of the done callback to ensure that this
|
||||
* particular zio is no longer discoverable for adoption, and as
|
||||
|
@ -515,8 +515,6 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
||||
}
|
||||
|
||||
if (info != NULL) {
|
||||
info->zbc_expected = expected_cksum;
|
||||
info->zbc_actual = actual_cksum;
|
||||
info->zbc_checksum_name = ci->ci_name;
|
||||
info->zbc_byteswapped = byteswap;
|
||||
info->zbc_injected = 0;
|
||||
|
@ -698,7 +698,6 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||
int error;
|
||||
|
||||
ASSERT3P(lwb, !=, NULL);
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
ASSERT3U(size, !=, 0);
|
||||
|
||||
zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
|
||||
@ -717,6 +716,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
} else { /* indirect write */
|
||||
ASSERT3P(zio, !=, NULL);
|
||||
/*
|
||||
* Have to lock the whole block to ensure when it's written out
|
||||
* and its checksum is being calculated that no one can change
|
||||
@ -727,8 +727,8 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
|
||||
offset = P2ALIGN_TYPED(offset, size, uint64_t);
|
||||
zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
|
||||
size, RL_READER);
|
||||
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
error = dmu_buf_hold_noread_by_dnode(zv->zv_dn, offset, zgd,
|
||||
&db);
|
||||
if (error == 0) {
|
||||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
|
||||
@ -981,7 +981,7 @@ zvol_prefetch_minors_impl(void *arg)
|
||||
job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, B_TRUE,
|
||||
FTAG, &os);
|
||||
if (job->error == 0) {
|
||||
dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
|
||||
dmu_prefetch_dnode(os, ZVOL_OBJ, ZIO_PRIORITY_SYNC_READ);
|
||||
dmu_objset_disown(os, B_TRUE, FTAG);
|
||||
}
|
||||
}
|
||||
|
@ -34,6 +34,17 @@ tags = ['functional', 'acl', 'posix-sa']
|
||||
tests = ['atime_003_pos', 'root_relatime_on']
|
||||
tags = ['functional', 'atime']
|
||||
|
||||
[tests/functional/block_cloning:Linux]
|
||||
tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
|
||||
'block_cloning_copyfilerange_fallback',
|
||||
'block_cloning_ficlone', 'block_cloning_ficlonerange',
|
||||
'block_cloning_ficlonerange_partial',
|
||||
'block_cloning_disabled_copyfilerange', 'block_cloning_disabled_ficlone',
|
||||
'block_cloning_disabled_ficlonerange',
|
||||
'block_cloning_copyfilerange_cross_dataset',
|
||||
'block_cloning_copyfilerange_fallback_same_txg']
|
||||
tags = ['functional', 'block_cloning']
|
||||
|
||||
[tests/functional/chattr:Linux]
|
||||
tests = ['chattr_001_pos', 'chattr_002_neg']
|
||||
tags = ['functional', 'chattr']
|
||||
|
@ -134,6 +134,12 @@ ci_reason = 'CI runner doesn\'t have all requirements'
|
||||
#
|
||||
idmap_reason = 'Idmapped mount needs kernel 5.12+'
|
||||
|
||||
#
|
||||
# copy_file_range() is not supported by all kernels
|
||||
#
|
||||
cfr_reason = 'Kernel copy_file_range support required'
|
||||
cfr_cross_reason = 'copy_file_range(2) cross-filesystem needs kernel 5.3+'
|
||||
|
||||
#
|
||||
# These tests are known to fail, thus we use this list to prevent these
|
||||
# failures from failing the job as a whole; only unexpected failures
|
||||
@ -288,6 +294,18 @@ elif sys.platform.startswith('linux'):
|
||||
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
|
||||
'idmap_mount/idmap_mount_004': ['SKIP', idmap_reason],
|
||||
'idmap_mount/idmap_mount_005': ['SKIP', idmap_reason],
|
||||
'block_cloning/block_cloning_disabled_copyfilerange':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_copyfilerange':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_partial':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_fallback':
|
||||
['SKIP', cfr_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_cross_dataset':
|
||||
['SKIP', cfr_cross_reason],
|
||||
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
|
||||
['SKIP', cfr_cross_reason],
|
||||
})
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
/badsend
|
||||
/btree_test
|
||||
/chg_usr_exec
|
||||
/clonefile
|
||||
/devname2devid
|
||||
/dir_rd_update
|
||||
/draid
|
||||
|
@ -119,6 +119,7 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2
|
||||
scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest
|
||||
scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet
|
||||
scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util
|
||||
scripts_zfs_tests_bin_PROGRAMS += %D%/clonefile
|
||||
|
||||
%C%_idmap_util_LDADD = libspl.la
|
||||
|
||||
|
sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c (new file, 333 lines)
@ -0,0 +1,333 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This program is to test the availability and behaviour of copy_file_range,
|
||||
* FICLONE, FICLONERANGE and FIDEDUPERANGE in the Linux kernel. It should
|
||||
* compile and run even if these features aren't exposed through the libc.
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifndef __NR_copy_file_range
|
||||
#if defined(__x86_64__)
|
||||
#define __NR_copy_file_range (326)
|
||||
#elif defined(__i386__)
|
||||
#define __NR_copy_file_range (377)
|
||||
#elif defined(__s390__)
|
||||
#define __NR_copy_file_range (375)
|
||||
#elif defined(__arm__)
|
||||
#define __NR_copy_file_range (391)
|
||||
#elif defined(__aarch64__)
|
||||
#define __NR_copy_file_range (285)
|
||||
#elif defined(__powerpc__)
|
||||
#define __NR_copy_file_range (379)
|
||||
#else
|
||||
#error "no definition of __NR_copy_file_range for this platform"
|
||||
#endif
|
||||
#endif /* __NR_copy_file_range */
|
||||
|
||||
ssize_t
|
||||
copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)
|
||||
__attribute__((weak));
|
||||
|
||||
static inline ssize_t
|
||||
cf_copy_file_range(int sfd, loff_t *soff, int dfd, loff_t *doff,
|
||||
size_t len, unsigned int flags)
|
||||
{
|
||||
if (copy_file_range)
|
||||
return (copy_file_range(sfd, soff, dfd, doff, len, flags));
|
||||
return (
|
||||
syscall(__NR_copy_file_range, sfd, soff, dfd, doff, len, flags));
|
||||
}
|
||||
|
||||
/* Define missing FICLONE */
|
||||
#ifdef FICLONE
|
||||
#define CF_FICLONE FICLONE
|
||||
#else
|
||||
#define CF_FICLONE _IOW(0x94, 9, int)
|
||||
#endif
|
||||
|
||||
/* Define missing FICLONERANGE and support structs */
|
||||
#ifdef FICLONERANGE
|
||||
#define CF_FICLONERANGE FICLONERANGE
|
||||
typedef struct file_clone_range cf_file_clone_range_t;
|
||||
#else
|
||||
typedef struct {
|
||||
int64_t src_fd;
|
||||
uint64_t src_offset;
|
||||
uint64_t src_length;
|
||||
uint64_t dest_offset;
|
||||
} cf_file_clone_range_t;
|
||||
#define CF_FICLONERANGE _IOW(0x94, 13, cf_file_clone_range_t)
|
||||
#endif
|
||||
|
||||
/* Define missing FIDEDUPERANGE and support structs */
|
||||
#ifdef FIDEDUPERANGE
|
||||
#define CF_FIDEDUPERANGE FIDEDUPERANGE
|
||||
#define CF_FILE_DEDUPE_RANGE_SAME FILE_DEDUPE_RANGE_SAME
|
||||
#define CF_FILE_DEDUPE_RANGE_DIFFERS FILE_DEDUPE_RANGE_DIFFERS
|
||||
typedef struct file_dedupe_range_info cf_file_dedupe_range_info_t;
|
||||
typedef struct file_dedupe_range cf_file_dedupe_range_t;
|
||||
#else
|
||||
typedef struct {
|
||||
int64_t dest_fd;
|
||||
uint64_t dest_offset;
|
||||
uint64_t bytes_deduped;
|
||||
int32_t status;
|
||||
uint32_t reserved;
|
||||
} cf_file_dedupe_range_info_t;
|
||||
typedef struct {
|
||||
uint64_t src_offset;
|
||||
uint64_t src_length;
|
||||
uint16_t dest_count;
|
||||
uint16_t reserved1;
|
||||
uint32_t reserved2;
|
||||
cf_file_dedupe_range_info_t info[0];
|
||||
} cf_file_dedupe_range_t;
|
||||
#define CF_FIDEDUPERANGE _IOWR(0x94, 54, cf_file_dedupe_range_t)
|
||||
#define CF_FILE_DEDUPE_RANGE_SAME (0)
|
||||
#define CF_FILE_DEDUPE_RANGE_DIFFERS (1)
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
CF_MODE_NONE,
|
||||
CF_MODE_CLONE,
|
||||
CF_MODE_CLONERANGE,
|
||||
CF_MODE_COPYFILERANGE,
|
||||
CF_MODE_DEDUPERANGE,
|
||||
} cf_mode_t;
|
||||
|
||||
static int
|
||||
usage(void)
|
||||
{
|
||||
printf(
|
||||
"usage:\n"
|
||||
" FICLONE:\n"
|
||||
" clonefile -c <src> <dst>\n"
|
||||
" FICLONERANGE:\n"
|
||||
" clonefile -r <src> <dst> <soff> <doff> <len>\n"
|
||||
" copy_file_range:\n"
|
||||
" clonefile -f <src> <dst> <soff> <doff> <len>\n"
|
||||
" FIDEDUPERANGE:\n"
|
||||
" clonefile -d <src> <dst> <soff> <doff> <len>\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
int do_clone(int sfd, int dfd);
|
||||
int do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
int do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
int do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
|
||||
int quiet = 0;
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
cf_mode_t mode = CF_MODE_NONE;
|
||||
|
||||
char c;
|
||||
while ((c = getopt(argc, argv, "crfdq")) != -1) {
|
||||
switch (c) {
|
||||
case 'c':
|
||||
mode = CF_MODE_CLONE;
|
||||
break;
|
||||
case 'r':
|
||||
mode = CF_MODE_CLONERANGE;
|
||||
break;
|
||||
case 'f':
|
||||
mode = CF_MODE_COPYFILERANGE;
|
||||
break;
|
||||
case 'd':
|
||||
mode = CF_MODE_DEDUPERANGE;
|
||||
break;
|
||||
case 'q':
|
||||
quiet = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == CF_MODE_NONE || (argc-optind) < 2 ||
|
||||
(mode != CF_MODE_CLONE && (argc-optind) < 5))
|
||||
return (usage());
|
||||
|
||||
loff_t soff = 0, doff = 0;
|
||||
size_t len = 0;
|
||||
if (mode != CF_MODE_CLONE) {
|
||||
soff = strtoull(argv[optind+2], NULL, 10);
|
||||
if (soff == ULLONG_MAX) {
|
||||
fprintf(stderr, "invalid source offset");
|
||||
return (1);
|
||||
}
|
||||
doff = strtoull(argv[optind+3], NULL, 10);
|
||||
if (doff == ULLONG_MAX) {
|
||||
fprintf(stderr, "invalid dest offset");
|
||||
return (1);
|
||||
}
|
||||
len = strtoull(argv[optind+4], NULL, 10);
|
||||
if (len == ULLONG_MAX) {
|
||||
fprintf(stderr, "invalid length");
|
||||
return (1);
|
||||
}
|
||||
}
|
||||
|
||||
int sfd = open(argv[optind], O_RDONLY);
|
||||
if (sfd < 0) {
|
||||
fprintf(stderr, "open: %s: %s\n",
|
||||
argv[optind], strerror(errno));
|
||||
return (1);
|
||||
}
|
||||
|
||||
int dfd = open(argv[optind+1], O_WRONLY|O_CREAT,
|
||||
S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
|
||||
if (dfd < 0) {
|
||||
fprintf(stderr, "open: %s: %s\n",
|
||||
argv[optind+1], strerror(errno));
|
||||
close(sfd);
|
||||
return (1);
|
||||
}
|
||||
|
||||
int err;
|
||||
switch (mode) {
|
||||
case CF_MODE_CLONE:
|
||||
err = do_clone(sfd, dfd);
|
||||
break;
|
||||
case CF_MODE_CLONERANGE:
|
||||
err = do_clonerange(sfd, dfd, soff, doff, len);
|
||||
break;
|
||||
case CF_MODE_COPYFILERANGE:
|
||||
err = do_copyfilerange(sfd, dfd, soff, doff, len);
|
||||
break;
|
||||
case CF_MODE_DEDUPERANGE:
|
||||
err = do_deduperange(sfd, dfd, soff, doff, len);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
off_t spos = lseek(sfd, 0, SEEK_CUR);
|
||||
off_t slen = lseek(sfd, 0, SEEK_END);
|
||||
off_t dpos = lseek(dfd, 0, SEEK_CUR);
|
||||
off_t dlen = lseek(dfd, 0, SEEK_END);
|
||||
|
||||
fprintf(stderr, "file offsets: src=%lu/%lu; dst=%lu/%lu\n", spos, slen,
|
||||
dpos, dlen);
|
||||
|
||||
close(dfd);
|
||||
close(sfd);
|
||||
|
||||
return (err == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
int
|
||||
do_clone(int sfd, int dfd)
|
||||
{
|
||||
fprintf(stderr, "using FICLONE\n");
|
||||
int err = ioctl(dfd, CF_FICLONE, sfd);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "ioctl(FICLONE): %s\n", strerror(errno));
|
||||
return (err);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
{
|
||||
fprintf(stderr, "using FICLONERANGE\n");
|
||||
cf_file_clone_range_t fcr = {
|
||||
.src_fd = sfd,
|
||||
.src_offset = soff,
|
||||
.src_length = len,
|
||||
.dest_offset = doff,
|
||||
};
|
||||
int err = ioctl(dfd, CF_FICLONERANGE, &fcr);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "ioctl(FICLONERANGE): %s\n", strerror(errno));
|
||||
return (err);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
{
|
||||
fprintf(stderr, "using copy_file_range\n");
|
||||
ssize_t copied = cf_copy_file_range(sfd, &soff, dfd, &doff, len, 0);
|
||||
if (copied < 0) {
|
||||
fprintf(stderr, "copy_file_range: %s\n", strerror(errno));
|
||||
return (1);
|
||||
}
|
||||
if (copied != len) {
|
||||
fprintf(stderr, "copy_file_range: copied less than requested: "
|
||||
"requested=%lu; copied=%lu\n", len, copied);
|
||||
return (1);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
{
|
||||
fprintf(stderr, "using FIDEDUPERANGE\n");
|
||||
|
||||
char buf[sizeof (cf_file_dedupe_range_t)+
|
||||
sizeof (cf_file_dedupe_range_info_t)] = {0};
|
||||
cf_file_dedupe_range_t *fdr = (cf_file_dedupe_range_t *)&buf[0];
|
||||
cf_file_dedupe_range_info_t *fdri =
|
||||
(cf_file_dedupe_range_info_t *)
|
||||
&buf[sizeof (cf_file_dedupe_range_t)];
|
||||
|
||||
fdr->src_offset = soff;
|
||||
fdr->src_length = len;
|
||||
fdr->dest_count = 1;
|
||||
|
||||
fdri->dest_fd = dfd;
|
||||
fdri->dest_offset = doff;
|
||||
|
||||
int err = ioctl(sfd, CF_FIDEDUPERANGE, fdr);
|
||||
if (err != 0)
|
||||
fprintf(stderr, "ioctl(FIDEDUPERANGE): %s\n", strerror(errno));
|
||||
|
||||
if (fdri->status < 0) {
|
||||
fprintf(stderr, "dedup failed: %s\n", strerror(-fdri->status));
|
||||
err = -1;
|
||||
} else if (fdri->status == CF_FILE_DEDUPE_RANGE_DIFFERS) {
|
||||
fprintf(stderr, "dedup failed: range differs\n");
|
||||
err = -1;
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
@ -44,6 +44,7 @@
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
|
||||
int
|
||||
|
@ -182,6 +182,7 @@ export ZFS_FILES='zdb
|
||||
export ZFSTEST_FILES='badsend
|
||||
btree_test
|
||||
chg_usr_exec
|
||||
clonefile
|
||||
devname2devid
|
||||
dir_rd_update
|
||||
draid
|
||||
|
@ -90,6 +90,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
|
||||
functional/alloc_class/alloc_class.kshlib \
|
||||
functional/atime/atime.cfg \
|
||||
functional/atime/atime_common.kshlib \
|
||||
functional/block_cloning/block_cloning.kshlib \
|
||||
functional/cache/cache.cfg \
|
||||
functional/cache/cache.kshlib \
|
||||
functional/cachefile/cachefile.cfg \
|
||||
@ -437,6 +438,19 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/atime/root_atime_on.ksh \
|
||||
functional/atime/root_relatime_on.ksh \
|
||||
functional/atime/setup.ksh \
|
||||
functional/block_cloning/cleanup.ksh \
|
||||
functional/block_cloning/setup.ksh \
|
||||
functional/block_cloning/block_cloning_copyfilerange_cross_dataset.ksh \
|
||||
functional/block_cloning/block_cloning_copyfilerange_fallback.ksh \
|
||||
functional/block_cloning/block_cloning_copyfilerange_fallback_same_txg.ksh \
|
||||
functional/block_cloning/block_cloning_copyfilerange.ksh \
|
||||
functional/block_cloning/block_cloning_copyfilerange_partial.ksh \
|
||||
functional/block_cloning/block_cloning_disabled_copyfilerange.ksh \
|
||||
functional/block_cloning/block_cloning_disabled_ficlone.ksh \
|
||||
functional/block_cloning/block_cloning_disabled_ficlonerange.ksh \
|
||||
functional/block_cloning/block_cloning_ficlone.ksh \
|
||||
functional/block_cloning/block_cloning_ficlonerange.ksh \
|
||||
functional/block_cloning/block_cloning_ficlonerange_partial.ksh \
|
||||
functional/bootfs/bootfs_001_pos.ksh \
|
||||
functional/bootfs/bootfs_002_neg.ksh \
|
||||
functional/bootfs/bootfs_003_pos.ksh \
|
||||
|
@ -0,0 +1,54 @@
|
||||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2023, Klara Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
function have_same_content
|
||||
{
|
||||
typeset hash1=$(cat $1 | md5sum)
|
||||
typeset hash2=$(cat $2 | md5sum)
|
||||
|
||||
log_must [ "$hash1" = "$hash2" ]
|
||||
}
|
||||
|
||||
#
|
||||
# get_same_blocks dataset1 path/to/file1 dataset2 path/to/file2
|
||||
#
|
||||
# Returns a space-separated list of the indexes (starting at 0) of the L0
|
||||
# blocks that are shared between both files (by first DVA and checksum).
|
||||
# Assumes that the two files have the same content, use have_same_content to
|
||||
# confirm that.
|
||||
#
|
||||
function get_same_blocks
|
||||
{
|
||||
typeset zdbout=${TMPDIR:-$TEST_BASE_DIR}/zdbout.$$
|
||||
zdb -vvvvv $1 -O $2 | \
|
||||
awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a
|
||||
zdb -vvvvv $3 -O $4 | \
|
||||
awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b
|
||||
echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ')
|
||||
}
|
||||
|
@ -0,0 +1,60 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
	log_unsupported "copy_file_range not available before Linux 4.5"
fi

claim="The copy_file_range syscall can clone whole files."

log_assert $claim

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

log_onexit cleanup

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=128K count=4
log_must sync_pool $TESTPOOL

log_must clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file2 0 0 524288
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file1 /$TESTPOOL/file2

typeset blocks=$(get_same_blocks $TESTPOOL file1 $TESTPOOL file2)
log_must [ "$blocks" = "0 1 2 3" ]

log_pass $claim
@ -0,0 +1,65 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "5.3") ]]; then
	log_unsupported "copy_file_range can't copy cross-filesystem before Linux 5.3"
fi

claim="The copy_file_range syscall can clone across datasets."

log_assert $claim

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

log_onexit cleanup

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

log_must zfs create $TESTPOOL/$TESTFS1
log_must zfs create $TESTPOOL/$TESTFS2

log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS1/file1 bs=128K count=4
log_must sync_pool $TESTPOOL

log_must \
    clonefile -f /$TESTPOOL/$TESTFS1/file1 /$TESTPOOL/$TESTFS2/file2 0 0 524288
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/$TESTFS1/file1 /$TESTPOOL/$TESTFS2/file2

typeset blocks=$(get_same_blocks \
    $TESTPOOL/$TESTFS1 file1 $TESTPOOL/$TESTFS2 file2)
log_must [ "$blocks" = "0 1 2 3" ]

log_pass $claim
@ -0,0 +1,86 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
# Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
	log_unsupported "copy_file_range not available before Linux 4.5"
fi

claim="copy_file_range will fall back to copy when cloning not possible."

log_assert $claim

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

log_onexit cleanup

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must sync_pool $TESTPOOL


log_note "Copying entire file with copy_file_range"

log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "0 1 2 3" ]


log_note "Copying within a block with copy_file_range"

log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 32768 32768 65536
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "1 2 3" ]


log_note "Copying across a block with copy_file_range"

log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 327680 327680 131072
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "1" ]

log_pass $claim
@ -0,0 +1,66 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
	log_unsupported "copy_file_range not available before Linux 4.5"
fi

claim="copy_file_range will fall back to copy when cloning on same txg"

log_assert $claim

typeset timeout=$(get_tunable TXG_TIMEOUT)

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
	set_tunable64 TXG_TIMEOUT $timeout
}

log_onexit cleanup

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

log_must set_tunable64 TXG_TIMEOUT 5000

log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=128K count=4
log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/clone 0 0 524288

log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file /$TESTPOOL/clone

typeset blocks=$(get_same_blocks $TESTPOOL file $TESTPOOL clone)
log_must [ "$blocks" = "" ]

log_pass $claim
@ -0,0 +1,68 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023, Klara Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
	log_unsupported "copy_file_range not available before Linux 4.5"
fi

claim="The copy_file_range syscall can clone parts of a file."

log_assert $claim

function cleanup
{
	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
}

log_onexit cleanup

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS

log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=128K count=4
log_must sync_pool $TESTPOOL

log_must dd if=/$TESTPOOL/file1 of=/$TESTPOOL/file2 bs=128K count=4
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file1 /$TESTPOOL/file2

typeset blocks=$(get_same_blocks $TESTPOOL file1 $TESTPOOL file2)
log_must [ "$blocks" = "" ]

log_must clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file2 131072 131072 262144
log_must sync_pool $TESTPOOL

log_must have_same_content /$TESTPOOL/file1 /$TESTPOOL/file2

typeset blocks=$(get_same_blocks $TESTPOOL file1 $TESTPOOL file2)
log_must [ "$blocks" = "1 2" ]

log_pass $claim