zfsd: fault disks that generate too many I/O delay events
If ZFS reports that a disk had at least 8 I/O operations over 60s that
were each delayed by at least 30s (implying a queue depth > 4 or I/O
aggregation), fault that disk. Disks that respond this slowly can
degrade the entire system's performance.

MFC after:	2 weeks
Sponsored by:	Axcient
Reviewed by:	delphij
Differential Revision:	https://reviews.freebsd.org/D42825
commit d565784a7e
parent 272a406042
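Why the parenthetical follows: eight I/Os each delayed by at least 30s inside a 60s window amount to at least 8 × 30 = 240s of cumulative wait time in 60s of wall clock, so on average more than four I/Os must have been in flight at once, unless aggregation merged several logical I/Os into the delayed ones. The policy itself reduces to a count-over-window test; a minimal standalone sketch with hypothetical stand-ins for zfsd's Event type (the real implementation is in the CaseFile diff below):

	// Sketch only: hypothetical Event record, not zfsd's Event class.
	#include <algorithm>
	#include <ctime>
	#include <string>
	#include <vector>

	struct Event {
		std::string	type;	/* e.g. "ereport.fs.zfs.delay" */
		time_t		stamp;	/* when the ereport arrived */
	};

	static const int	FAULT_DELAY_COUNT = 8;	/* mirrors ZFS_FAULT_DELAY_COUNT */
	static const time_t	EVENT_WINDOW = 60;	/* seconds of history considered */

	static bool
	ShouldFaultForDelay(const std::vector<Event> &events, time_t now)
	{
		/* fault once more than FAULT_DELAY_COUNT recent delay events exist */
		return (std::count_if(events.begin(), events.end(),
		    [now](const Event &ev) {
			return (ev.type == "ereport.fs.zfs.delay" &&
			    now - ev.stamp <= EVENT_WINDOW);
		    }) > FAULT_DELAY_COUNT);
	}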
@@ -470,7 +470,8 @@ CaseFile::ReEvaluate(const ZfsEvent &event)
 		consumed = true;
 	}
 	else if (event.Value("class") == "ereport.fs.zfs.io" ||
-		 event.Value("class") == "ereport.fs.zfs.checksum") {
+		 event.Value("class") == "ereport.fs.zfs.checksum" ||
+		 event.Value("class") == "ereport.fs.zfs.delay") {
 
 		m_tentativeEvents.push_front(event.DeepCopy());
 		RegisterCallout(event);
@@ -1171,6 +1172,13 @@ IsIOEvent(const Event* const event)
 	return ("ereport.fs.zfs.io" == event->Value("type"));
 }
 
+/* Does the argument event refer to an IO delay? */
+static bool
+IsDelayEvent(const Event* const event)
+{
+	return ("ereport.fs.zfs.delay" == event->Value("type"));
+}
+
 bool
 CaseFile::ShouldDegrade() const
 {
@@ -1181,8 +1189,14 @@ CaseFile::ShouldDegrade() const
 bool
 CaseFile::ShouldFault() const
 {
-	return (std::count_if(m_events.begin(), m_events.end(),
-	    IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
+	bool should_fault_for_io, should_fault_for_delay;
+
+	should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
+	    IsIOEvent) > ZFS_DEGRADE_IO_COUNT;
+	should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
+	    IsDelayEvent) > ZFS_FAULT_DELAY_COUNT;
+
+	return (should_fault_for_io || should_fault_for_delay);
 }
 
 nvlist_t *
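Note that the new predicate uses a strict greater-than, so with ZFS_FAULT_DELAY_COUNT = 8 (see the header diff below) the ninth delay event is the one that actually trips the fault. A toy standalone check of that boundary, not zfsd code:

	#include <cassert>

	int
	main()
	{
		const int ZFS_FAULT_DELAY_COUNT = 8;	/* mirrors the enum below */
		int delay_events = 0;
		bool faulted = false;

		/* replay delay ereports one at a time, as zfsd accumulates them */
		while (!faulted)
			faulted = ++delay_events > ZFS_FAULT_DELAY_COUNT;
		assert(delay_events == 9);	/* the ninth event trips the fault */
		return (0);
	}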
@@ -242,7 +242,11 @@ class CaseFile
 	 * to transition a vdev from healthy to degraded
 	 * status.
 	 */
-	ZFS_DEGRADE_IO_COUNT = 50
+	ZFS_DEGRADE_IO_COUNT = 50,
+	/**
+	 * The number of delay errors on a vdev required to fault it
+	 */
+	ZFS_FAULT_DELAY_COUNT = 8,
 	};
 
 	static CalloutFunc_t OnGracePeriodEnded;
@@ -519,6 +519,72 @@ TEST_F(CaseFileTest, PoolDestroy)
 	EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event));
 }
 
+/*
+ * A Vdev with a very large number of Delay errors should fault
+ * For performance reasons, RefreshVdevState should be called at most once
+ */
+TEST_F(CaseFileTest, VeryManyDelayErrors)
+{
+	EXPECT_CALL(*m_caseFile, RefreshVdevState())
+	    .Times(::testing::AtMost(1))
+	    .WillRepeatedly(::testing::Return(true));
+
+	for(int i=0; i<100; i++) {
+		stringstream evStringStream;
+		evStringStream <<
+		"!system=ZFS "
+		"class=ereport.fs.zfs.delay "
+		"ena=12091638756982918145 "
+		"parent_guid=13237004955564865395 "
+		"parent_type=raidz "
+		"pool=testpool.4415 "
+		"pool_context=0 "
+		"pool_failmode=wait "
+		"pool_guid=456 "
+		"pool_state=0 "
+		"subsystem=ZFS "
+		"time=";
+		evStringStream << i << "0000000000000000 ";
+		evStringStream << "timestamp=" << i << " ";
+		evStringStream <<
+		"type=ereport.fs.zfs.delay "
+		"vdev_ashift=12 "
+		"vdev_cksum_errors=0 "
+		"vdev_complete_ts=948336226469 "
+		"vdev_delays=77 "
+		"vdev_delta_ts=123998485899 "
+		"vdev_guid=123 "
+		"vdev_path=/dev/da400 "
+		"vdev_read_errors=0 "
+		"vdev_spare_guids= "
+		"vdev_type=disk "
+		"vdev_write_errors=0 "
+		"zio_blkid=622 "
+		"zio_delay=31000041101 "
+		"zio_delta=123998485899 "
+		"zio_err=0 "
+		"zio_flags=1572992 "
+		"zio_level=-2 "
+		"zio_object=0 "
+		"zio_objset=37 "
+		"zio_offset=25598976 "
+		"zio_pipeline=48234496 "
+		"zio_priority=3 "
+		"zio_size=1024 "
+		"zio_stage=33554432 "
+		"zio_timestamp=824337740570 ";
+		Event *event(Event::CreateEvent(*m_eventFactory,
+		    evStringStream.str()));
+		ZfsEvent *zfs_event = static_cast<ZfsEvent*>(event);
+		EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event));
+		delete event;
+	}
+
+	m_caseFile->SpliceEvents();
+	EXPECT_FALSE(m_caseFile->ShouldDegrade());
+	EXPECT_TRUE(m_caseFile->ShouldFault());
+}
+
 /*
  * A Vdev with a very large number of IO errors should fault
  * For performance reasons, RefreshVdevState should be called at most once
@@ -21,6 +21,7 @@ ${PACKAGE}FILES+=	zfsd_autoreplace_003_pos.ksh
 ${PACKAGE}FILES+=	zfsd_degrade_001_pos.ksh
 ${PACKAGE}FILES+=	zfsd_degrade_002_pos.ksh
 ${PACKAGE}FILES+=	zfsd_fault_001_pos.ksh
+${PACKAGE}FILES+=	zfsd_fault_002_pos.ksh
 ${PACKAGE}FILES+=	zfsd_hotspare_001_pos.ksh
 ${PACKAGE}FILES+=	zfsd_hotspare_002_pos.ksh
 ${PACKAGE}FILES+=	zfsd_hotspare_003_pos.ksh
tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_002_pos.ksh (new file, 99 lines)
@@ -0,0 +1,99 @@
+#!/usr/local/bin/ksh93 -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2023 Axcient.  All rights reserved.
+# Use is subject to license terms.
+#
+# $FreeBSD$
+
+. $STF_SUITE/include/libtest.kshlib
+. $STF_SUITE/include/libgnop.kshlib
+
+################################################################################
+#
+# __stc_assertion_start
+#
+# ID: zfsd_fault_002_pos
+#
+# DESCRIPTION:
+#   If a vdev experiences delayed I/O, it will become faulted.
+#
+#
+# STRATEGY:
+#	1. Create a storage pool.  Use gnop vdevs so we can inject I/O delays.
+#	2. Inject IO delays while doing IO to the pool.
+#	3. Verify that the vdev becomes FAULTED.
+#	4. ONLINE it and verify that it resilvers and joins the pool.
+#
+# TESTABILITY: explicit
+#
+# TEST_AUTOMATION_LEVEL: automated
+#
+# __stc_assertion_end
+#
+###############################################################################
+
+verify_runnable "global"
+
+log_assert "ZFS will fault a vdev that experiences delayed I/O"
+
+ensure_zfsd_running
+
+DISK0_NOP=${DISK0}.nop
+DISK1_NOP=${DISK1}.nop
+
+log_must create_gnops $DISK0 $DISK1
+
+for type in "raidz" "mirror"; do
+	log_note "Testing raid type $type"
+
+	# Create a pool on the supplied disks
+	create_pool $TESTPOOL $type "$DISK0_NOP" "$DISK1_NOP"
+	log_must $ZFS create $TESTPOOL/$TESTFS
+
+	# Cause some IO delays writing to the pool
+	while true; do
+		# ZFS currently considers an I/O to be "slow" if it's delayed
+		# for 30 seconds (zio_slow_io_ms).
+		log_must gnop configure -d 31000 -x 100 "$DISK1_NOP"
+		$DD if=/dev/zero bs=128k count=1 >> \
+			/$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null
+		$FSYNC /$TESTPOOL/$TESTFS/$TESTFILE
+		# Check to see if the pool is faulted yet
+		$ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED'
+		if [ $? == 0 ]
+		then
+			log_note "$TESTPOOL got degraded"
+			$ZPOOL status -s $TESTPOOL
+			break
+		fi
+	done
+
+	log_must check_state $TESTPOOL $TMPDISK "FAULTED"
+
+	log_must gnop configure -x 0 "$DISK1_NOP"
+	destroy_pool $TESTPOOL
+	log_must $RM -rf /$TESTPOOL
+done
+
+log_pass
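The 31000 ms gnop delay in the loop above only needs to exceed ZFS's slow-I/O threshold, which defaults to 30000 ms. A hedged sketch of reading that threshold programmatically, assuming the OpenZFS tunable zio_slow_io_ms is exposed on FreeBSD as the sysctl vfs.zfs.zio.slow_io_ms (the sysctl name and integer width are assumptions, not taken from this commit):

	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <cstdio>

	int
	main()
	{
		unsigned int slow_io_ms = 0;
		size_t len = sizeof(slow_io_ms);

		/* query the live threshold instead of hard-coding 30000 ms */
		if (sysctlbyname("vfs.zfs.zio.slow_io_ms", &slow_io_ms, &len,
		    NULL, 0) != 0) {
			perror("sysctlbyname");
			return (1);
		}
		printf("slow I/O threshold: %u ms\n", slow_io_ms);
		return (0);
	}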
@@ -56,6 +56,37 @@ zfsd_fault_001_pos_cleanup()
 }
 
 
+atf_test_case zfsd_fault_002_pos cleanup
+zfsd_fault_002_pos_head()
+{
+	atf_set "descr" "ZFS will fault a vdev that experiences delayed I/O"
+	atf_set "require.progs" "ksh93 gnop zfs zpool zfsd"
+	atf_set "timeout" 300
+}
+zfsd_fault_002_pos_body()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/../hotspare/hotspare.kshlib
+	. $(atf_get_srcdir)/../hotspare/hotspare.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	verify_disk_count "$DISKS" 2
+	verify_zfsd_running
+	ksh93 $(atf_get_srcdir)/setup.ksh || atf_fail "Setup failed"
+	ksh93 $(atf_get_srcdir)/zfsd_fault_002_pos.ksh
+	if [[ $? != 0 ]]; then
+		save_artifacts
+		atf_fail "Testcase failed"
+	fi
+}
+zfsd_fault_002_pos_cleanup()
+{
+	. $(atf_get_srcdir)/../../include/default.cfg
+	. $(atf_get_srcdir)/zfsd.cfg
+
+	ksh93 $(atf_get_srcdir)/cleanup.ksh || atf_fail "Cleanup failed"
+}
+
 atf_test_case zfsd_degrade_001_pos cleanup
 zfsd_degrade_001_pos_head()
 {
@@ -631,6 +662,7 @@ zfsd_import_001_pos_cleanup()
 atf_init_test_cases()
 {
 	atf_add_test_case zfsd_fault_001_pos
+	atf_add_test_case zfsd_fault_002_pos
 	atf_add_test_case zfsd_degrade_001_pos
 	atf_add_test_case zfsd_degrade_002_pos
 	atf_add_test_case zfsd_hotspare_001_pos