From b8dfc8b08da4a7a388b6193bea9816fd65c155f1 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Thu, 8 Oct 2015 04:29:39 +0000 Subject: [PATCH 1/2] 6271 dtrace caused excessive fork time Author: Bryan Cantrill Reviewed by: Adam Leventhal Reviewed by: Dan McDonald Reviewed by: Richard Lowe Approved by: Gordon Ross illumos/illumos-gate@7bd3c1d12d0c764e1517c3aca62c634409356764 --- .../tst/common/usdt/tst.sameprovmulti.ksh | 99 +++++++++++++++++++ .../tst/common/usdt/tst.sameprovmulti.ksh.out | 4 + uts/common/dtrace/dtrace.c | 11 ++- uts/common/dtrace/fasttrap.c | 28 +++--- uts/common/sys/dtrace.h | 18 ++-- 5 files changed, 136 insertions(+), 24 deletions(-) create mode 100644 cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh create mode 100644 cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh.out diff --git a/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh b/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh new file mode 100644 index 000000000000..04b76207ea7a --- /dev/null +++ b/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh @@ -0,0 +1,99 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015, Joyent, Inc. All rights reserved. +# + +# +# This test assures that we can have the same provider name across multiple +# probe definitions, and that the result will be the union of those +# definitions. In particular, libusdt depends on this when (for example) +# node modules that create a provider are loaded multiple times due to +# being included by different modules. +# + +if [ $# != 1 ]; then + echo expected one argument: '<'dtrace-path'>' + exit 2 +fi + +dtrace=$1 +DIR=/var/tmp/dtest.$$ + +mkdir $DIR +cd $DIR + +cat > test.c < + +void +main() +{ +EOF + +objs= + +for oogle in bagnoogle stalloogle cockoogle; do + cat > $oogle.c < + +void +$oogle() +{ + DTRACE_PROBE(doogle, $oogle); +} +EOF + + cat > $oogle.d <> test.c +done + +echo "}" >> test.c + +gcc -m32 -o test test.c $objs + +if [ $? -ne 0 ]; then + print -u2 "failed to compile test.c" + exit 1 +fi + +$dtrace -n 'doogle$target:::{@[probename] = count()}' \ + -n 'END{printa("%-10s %@d\n", @)}' -x quiet -x aggsortkey -Zc ./test + +if [ $? -ne 0 ]; then + print -u2 "failed to execute test" + exit 1 +fi + +cd / +/usr/bin/rm -rf $DIR +exit 0 diff --git a/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh.out b/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh.out new file mode 100644 index 000000000000..bdeeb1ef29b9 --- /dev/null +++ b/cmd/dtrace/test/tst/common/usdt/tst.sameprovmulti.ksh.out @@ -0,0 +1,4 @@ +bagnoogle 1 +cockoogle 1 +stalloogle 1 + diff --git a/uts/common/dtrace/dtrace.c b/uts/common/dtrace/dtrace.c index f2c3fec010dd..533ce81ab41c 100644 --- a/uts/common/dtrace/dtrace.c +++ b/uts/common/dtrace/dtrace.c @@ -14809,8 +14809,8 @@ dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) * Check to make sure this isn't a duplicate. */ for (i = 0; i < help->dthps_nprovs; i++) { - if (dofhp->dofhp_dof == - help->dthps_provs[i]->dthp_prov.dofhp_dof) + if (dofhp->dofhp_addr == + help->dthps_provs[i]->dthp_prov.dofhp_addr) return (EALREADY); } @@ -15162,7 +15162,14 @@ dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) dtrace_enabling_destroy(enab); if (dhp != NULL && nprovs > 0) { + /* + * Now that this is in-kernel, we change the sense of the + * members: dofhp_dof denotes the in-kernel copy of the DOF + * and dofhp_addr denotes the address at user-level. + */ + dhp->dofhp_addr = dhp->dofhp_dof; dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; + if (dtrace_helper_provider_add(dhp, gen) == 0) { mutex_exit(&dtrace_lock); dtrace_helper_provider_register(curproc, help, dhp); diff --git a/uts/common/dtrace/fasttrap.c b/uts/common/dtrace/fasttrap.c index 215b1a411142..3948dd258eab 100644 --- a/uts/common/dtrace/fasttrap.c +++ b/uts/common/dtrace/fasttrap.c @@ -1784,6 +1784,18 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) return (provider); } +/* + * We know a few things about our context here: we know that the probe being + * created doesn't already exist (DTrace won't load DOF at the same address + * twice, even if explicitly told to do so) and we know that we are + * single-threaded with respect to the meta provider machinery. Knowing that + * this is a new probe and that there is no way for us to race with another + * operation on this provider allows us an important optimization: we need not + * lookup a probe before adding it. Saving this lookup is important because + * this code is in the fork path for processes with USDT probes, and lookups + * here are potentially very expensive because of long hash conflicts on + * module, function and name (DTrace doesn't hash on provider name). + */ /*ARGSUSED*/ static void fasttrap_meta_create_probe(void *arg, void *parg, @@ -1820,19 +1832,6 @@ fasttrap_meta_create_probe(void *arg, void *parg, return; } - /* - * Grab the creation lock to ensure consistency between calls to - * dtrace_probe_lookup() and dtrace_probe_create() in the face of - * other threads creating probes. - */ - mutex_enter(&provider->ftp_cmtx); - - if (dtrace_probe_lookup(provider->ftp_provid, dhpb->dthpb_mod, - dhpb->dthpb_func, dhpb->dthpb_name) != 0) { - mutex_exit(&provider->ftp_cmtx); - return; - } - ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; ASSERT(ntps > 0); @@ -1840,7 +1839,6 @@ fasttrap_meta_create_probe(void *arg, void *parg, if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -ntps); - mutex_exit(&provider->ftp_cmtx); return; } @@ -1904,8 +1902,6 @@ fasttrap_meta_create_probe(void *arg, void *parg, */ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp); - - mutex_exit(&provider->ftp_cmtx); } /*ARGSUSED*/ diff --git a/uts/common/sys/dtrace.h b/uts/common/sys/dtrace.h index 3b1869a4ff89..b1bb6d8dd2c6 100644 --- a/uts/common/sys/dtrace.h +++ b/uts/common/sys/dtrace.h @@ -2131,12 +2131,18 @@ extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, * * 1.2.4 Caller's context * - * dtms_create_probe() is called from either ioctl() or module load context. - * The DTrace framework is locked in such a way that meta providers may not - * register or unregister. This means that the meta provider cannot call - * dtrace_meta_register() or dtrace_meta_unregister(). However, the context is - * such that the provider may (and is expected to) call provider-related - * DTrace provider APIs including dtrace_probe_create(). + * dtms_create_probe() is called from either ioctl() or module load context + * in the context of a newly-created provider (that is, a provider that + * is a result of a call to dtms_provide_pid()). The DTrace framework is + * locked in such a way that meta providers may not register or unregister, + * such that no other thread can call into a meta provider operation and that + * atomicity is assured with respect to meta provider operations across + * dtms_provide_pid() and subsequent calls to dtms_create_probe(). + * The context is thus effectively single-threaded with respect to the meta + * provider, and that the meta provider cannot call dtrace_meta_register() + * or dtrace_meta_unregister(). However, the context is such that the + * provider may (and is expected to) call provider-related DTrace provider + * APIs including dtrace_probe_create(). * * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) From 1138983b227dae833b3c23cc94672a8961cdbbdf Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 12 Oct 2015 14:23:00 +0000 Subject: [PATCH 2/2] 6250 zvol_dump_init() can hold txg open Reviewed by: Matthew Ahrens Reviewed by: Prakash Surya Reviewed by: Albert Lee Reviewed by: Xin Li Approved by: Garrett D'Amore Author: George Wilson illumos/illumos-gate@b10bba72460aeaa53119c76ff5e647fd5585bece --- uts/common/fs/zfs/zvol.c | 107 +++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 44 deletions(-) diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c index 101566a1ea55..585500bbac86 100644 --- a/uts/common/fs/zfs/zvol.c +++ b/uts/common/fs/zfs/zvol.c @@ -1893,13 +1893,15 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) vdev_t *vd = spa->spa_root_vdev; nvlist_t *nv = NULL; uint64_t version = spa_version(spa); - enum zio_checksum checksum; + uint64_t checksum, compress, refresrv, vbs, dedup; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ASSERT(vd->vdev_ops == &vdev_root_ops); error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, DMU_OBJECT_END); + if (error != 0) + return (error); /* wait for dmu_free_long_range to actually free the blocks */ txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); @@ -1923,23 +1925,41 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) 2, ZFS_SPACE_CHECK_RESERVED); } + if (!resize) { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); + if (error == 0) { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, + NULL); + } + if (error == 0) { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), + &refresrv, NULL); + } + if (error == 0) { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, + NULL); + } + if (version >= SPA_VERSION_DEDUP && error == 0) { + error = dsl_prop_get_integer(zv->zv_name, + zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); + } + } + if (error != 0) + return (error); + tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); dmu_tx_hold_bonus(tx, ZVOL_OBJ); error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { + if (error != 0) { dmu_tx_abort(tx); return (error); } - /* - * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum - * function. Otherwise, use the old default -- OFF. - */ - checksum = spa_feature_is_active(spa, - SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ? ZIO_CHECKSUM_NOPARITY : - ZIO_CHECKSUM_OFF; - /* * If we are resizing the dump device then we only need to * update the refreservation to match the newly updated @@ -1951,37 +1971,30 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &zv->zv_volsize, tx); } else { - uint64_t checksum, compress, refresrv, vbs, dedup; - - error = dsl_prop_get_integer(zv->zv_name, - zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); - error = error ? error : dsl_prop_get_integer(zv->zv_name, - zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); - error = error ? error : dsl_prop_get_integer(zv->zv_name, - zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); - error = error ? error : dsl_prop_get_integer(zv->zv_name, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); - if (version >= SPA_VERSION_DEDUP) { - error = error ? error : - dsl_prop_get_integer(zv->zv_name, - zfs_prop_to_name(ZFS_PROP_DEDUP), &dedup, NULL); - } - - error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + error = zap_update(os, ZVOL_ZAP_OBJ, zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress, tx); - error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, - zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); - error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, - zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, - &refresrv, tx); - error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, - &vbs, tx); - error = error ? error : dmu_object_set_blocksize( - os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx); - if (version >= SPA_VERSION_DEDUP) { - error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, + if (error == 0) { + error = zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, + &checksum, tx); + } + if (error == 0) { + error = zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, + &refresrv, tx); + } + if (error == 0) { + error = zap_update(os, ZVOL_ZAP_OBJ, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, + &vbs, tx); + } + if (error == 0) { + error = dmu_object_set_blocksize( + os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx); + } + if (version >= SPA_VERSION_DEDUP && error == 0) { + error = zap_update(os, ZVOL_ZAP_OBJ, zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup, tx); } @@ -1994,7 +2007,15 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) * We only need update the zvol's property if we are initializing * the dump area for the first time. */ - if (!resize) { + if (error == 0 && !resize) { + /* + * If MULTI_VDEV_CRASH_DUMP is active, use the NOPARITY checksum + * function. Otherwise, use the old default -- OFF. + */ + checksum = spa_feature_is_active(spa, + SPA_FEATURE_MULTI_VDEV_CRASH_DUMP) ? ZIO_CHECKSUM_NOPARITY : + ZIO_CHECKSUM_OFF; + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); @@ -2013,13 +2034,11 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, nv, NULL); nvlist_free(nv); - - if (error) - return (error); } /* Allocate the space for the dump */ - error = zvol_prealloc(zv); + if (error == 0) + error = zvol_prealloc(zv); return (error); }