
Update vendor-sys/illumos/dist to illumos-gate 13971:d1648d817bd6

Illumos ZFS issues:
  3581 spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE]->tq_lock is piping hot
Martin Matuska 2013-03-01 20:49:56 +00:00
parent 1dd7ebb604
commit b9192d31aa
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor-sys/illumos/dist/; revision=247578
4 changed files with 173 additions and 84 deletions
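
The gist of the change: FREE zios are issued at such a high rate that the single issue taskq's tq_lock becomes a serialization point, so the commit stripes dispatch across an array of taskqs, picking one from the low bits of gethrtime(). Below is a minimal userland C sketch of that dispatch pattern, not the kernel code; queue_t, queue_set_t, and the timer helper are illustrative stand-ins.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for a taskq: the mutex plays the role of tq_lock. */
typedef struct queue {
	pthread_mutex_t q_lock;
	uint64_t q_len;
} queue_t;

/* Stand-in for spa_taskqs_t: a count plus an array of queues. */
typedef struct queue_set {
	unsigned qs_count;
	queue_t *qs_queue;
} queue_set_t;

/* Userland stand-in for gethrtime(). */
static uint64_t
hrtime_now(void)
{
	struct timespec ts;
	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec);
}

/*
 * The selection spa_taskq_dispatch_ent() makes: one queue means no choice;
 * several mean the timer's low bits spread dispatches across them, so no
 * single q_lock sees every FREE dispatch.
 */
static void
queue_set_dispatch(queue_set_t *qs)
{
	queue_t *q = (qs->qs_count == 1) ? &qs->qs_queue[0] :
	    &qs->qs_queue[hrtime_now() % qs->qs_count];

	(void) pthread_mutex_lock(&q->q_lock);
	q->q_len++;	/* enqueue work under the per-queue lock */
	(void) pthread_mutex_unlock(&q->q_lock);
}

int
main(void)
{
	queue_t queues[8];
	queue_set_t qs = { 8, queues };

	for (unsigned i = 0; i < qs.qs_count; i++) {
		(void) pthread_mutex_init(&queues[i].q_lock, NULL);
		queues[i].q_len = 0;
	}
	for (int i = 0; i < 100000; i++)
		queue_set_dispatch(&qs);
	for (unsigned i = 0; i < qs.qs_count; i++)
		printf("queue %u: %llu dispatches\n", i,
		    (unsigned long long)queues[i].q_len);
	return (0);
}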

uts/common/fs/zfs/spa.c

@@ -77,23 +77,25 @@
 #include "zfs_comutil.h"
 
 typedef enum zti_modes {
-	zti_mode_fixed,			/* value is # of threads (min 1) */
-	zti_mode_online_percent,	/* value is % of online CPUs */
-	zti_mode_batch,			/* cpu-intensive; value is ignored */
-	zti_mode_null,			/* don't create a taskq */
-	zti_nmodes
+	ZTI_MODE_FIXED,			/* value is # of threads (min 1) */
+	ZTI_MODE_ONLINE_PERCENT,	/* value is % of online CPUs */
+	ZTI_MODE_BATCH,			/* cpu-intensive; value is ignored */
+	ZTI_MODE_NULL,			/* don't create a taskq */
+	ZTI_NMODES
 } zti_modes_t;
 
-#define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
-#define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
-#define	ZTI_BATCH	{ zti_mode_batch, 0 }
-#define	ZTI_NULL	{ zti_mode_null, 0 }
+#define	ZTI_P(n, q)	{ ZTI_MODE_FIXED, (n), (q) }
+#define	ZTI_PCT(n)	{ ZTI_MODE_ONLINE_PERCENT, (n), 1 }
+#define	ZTI_BATCH	{ ZTI_MODE_BATCH, 0, 1 }
+#define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }
 
-#define	ZTI_ONE		ZTI_FIX(1)
+#define	ZTI_N(n)	ZTI_P(n, 1)
+#define	ZTI_ONE		ZTI_N(1)
 
 typedef struct zio_taskq_info {
-	enum zti_modes zti_mode;
+	zti_modes_t zti_mode;
 	uint_t zti_value;
+	uint_t zti_count;
 } zio_taskq_info_t;
 
 static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
@@ -101,17 +103,30 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
 };
 
 /*
- * Define the taskq threads for the following I/O types:
- *	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
+ * This table defines the taskq settings for each ZFS I/O type. When
+ * initializing a pool, we use this table to create an appropriately sized
+ * taskq. Some operations are low volume and therefore have a small, static
+ * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE
+ * macros. Other operations process a large amount of data; the ZTI_BATCH
+ * macro causes us to create a taskq oriented for throughput. Some operations
+ * are so high frequency and short-lived that the taskq itself can become a
+ * point of lock contention. The ZTI_P(#, #) macro indicates that we need an
+ * additional degree of parallelism specified by the number of threads per-
+ * taskq and the number of taskqs; when dispatching an event in this case, the
+ * particular taskq is chosen at random.
+ *
+ * The different taskq priorities are to handle the different contexts (issue
+ * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that
+ * need to be handled with minimum delay.
 */
 const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
 	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
-	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
-	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
-	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
-	{ ZTI_FIX(100),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
-	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
-	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
+	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* NULL */
+	{ ZTI_N(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },	/* READ */
+	{ ZTI_BATCH,	ZTI_N(5),	ZTI_N(8),	ZTI_N(5) },	/* WRITE */
+	{ ZTI_P(12, 8),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* FREE */
+	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* CLAIM */
+	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* IOCTL */
 };
 
 static dsl_syncfunc_t spa_sync_version;
@@ -794,48 +809,120 @@ spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
 	    offsetof(spa_error_entry_t, se_avl));
 }
 
-static taskq_t *
-spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
-    uint_t value)
+static void
+spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
 {
+	const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
+	enum zti_modes mode = ztip->zti_mode;
+	uint_t value = ztip->zti_value;
+	uint_t count = ztip->zti_count;
+	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+	char name[32];
 	uint_t flags = 0;
 	boolean_t batch = B_FALSE;
 
-	switch (mode) {
-	case zti_mode_null:
-		return (NULL);		/* no taskq needed */
-
-	case zti_mode_fixed:
-		ASSERT3U(value, >=, 1);
-		value = MAX(value, 1);
-		break;
-
-	case zti_mode_batch:
-		batch = B_TRUE;
-		flags |= TASKQ_THREADS_CPU_PCT;
-		value = zio_taskq_batch_pct;
-		break;
-
-	case zti_mode_online_percent:
-		flags |= TASKQ_THREADS_CPU_PCT;
-		break;
-
-	default:
-		panic("unrecognized mode for %s taskq (%u:%u) in "
-		    "spa_activate()",
-		    name, mode, value);
-		break;
+	if (mode == ZTI_MODE_NULL) {
+		tqs->stqs_count = 0;
+		tqs->stqs_taskq = NULL;
+		return;
 	}
 
-	if (zio_taskq_sysdc && spa->spa_proc != &p0) {
-		if (batch)
-			flags |= TASKQ_DC_BATCH;
+	ASSERT3U(count, >, 0);
 
-		return (taskq_create_sysdc(name, value, 50, INT_MAX,
-		    spa->spa_proc, zio_taskq_basedc, flags));
+	tqs->stqs_count = count;
+	tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
+
+	for (uint_t i = 0; i < count; i++) {
+		taskq_t *tq;
+
+		switch (mode) {
+		case ZTI_MODE_FIXED:
+			ASSERT3U(value, >=, 1);
+			value = MAX(value, 1);
+			break;
+
+		case ZTI_MODE_BATCH:
+			batch = B_TRUE;
+			flags |= TASKQ_THREADS_CPU_PCT;
+			value = zio_taskq_batch_pct;
+			break;
+
+		case ZTI_MODE_ONLINE_PERCENT:
+			flags |= TASKQ_THREADS_CPU_PCT;
+			break;
+
+		default:
+			panic("unrecognized mode for %s_%s taskq (%u:%u) in "
+			    "spa_activate()",
+			    zio_type_name[t], zio_taskq_types[q], mode, value);
+			break;
+		}
+
+		if (count > 1) {
+			(void) snprintf(name, sizeof (name), "%s_%s_%u",
+			    zio_type_name[t], zio_taskq_types[q], i);
+		} else {
+			(void) snprintf(name, sizeof (name), "%s_%s",
+			    zio_type_name[t], zio_taskq_types[q]);
+		}
+
+		if (zio_taskq_sysdc && spa->spa_proc != &p0) {
+			if (batch)
+				flags |= TASKQ_DC_BATCH;
+
+			tq = taskq_create_sysdc(name, value, 50, INT_MAX,
+			    spa->spa_proc, zio_taskq_basedc, flags);
+		} else {
+			tq = taskq_create_proc(name, value, maxclsyspri, 50,
+			    INT_MAX, spa->spa_proc, flags);
+		}
+
+		tqs->stqs_taskq[i] = tq;
 	}
-	return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
-	    spa->spa_proc, flags));
+}
+
+static void
+spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
+{
+	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+
+	if (tqs->stqs_taskq == NULL) {
+		ASSERT0(tqs->stqs_count);
+		return;
+	}
+
+	for (uint_t i = 0; i < tqs->stqs_count; i++) {
+		ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
+		taskq_destroy(tqs->stqs_taskq[i]);
+	}
+
+	kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *));
+	tqs->stqs_taskq = NULL;
+}
+
+/*
+ * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority.
+ * Note that a type may have multiple discrete taskqs to avoid lock contention
+ * on the taskq itself. In that case we choose which taskq at random by using
+ * the low bits of gethrtime().
+ */
+void
+spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent)
+{
+	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+	taskq_t *tq;
+
+	ASSERT3P(tqs->stqs_taskq, !=, NULL);
+	ASSERT3U(tqs->stqs_count, !=, 0);
+
+	if (tqs->stqs_count == 1) {
+		tq = tqs->stqs_taskq[0];
+	} else {
+		tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count];
+	}
+
+	taskq_dispatch_ent(tq, func, arg, flags, ent);
 }
 
 static void
@@ -843,16 +930,7 @@ spa_create_zio_taskqs(spa_t *spa)
 {
 	for (int t = 0; t < ZIO_TYPES; t++) {
 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
-			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
-			enum zti_modes mode = ztip->zti_mode;
-			uint_t value = ztip->zti_value;
-			char name[32];
-
-			(void) snprintf(name, sizeof (name),
-			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
-
-			spa->spa_zio_taskq[t][q] =
-			    spa_taskq_create(spa, name, mode, value);
+			spa_taskqs_init(spa, t, q);
 		}
 	}
 }
@@ -1009,9 +1087,7 @@ spa_deactivate(spa_t *spa)
 	for (int t = 0; t < ZIO_TYPES; t++) {
 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
-			if (spa->spa_zio_taskq[t][q] != NULL)
-				taskq_destroy(spa->spa_zio_taskq[t][q]);
-			spa->spa_zio_taskq[t][q] = NULL;
+			spa_taskqs_fini(spa, t, q);
 		}
 	}
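
For reference, each entry in the zio_taskqs table above is now a (mode, value, count) triple rather than a (mode, value) pair. A compilable stand-alone sketch of what the FREE row encodes; the names mirror the diff, trimmed down for illustration:

#include <stdio.h>

typedef enum {
	ZTI_MODE_FIXED,			/* value is # of threads (min 1) */
	ZTI_MODE_ONLINE_PERCENT,	/* value is % of online CPUs */
	ZTI_MODE_BATCH,			/* cpu-intensive; value is ignored */
	ZTI_MODE_NULL			/* don't create a taskq */
} zti_modes_t;

typedef struct {
	zti_modes_t zti_mode;
	unsigned zti_value;	/* threads per taskq */
	unsigned zti_count;	/* number of taskqs */
} zio_taskq_info_t;

#define	ZTI_P(n, q)	{ ZTI_MODE_FIXED, (n), (q) }
#define	ZTI_N(n)	ZTI_P(n, 1)
#define	ZTI_ONE		ZTI_N(1)
#define	ZTI_NULL	{ ZTI_MODE_NULL, 0, 0 }

int
main(void)
{
	/* The FREE row: ISSUE is striped across 8 taskqs of 12 threads. */
	zio_taskq_info_t free_row[4] =
	    { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL };

	printf("FREE/ISSUE: %u taskqs x %u threads\n",
	    free_row[0].zti_count, free_row[0].zti_value);
	printf("FREE/INTR:  %u taskq  x %u thread\n",
	    free_row[2].zti_count, free_row[2].zti_value);
	return (0);
}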

uts/common/fs/zfs/sys/spa_impl.h

@@ -80,16 +80,16 @@ typedef struct spa_config_dirent {
 	char *scd_path;
 } spa_config_dirent_t;
 
-enum zio_taskq_type {
+typedef enum zio_taskq_type {
 	ZIO_TASKQ_ISSUE = 0,
 	ZIO_TASKQ_ISSUE_HIGH,
 	ZIO_TASKQ_INTERRUPT,
 	ZIO_TASKQ_INTERRUPT_HIGH,
 	ZIO_TASKQ_TYPES
-};
+} zio_taskq_type_t;
 
 /*
- * State machine for the zpool-pooname process. The states transitions
+ * State machine for the zpool-poolname process. The states transitions
  * are done as follows:
  *
  *	From		   To			Routine
@@ -107,6 +107,11 @@ typedef enum spa_proc_state {
 	SPA_PROC_GONE		/* spa_thread() is exiting, spa_proc = &p0 */
 } spa_proc_state_t;
 
+typedef struct spa_taskqs {
+	uint_t stqs_count;
+	taskq_t **stqs_taskq;
+} spa_taskqs_t;
+
 struct spa {
 	/*
 	 * Fields protected by spa_namespace_lock.
@@ -125,7 +130,7 @@ struct spa {
 	uint8_t		spa_sync_on;	/* sync threads are running */
 	spa_load_state_t spa_load_state; /* current load operation */
 	uint64_t	spa_import_flags; /* import specific flags */
-	taskq_t		*spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
+	spa_taskqs_t	spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
 	dsl_pool_t	*spa_dsl_pool;
 	boolean_t	spa_is_initializing;	/* true while opening pool */
 	metaslab_class_t *spa_normal_class;	/* normal data class */
@@ -245,6 +250,9 @@ struct spa {
 extern const char *spa_config_path;
 
+extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
+    task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent);
+
 #ifdef	__cplusplus
 }
 #endif

uts/common/fs/zfs/vdev_file.c

@@ -224,7 +224,7 @@ vdev_file_io_start(zio_t *zio)
 	bp->b_private = vf->vf_vnode;
 	bp->b_iodone = (int (*)())vdev_file_io_intr;
 
-	taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE],
+	spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE,
 	    vdev_file_io_strategy, bp, 0, &zio->io_tqent);
 
 	return (ZIO_PIPELINE_STOP);

uts/common/fs/zfs/zio.c

@@ -1107,7 +1107,7 @@ zio_free_bp_init(zio_t *zio)
  */
 static void
-zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
+zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
 {
 	spa_t *spa = zio->io_spa;
 	zio_type_t t = zio->io_type;
@@ -1128,10 +1128,11 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
 		t = ZIO_TYPE_NULL;
 
 	/*
-	 * If this is a high priority I/O, then use the high priority taskq.
+	 * If this is a high priority I/O, then use the high priority taskq if
+	 * available.
 	 */
 	if (zio->io_priority == ZIO_PRIORITY_NOW &&
-	    spa->spa_zio_taskq[t][q + 1] != NULL)
+	    spa->spa_zio_taskq[t][q + 1].stqs_count != 0)
 		q++;
 
 	ASSERT3U(q, <, ZIO_TASKQ_TYPES);
@@ -1142,19 +1143,24 @@ zio_taskq_dispatch(zio_t *zio, zio_taskq_type_t q, boolean_t cutinline)
 	 * to dispatch the zio to another taskq at the same time.
 	 */
 	ASSERT(zio->io_tqent.tqent_next == NULL);
-	taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
-	    (task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
+	spa_taskq_dispatch_ent(spa, t, q, (task_func_t *)zio_execute, zio,
+	    flags, &zio->io_tqent);
 }
 
 static boolean_t
-zio_taskq_member(zio_t *zio, enum zio_taskq_type q)
+zio_taskq_member(zio_t *zio, zio_taskq_type_t q)
 {
 	kthread_t *executor = zio->io_executor;
 	spa_t *spa = zio->io_spa;
 
-	for (zio_type_t t = 0; t < ZIO_TYPES; t++)
-		if (taskq_member(spa->spa_zio_taskq[t][q], executor))
-			return (B_TRUE);
+	for (zio_type_t t = 0; t < ZIO_TYPES; t++) {
+		spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
+		uint_t i;
+
+		for (i = 0; i < tqs->stqs_count; i++) {
+			if (taskq_member(tqs->stqs_taskq[i], executor))
+				return (B_TRUE);
+		}
+	}
 
 	return (B_FALSE);
 }
@@ -3017,10 +3023,9 @@ zio_done(zio_t *zio)
 			 * Hand it off to the otherwise-unused claim taskq.
 			 */
 			ASSERT(zio->io_tqent.tqent_next == NULL);
-			taskq_dispatch_ent(
-			    spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
-			    (task_func_t *)zio_reexecute, zio, 0,
-			    &zio->io_tqent);
+			spa_taskq_dispatch_ent(spa, ZIO_TYPE_CLAIM,
+			    ZIO_TASKQ_ISSUE, (task_func_t *)zio_reexecute, zio,
+			    0, &zio->io_tqent);
 		}
 		return (ZIO_PIPELINE_STOP);
 	}
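
One subtlety in the zio.c hunks above: with spa_zio_taskq holding structs instead of pointers, the "is there a high-priority taskq" test can no longer compare against NULL, so an stqs_count of zero now encodes "no taskq here". A pared-down sketch of that promotion logic, with types and names trimmed for illustration, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

enum { TQ_ISSUE, TQ_ISSUE_HIGH, TQ_INTR, TQ_INTR_HIGH, TQ_TYPES };

typedef struct {
	unsigned stqs_count;	/* 0 plays the role the NULL pointer played */
} taskqs_mini_t;

/* Promote a NOW-priority I/O to the _HIGH queue only if one was created. */
static int
pick_queue(const taskqs_mini_t tqs[TQ_TYPES], int q, bool is_now)
{
	if (is_now && tqs[q + 1].stqs_count != 0)
		q++;
	return (q);
}

int
main(void)
{
	/* WRITE-like row: issue_high exists. FREE-like row: it does not. */
	taskqs_mini_t write_row[TQ_TYPES] = { {1}, {1}, {1}, {1} };
	taskqs_mini_t free_row[TQ_TYPES] = { {8}, {0}, {1}, {0} };

	printf("write NOW -> q=%d\n", pick_queue(write_row, TQ_ISSUE, true));
	printf("free  NOW -> q=%d\n", pick_queue(free_row, TQ_ISSUE, true));
	return (0);
}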