From 23cfd1c3cf3f2dcd23c937052fcb6ecd01129d51 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Fri, 22 Sep 2017 08:23:24 +0000 Subject: [PATCH] 8648 Fix range locking in ZIL commit codepath illumos/illumos-gate@42b14111721da2ebd5159e7b45012a3eb0e3384c https://github.com/illumos/illumos-gate/commit/42b14111721da2ebd5159e7b45012a3eb0e3384c https://www.illumos.org/issues/8648 I'm opening this bug to track integration of the following ZFS on Linux commit into illumos: commit f763c3d1df569a8d6b60bcb5e95cf07aa7a189e6 Author: LOLi Date: Mon Aug 21 17:59:48 2017 +0200 Fix range locking in ZIL commit codepath Since OpenZFS 7578 (1b7c1e5) if we have a ZVOL with logbias=throughput we will force WR_INDIRECT itxs in zvol_log_write() setting itx->itx_lr offset and length to the offset and length of the BIO from zvol_write()->zvol_log_write(): these offset and length are later used to take a range lock in zilog->zl_get_data function: zvol_get_data(). Now suppose we have a ZVOL with blocksize=8K and push 4K writes to offset 0: we will only be range-locking 0-4096. This means the ASSERTion we make in dbuf_unoverride() is no longer valid because now dmu_sync() is called from zilog's get_data functions holding a partial lock on the dbuf. Fix this by taking a range lock on the whole block in zvol_get_data(). 
Reviewed-by: Chunwei Chen Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Reviewed by: Igor Kozhukhov Reviewed by: Matt Ahrens Reviewed by: Andriy Gapon Reviewed by: Alexander Motin Approved by: Robert Mustacchi Author: LOLi --- uts/common/fs/zfs/zfs_vnops.c | 2 +- uts/common/fs/zfs/zvol.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c index 37f543ac3f1c..7b60c76b8b49 100644 --- a/uts/common/fs/zfs/zfs_vnops.c +++ b/uts/common/fs/zfs/zfs_vnops.c @@ -1101,7 +1101,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) } else { /* indirect write */ /* * Have to lock the whole block to ensure when it's - * written out and it's checksum is being calculated + * written out and its checksum is being calculated * that no one can change the data. We need to re-check * blocksize after we get the lock in case it's changed! */ diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c index 9a02d465ede0..35b5b69efa04 100644 --- a/uts/common/fs/zfs/zvol.c +++ b/uts/common/fs/zfs/zvol.c @@ -1003,7 +1003,6 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_lwb = lwb; - zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); /* * Write records come in two flavors: immediate and indirect. @@ -1012,12 +1011,22 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) * sync the data and get a pointer to it (indirect) so that * we don't have to write the data twice. 
*/ - if (buf != NULL) { /* immediate write */ + if (buf != NULL) { /* immediate write */ + zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, + RL_READER); error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); - } else { + } else { /* indirect write */ + /* + * Have to lock the whole block to ensure when it's written out + * and its checksum is being calculated that no one can change + * the data. Contrarily to zfs_get_data we need not re-check + * blocksize after we get the lock because it cannot be changed. + */ size = zv->zv_volblocksize; offset = P2ALIGN(offset, size); + zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, + RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) {