mirror of https://git.FreeBSD.org/src.git
synced 2024-12-03 09:00:21 +00:00

MFV r348596: 9689 zfs range lock code should not be zpl-specific

illumos/illumos-gate@7931524763

FreeBSD note: some tweaking was needed to avoid a conflict with sys/rangelock.h.

Author:         Matthew Ahrens <mahrens@delphix.com>
Obtained from:  illumos
MFC after:      3 weeks

commit 786c532a8f

Notes:
    svn2git
    2020-12-20 02:59:44 +00:00
    svn path=/head/; revision=353634
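The diff below renames the ZPL-specific range-lock interface (zfs_range_lock()/zfs_range_unlock() on a znode_t) to a generic one (rangelock_enter()/rangelock_exit() on an embedded rangelock_t). As a quick orientation, here is a minimal sketch of a consumer written against the declarations added to zfs_rlock.h in this commit; the my_object_t wrapper and its helper functions are hypothetical and are not part of the diff itself.

#include <sys/zfs_rlock.h>

/* Hypothetical consumer: any structure can embed a rangelock_t directly. */
typedef struct my_object {
	rangelock_t	mo_rangelock;	/* range-lock state for this object */
} my_object_t;

static void
my_object_init(my_object_t *mo)
{
	/* No callback: append/blocksize handling is only needed by the ZPL. */
	rangelock_init(&mo->mo_rangelock, NULL, NULL);
}

static void
my_object_write_range(my_object_t *mo, uint64_t off, uint64_t len)
{
	/* Exclusive lock on [off, off + len), as a zvol-style caller takes. */
	locked_range_t *lr = rangelock_enter(&mo->mo_rangelock, off, len,
	    RL_WRITER);

	/* ... modify the range while it is locked ... */

	rangelock_exit(lr);	/* unlock and free the locked_range_t */
}

static void
my_object_fini(my_object_t *mo)
{
	rangelock_fini(&mo->mo_rangelock);
}

The ZPL keeps its old append and block-size-growth behavior by passing the zfs_rangelock_cb() callback added in zfs_znode.c, while zvol passes NULL instead of carrying a dummied-up znode_t.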
@@ -242,7 +242,9 @@ typedef struct bufwad {
} bufwad_t;

/*
* XXX -- fix zfs range locks to be generic so we can use them here.
* It would be better to use a rangelock_t per object. Unfortunately
* the rangelock_t is not a drop-in replacement for rl_t, because we
* still need to map from object ID to rangelock_t.
*/
typedef enum {
RL_READER,

@@ -1943,12 +1945,12 @@ static void
ztest_get_done(zgd_t *zgd, int error)
{
ztest_ds_t *zd = zgd->zgd_private;
uint64_t object = zgd->zgd_rl->rl_object;
uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object;

if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);

ztest_range_unlock(zgd->zgd_rl);
ztest_range_unlock((rl_t *)zgd->zgd_lr);
ztest_object_unlock(zd, object);

umem_free(zgd, sizeof (*zgd));
@@ -1998,8 +2000,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
zgd->zgd_private = zd;

if (buf != NULL) { /* immediate write */
zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);
zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd,
object, offset, size, RL_READER);

error = dmu_read(os, object, offset, size, buf,
DMU_READ_NO_PREFETCH);

@@ -2013,8 +2015,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
offset = 0;
}

zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
RL_READER);
zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd,
object, offset, size, RL_READER);

error = dmu_buf_hold(os, object, offset, zgd, &db,
DMU_READ_NO_PREFETCH);
@@ -75,6 +75,7 @@ struct arc_buf;
struct zio_prop;
struct sa_handle;
struct file;
struct locked_range;

typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;

@@ -966,7 +967,7 @@ typedef struct zgd {
struct lwb *zgd_lwb;
struct blkptr *zgd_bp;
dmu_buf_t *zgd_db;
struct rl *zgd_rl;
struct locked_range *zgd_lr;
void *zgd_private;
} zgd_t;
@@ -22,6 +22,9 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/

#ifndef _SYS_FS_ZFS_RLOCK_H
#define _SYS_FS_ZFS_RLOCK_H
@@ -30,54 +33,53 @@
extern "C" {
#endif

#ifdef _KERNEL

#include <sys/zfs_znode.h>
#ifdef __FreeBSD__
#define rangelock_init zfs_rangelock_init
#define rangelock_fini zfs_rangelock_fini
#endif

typedef enum {
RL_READER,
RL_WRITER,
RL_APPEND
} rl_type_t;
} rangelock_type_t;

typedef struct rl {
znode_t *r_zp; /* znode this lock applies to */
avl_node_t r_node; /* avl node link */
uint64_t r_off; /* file range offset */
uint64_t r_len; /* file range length */
uint_t r_cnt; /* range reference count in tree */
rl_type_t r_type; /* range type */
kcondvar_t r_wr_cv; /* cv for waiting writers */
kcondvar_t r_rd_cv; /* cv for waiting readers */
uint8_t r_proxy; /* acting for original range */
uint8_t r_write_wanted; /* writer wants to lock this range */
uint8_t r_read_wanted; /* reader wants to lock this range */
} rl_t;
struct locked_range;

/*
* Lock a range (offset, length) as either shared (RL_READER)
* or exclusive (RL_WRITER or RL_APPEND). RL_APPEND is a special type that
* is converted to RL_WRITER that specified to lock from the start of the
* end of file. Returns the range lock structure.
*/
rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
typedef void (rangelock_cb_t)(struct locked_range *, void *);

/* Unlock range and destroy range lock structure. */
void zfs_range_unlock(rl_t *rl);
#ifdef __FreeBSD__
typedef struct zfs_rangelock {
#else
typedef struct rangelock {
#endif
avl_tree_t rl_tree; /* contains locked_range_t */
kmutex_t rl_lock;
rangelock_cb_t *rl_cb;
void *rl_arg;
} rangelock_t;

/*
* Reduce range locked as RW_WRITER from whole file to specified range.
* Asserts the whole file was previously locked.
*/
void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
typedef struct locked_range {
rangelock_t *lr_rangelock; /* rangelock that this lock applies to */
avl_node_t lr_node; /* avl node link */
uint64_t lr_offset; /* file range offset */
uint64_t lr_length; /* file range length */
uint_t lr_count; /* range reference count in tree */
rangelock_type_t lr_type; /* range type */
kcondvar_t lr_write_cv; /* cv for waiting writers */
kcondvar_t lr_read_cv; /* cv for waiting readers */
uint8_t lr_proxy; /* acting for original range */
uint8_t lr_write_wanted; /* writer wants to lock this range */
uint8_t lr_read_wanted; /* reader wants to lock this range */
} locked_range_t;

/*
* AVL comparison function used to order range locks
* Locks are ordered on the start offset of the range.
*/
int zfs_range_compare(const void *arg1, const void *arg2);
void rangelock_init(rangelock_t *, rangelock_cb_t *, void *);
void rangelock_fini(rangelock_t *);

#endif /* _KERNEL */
locked_range_t *rangelock_enter(rangelock_t *,
uint64_t, uint64_t, rangelock_type_t);
void rangelock_exit(locked_range_t *);
void rangelock_reduce(locked_range_t *, uint64_t, uint64_t);

#ifdef __cplusplus
}
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/

@@ -36,6 +36,7 @@
#include <sys/rrwlock.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_stat.h>
#include <sys/zfs_rlock.h>
#endif
#include <sys/zfs_acl.h>
#include <sys/zil.h>
@@ -57,8 +58,8 @@ extern "C" {
#define ZFS_APPENDONLY 0x0000004000000000
#define ZFS_NODUMP 0x0000008000000000
#define ZFS_OPAQUE 0x0000010000000000
#define ZFS_AV_QUARANTINED 0x0000020000000000
#define ZFS_AV_MODIFIED 0x0000040000000000
#define ZFS_AV_QUARANTINED 0x0000020000000000
#define ZFS_AV_MODIFIED 0x0000040000000000
#define ZFS_REPARSE 0x0000080000000000
#define ZFS_OFFLINE 0x0000100000000000
#define ZFS_SPARSE 0x0000200000000000

@@ -78,8 +79,8 @@ extern "C" {
*/
#define ZFS_XATTR 0x1 /* is an extended attribute */
#define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
#define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */
#define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */
#define ZFS_ACL_PROTECTED 0x10 /* ACL protected */
#define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */
#define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */
@@ -177,8 +178,7 @@ typedef struct znode {
krwlock_t z_name_lock; /* "master" lock for dirent locks */
zfs_dirlock_t *z_dirlocks; /* directory entry lock list */
#endif
kmutex_t z_range_lock; /* protects changes to z_range_avl */
avl_tree_t z_range_avl; /* avl tree of file range locks */
rangelock_t z_rangelock; /* file range locks */
uint8_t z_unlinked; /* file has been unlinked */
uint8_t z_atime_dirty; /* atime needs to be synced */
uint8_t z_zn_prefetch; /* Prefetch znodes? */
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/

/*

@@ -34,9 +34,9 @@
* Interface
* ---------
* Defined in zfs_rlock.h but essentially:
* rl = zfs_range_lock(zp, off, len, lock_type);
* zfs_range_unlock(rl);
* zfs_range_reduce(rl, off, len);
* lr = rangelock_enter(zp, off, len, lock_type);
* rangelock_reduce(lr, off, len); // optional
* rangelock_exit(lr);
*
* AVL tree
* --------
@@ -46,9 +46,10 @@
*
* Common case
* -----------
* The (hopefully) usual case is of no overlaps or contention for
* locks. On entry to zfs_lock_range() a rl_t is allocated; the tree
* searched that finds no overlap, and *this* rl_t is placed in the tree.
* The (hopefully) usual case is of no overlaps or contention for locks. On
* entry to rangelock_enter(), a locked_range_t is allocated; the tree
* searched that finds no overlap, and *this* locked_range_t is placed in the
* tree.
*
* Overlaps/Reference counting/Proxy locks
* ---------------------------------------
@@ -87,67 +88,90 @@
*
* Grow block handling
* -------------------
* ZFS supports multiple block sizes currently upto 128K. The smallest
* ZFS supports multiple block sizes, up to 16MB. The smallest
* block size is used for the file which is grown as needed. During this
* growth all other writers and readers must be excluded.
* So if the block size needs to be grown then the whole file is
* exclusively locked, then later the caller will reduce the lock
* range to just the range to be written using zfs_reduce_range.
* range to just the range to be written using rangelock_reduce().
*/

#include <sys/zfs_context.h>
#include <sys/avl.h>
#include <sys/zfs_rlock.h>

/*
* AVL comparison function used to order range locks
* Locks are ordered on the start offset of the range.
*/
static int
rangelock_compare(const void *arg1, const void *arg2)
{
const locked_range_t *rl1 = arg1;
const locked_range_t *rl2 = arg2;

if (rl1->lr_offset > rl2->lr_offset)
return (1);
if (rl1->lr_offset < rl2->lr_offset)
return (-1);
return (0);
}

/*
* The callback is invoked when acquiring a RL_WRITER or RL_APPEND lock.
* It must convert RL_APPEND to RL_WRITER (starting at the end of the file),
* and may increase the range that's locked for RL_WRITER.
*/
void
rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg)
{
mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&rl->rl_tree, rangelock_compare,
sizeof (locked_range_t), offsetof(locked_range_t, lr_node));
rl->rl_cb = cb;
rl->rl_arg = arg;
}

void
rangelock_fini(rangelock_t *rl)
{
mutex_destroy(&rl->rl_lock);
avl_destroy(&rl->rl_tree);
}
/*
* Check if a write lock can be grabbed, or wait and recheck until available.
*/
static void
zfs_range_lock_writer(znode_t *zp, rl_t *new)
rangelock_enter_writer(rangelock_t *rl, locked_range_t *new)
{
avl_tree_t *tree = &zp->z_range_avl;
rl_t *rl;
avl_tree_t *tree = &rl->rl_tree;
locked_range_t *lr;
avl_index_t where;
uint64_t end_size;
uint64_t off = new->r_off;
uint64_t len = new->r_len;
uint64_t orig_off = new->lr_offset;
uint64_t orig_len = new->lr_length;
rangelock_type_t orig_type = new->lr_type;

for (;;) {
/*
* Range locking is also used by zvol and uses a
* dummied up znode. However, for zvol, we don't need to
* append or grow blocksize, and besides we don't have
* a "sa" data or z_zfsvfs - so skip that processing.
*
* Yes, this is ugly, and would be solved by not handling
* grow or append in range lock code. If that was done then
* we could make the range locking code generically available
* to other non-zfs consumers.
* Call callback which can modify new->r_off,len,type.
* Note, the callback is used by the ZPL to handle appending
* and changing blocksizes. It isn't needed for zvols.
*/
if (zp->z_vnode) { /* caller is ZPL */
/*
* If in append mode pick up the current end of file.
* This is done under z_range_lock to avoid races.
*/
if (new->r_type == RL_APPEND)
new->r_off = zp->z_size;

/*
* If we need to grow the block size then grab the whole
* file range. This is also done under z_range_lock to
* avoid races.
*/
end_size = MAX(zp->z_size, new->r_off + len);
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
new->r_off = 0;
new->r_len = UINT64_MAX;
}
if (rl->rl_cb != NULL) {
rl->rl_cb(new, rl->rl_arg);
}

/*
* If the type was APPEND, the callback must convert it to
* WRITER.
*/
ASSERT3U(new->lr_type, ==, RL_WRITER);

/*
* First check for the usual case of no locks
*/
if (avl_numnodes(tree) == 0) {
new->r_type = RL_WRITER; /* convert to writer */
avl_add(tree, new);
return;
}
@@ -155,31 +179,33 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
/*
* Look for any locks in the range.
*/
rl = avl_find(tree, new, &where);
if (rl)
lr = avl_find(tree, new, &where);
if (lr != NULL)
goto wait; /* already locked at same offset */

rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
if (rl && (rl->r_off < new->r_off + new->r_len))
lr = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER);
if (lr != NULL &&
lr->lr_offset < new->lr_offset + new->lr_length)
goto wait;

rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
if (rl && rl->r_off + rl->r_len > new->r_off)
lr = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE);
if (lr != NULL &&
lr->lr_offset + lr->lr_length > new->lr_offset)
goto wait;

new->r_type = RL_WRITER; /* convert possible RL_APPEND */
avl_insert(tree, new, where);
return;
wait:
if (!rl->r_write_wanted) {
cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
rl->r_write_wanted = B_TRUE;
if (!lr->lr_write_wanted) {
cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
lr->lr_write_wanted = B_TRUE;
}
cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
cv_wait(&lr->lr_write_cv, &rl->rl_lock);

/* reset to original */
new->r_off = off;
new->r_len = len;
new->lr_offset = orig_off;
new->lr_length = orig_len;
new->lr_type = orig_type;
}
}
@@ -187,29 +213,29 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
* If this is an original (non-proxy) lock then replace it by
* a proxy and return the proxy.
*/
static rl_t *
zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
static locked_range_t *
rangelock_proxify(avl_tree_t *tree, locked_range_t *lr)
{
rl_t *proxy;
locked_range_t *proxy;

if (rl->r_proxy)
return (rl); /* already a proxy */
if (lr->lr_proxy)
return (lr); /* already a proxy */

ASSERT3U(rl->r_cnt, ==, 1);
ASSERT(rl->r_write_wanted == B_FALSE);
ASSERT(rl->r_read_wanted == B_FALSE);
avl_remove(tree, rl);
rl->r_cnt = 0;
ASSERT3U(lr->lr_count, ==, 1);
ASSERT(lr->lr_write_wanted == B_FALSE);
ASSERT(lr->lr_read_wanted == B_FALSE);
avl_remove(tree, lr);
lr->lr_count = 0;

/* create a proxy range lock */
proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
proxy->r_off = rl->r_off;
proxy->r_len = rl->r_len;
proxy->r_cnt = 1;
proxy->r_type = RL_READER;
proxy->r_proxy = B_TRUE;
proxy->r_write_wanted = B_FALSE;
proxy->r_read_wanted = B_FALSE;
proxy = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
proxy->lr_offset = lr->lr_offset;
proxy->lr_length = lr->lr_length;
proxy->lr_count = 1;
proxy->lr_type = RL_READER;
proxy->lr_proxy = B_TRUE;
proxy->lr_write_wanted = B_FALSE;
proxy->lr_read_wanted = B_FALSE;
avl_add(tree, proxy);

return (proxy);
@@ -219,29 +245,27 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
* Split the range lock at the supplied offset
* returning the *front* proxy.
*/
static rl_t *
zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
static locked_range_t *
rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off)
{
rl_t *front, *rear;

ASSERT3U(rl->r_len, >, 1);
ASSERT3U(off, >, rl->r_off);
ASSERT3U(off, <, rl->r_off + rl->r_len);
ASSERT(rl->r_write_wanted == B_FALSE);
ASSERT(rl->r_read_wanted == B_FALSE);
ASSERT3U(lr->lr_length, >, 1);
ASSERT3U(off, >, lr->lr_offset);
ASSERT3U(off, <, lr->lr_offset + lr->lr_length);
ASSERT(lr->lr_write_wanted == B_FALSE);
ASSERT(lr->lr_read_wanted == B_FALSE);

/* create the rear proxy range lock */
rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
rear->r_off = off;
rear->r_len = rl->r_off + rl->r_len - off;
rear->r_cnt = rl->r_cnt;
rear->r_type = RL_READER;
rear->r_proxy = B_TRUE;
rear->r_write_wanted = B_FALSE;
rear->r_read_wanted = B_FALSE;
locked_range_t *rear = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
rear->lr_offset = off;
rear->lr_length = lr->lr_offset + lr->lr_length - off;
rear->lr_count = lr->lr_count;
rear->lr_type = RL_READER;
rear->lr_proxy = B_TRUE;
rear->lr_write_wanted = B_FALSE;
rear->lr_read_wanted = B_FALSE;

front = zfs_range_proxify(tree, rl);
front->r_len = off - rl->r_off;
locked_range_t *front = rangelock_proxify(tree, lr);
front->lr_length = off - lr->lr_offset;

avl_insert_here(tree, rear, front, AVL_AFTER);
return (front);
@@ -251,28 +275,27 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
* Create and add a new proxy range lock for the supplied range.
*/
static void
zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
{
rl_t *rl;

ASSERT(len);
rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
rl->r_off = off;
rl->r_len = len;
rl->r_cnt = 1;
rl->r_type = RL_READER;
rl->r_proxy = B_TRUE;
rl->r_write_wanted = B_FALSE;
rl->r_read_wanted = B_FALSE;
avl_add(tree, rl);
ASSERT(len != 0);
locked_range_t *lr = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
lr->lr_offset = off;
lr->lr_length = len;
lr->lr_count = 1;
lr->lr_type = RL_READER;
lr->lr_proxy = B_TRUE;
lr->lr_write_wanted = B_FALSE;
lr->lr_read_wanted = B_FALSE;
avl_add(tree, lr);
}

static void
zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
rangelock_add_reader(avl_tree_t *tree, locked_range_t *new,
locked_range_t *prev, avl_index_t where)
{
rl_t *next;
uint64_t off = new->r_off;
uint64_t len = new->r_len;
locked_range_t *next;
uint64_t off = new->lr_offset;
uint64_t len = new->lr_length;

/*
* prev arrives either:
@@ -281,37 +304,37 @@ zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
* range may overlap with the new range
* - null, if there were no ranges starting before the new one
*/
if (prev) {
if (prev->r_off + prev->r_len <= off) {
if (prev != NULL) {
if (prev->lr_offset + prev->lr_length <= off) {
prev = NULL;
} else if (prev->r_off != off) {
} else if (prev->lr_offset != off) {
/*
* convert to proxy if needed then
* split this entry and bump ref count
*/
prev = zfs_range_split(tree, prev, off);
prev = rangelock_split(tree, prev, off);
prev = AVL_NEXT(tree, prev); /* move to rear range */
}
}
ASSERT((prev == NULL) || (prev->r_off == off));
ASSERT((prev == NULL) || (prev->lr_offset == off));

if (prev)
if (prev != NULL)
next = prev;
else
next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
next = avl_nearest(tree, where, AVL_AFTER);

if (next == NULL || off + len <= next->r_off) {
if (next == NULL || off + len <= next->lr_offset) {
/* no overlaps, use the original new rl_t in the tree */
avl_insert(tree, new, where);
return;
}

if (off < next->r_off) {
if (off < next->lr_offset) {
/* Add a proxy for initial range before the overlap */
zfs_range_new_proxy(tree, off, next->r_off - off);
rangelock_new_proxy(tree, off, next->lr_offset - off);
}

new->r_cnt = 0; /* will use proxies in tree */
new->lr_count = 0; /* will use proxies in tree */
/*
* We now search forward through the ranges, until we go past the end
* of the new range. For each entry we make it a proxy if it
@@ -319,47 +342,51 @@ zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t where)
* gaps between the ranges then we create a new proxy range.
*/
for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
if (off + len <= next->r_off)
if (off + len <= next->lr_offset)
break;
if (prev && prev->r_off + prev->r_len < next->r_off) {
if (prev != NULL && prev->lr_offset + prev->lr_length <
next->lr_offset) {
/* there's a gap */
ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
next->r_off - (prev->r_off + prev->r_len));
ASSERT3U(next->lr_offset, >,
prev->lr_offset + prev->lr_length);
rangelock_new_proxy(tree,
prev->lr_offset + prev->lr_length,
next->lr_offset -
(prev->lr_offset + prev->lr_length));
}
if (off + len == next->r_off + next->r_len) {
if (off + len == next->lr_offset + next->lr_length) {
/* exact overlap with end */
next = zfs_range_proxify(tree, next);
next->r_cnt++;
next = rangelock_proxify(tree, next);
next->lr_count++;
return;
}
if (off + len < next->r_off + next->r_len) {
if (off + len < next->lr_offset + next->lr_length) {
/* new range ends in the middle of this block */
next = zfs_range_split(tree, next, off + len);
next->r_cnt++;
next = rangelock_split(tree, next, off + len);
next->lr_count++;
return;
}
ASSERT3U(off + len, >, next->r_off + next->r_len);
next = zfs_range_proxify(tree, next);
next->r_cnt++;
ASSERT3U(off + len, >, next->lr_offset + next->lr_length);
next = rangelock_proxify(tree, next);
next->lr_count++;
}

/* Add the remaining end range. */
zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
(off + len) - (prev->r_off + prev->r_len));
rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length,
(off + len) - (prev->lr_offset + prev->lr_length));
}

/*
* Check if a reader lock can be grabbed, or wait and recheck until available.
*/
static void
zfs_range_lock_reader(znode_t *zp, rl_t *new)
rangelock_enter_reader(rangelock_t *rl, locked_range_t *new)
{
avl_tree_t *tree = &zp->z_range_avl;
rl_t *prev, *next;
avl_tree_t *tree = &rl->rl_tree;
locked_range_t *prev, *next;
avl_index_t where;
uint64_t off = new->r_off;
uint64_t len = new->r_len;
uint64_t off = new->lr_offset;
uint64_t len = new->lr_length;

/*
* Look for any writer locks in the range.
@@ -367,21 +394,22 @@ zfs_range_lock_reader(znode_t *zp, rl_t *new)
retry:
prev = avl_find(tree, new, &where);
if (prev == NULL)
prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
prev = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE);

/*
* Check the previous range for a writer lock overlap.
*/
if (prev && (off < prev->r_off + prev->r_len)) {
if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
if (!prev->r_read_wanted) {
cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
prev->r_read_wanted = B_TRUE;
if (prev && (off < prev->lr_offset + prev->lr_length)) {
if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
if (!prev->lr_read_wanted) {
cv_init(&prev->lr_read_cv,
NULL, CV_DEFAULT, NULL);
prev->lr_read_wanted = B_TRUE;
}
cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
cv_wait(&prev->lr_read_cv, &rl->rl_lock);
goto retry;
}
if (off + len < prev->r_off + prev->r_len)
if (off + len < prev->lr_offset + prev->lr_length)
goto got_lock;
}
@@ -389,70 +417,71 @@ zfs_range_lock_reader(znode_t *zp, rl_t *new)
* Search through the following ranges to see if there's
* write lock any overlap.
*/
if (prev)
if (prev != NULL)
next = AVL_NEXT(tree, prev);
else
next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
for (; next; next = AVL_NEXT(tree, next)) {
if (off + len <= next->r_off)
next = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER);
for (; next != NULL; next = AVL_NEXT(tree, next)) {
if (off + len <= next->lr_offset)
goto got_lock;
if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
if (!next->r_read_wanted) {
cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
next->r_read_wanted = B_TRUE;
if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
if (!next->lr_read_wanted) {
cv_init(&next->lr_read_cv,
NULL, CV_DEFAULT, NULL);
next->lr_read_wanted = B_TRUE;
}
cv_wait(&next->r_rd_cv, &zp->z_range_lock);
cv_wait(&next->lr_read_cv, &rl->rl_lock);
goto retry;
}
if (off + len <= next->r_off + next->r_len)
if (off + len <= next->lr_offset + next->lr_length)
goto got_lock;
}

got_lock:
/*
* Add the read lock, which may involve splitting existing
* locks and bumping ref counts (r_cnt).
* locks and bumping ref counts (r_count).
*/
zfs_range_add_reader(tree, new, prev, where);
rangelock_add_reader(tree, new, prev, where);
}
/*
* Lock a range (offset, length) as either shared (RL_READER)
* or exclusive (RL_WRITER). Returns the range lock structure
* for later unlocking or reduce range (if entire file
* previously locked as RL_WRITER).
* Lock a range (offset, length) as either shared (RL_READER) or exclusive
* (RL_WRITER or RL_APPEND). If RL_APPEND is specified, rl_cb() will convert
* it to a RL_WRITER lock (with the offset at the end of the file). Returns
* the range lock structure for later unlocking (or reduce range if the
* entire file is locked as RL_WRITER).
*/
rl_t *
zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
locked_range_t *
rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
rangelock_type_t type)
{
rl_t *new;

ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);

new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
new->r_zp = zp;
new->r_off = off;
locked_range_t *new = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
new->lr_rangelock = rl;
new->lr_offset = off;
if (len + off < off) /* overflow */
len = UINT64_MAX - off;
new->r_len = len;
new->r_cnt = 1; /* assume it's going to be in the tree */
new->r_type = type;
new->r_proxy = B_FALSE;
new->r_write_wanted = B_FALSE;
new->r_read_wanted = B_FALSE;
new->lr_length = len;
new->lr_count = 1; /* assume it's going to be in the tree */
new->lr_type = type;
new->lr_proxy = B_FALSE;
new->lr_write_wanted = B_FALSE;
new->lr_read_wanted = B_FALSE;

mutex_enter(&zp->z_range_lock);
mutex_enter(&rl->rl_lock);
if (type == RL_READER) {
/*
* First check for the usual case of no locks
*/
if (avl_numnodes(&zp->z_range_avl) == 0)
avl_add(&zp->z_range_avl, new);
if (avl_numnodes(&rl->rl_tree) == 0)
avl_add(&rl->rl_tree, new);
else
zfs_range_lock_reader(zp, new);
rangelock_enter_reader(rl, new);
} else
zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
mutex_exit(&zp->z_range_lock);
rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */
mutex_exit(&rl->rl_lock);
return (new);
}
@@ -460,10 +489,9 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
* Unlock a reader lock
*/
static void
zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove)
{
avl_tree_t *tree = &zp->z_range_avl;
rl_t *rl, *next = NULL;
avl_tree_t *tree = &rl->rl_tree;
uint64_t len;

/*
@@ -473,129 +501,118 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
* removed from the tree and replaced by proxies (one or
* more ranges mapping to the entire range).
*/
if (remove->r_cnt == 1) {
if (remove->lr_count == 1) {
avl_remove(tree, remove);
if (remove->r_write_wanted) {
cv_broadcast(&remove->r_wr_cv);
cv_destroy(&remove->r_wr_cv);
if (remove->lr_write_wanted) {
cv_broadcast(&remove->lr_write_cv);
cv_destroy(&remove->lr_write_cv);
}
if (remove->r_read_wanted) {
cv_broadcast(&remove->r_rd_cv);
cv_destroy(&remove->r_rd_cv);
if (remove->lr_read_wanted) {
cv_broadcast(&remove->lr_read_cv);
cv_destroy(&remove->lr_read_cv);
}
} else {
ASSERT0(remove->r_cnt);
ASSERT0(remove->r_write_wanted);
ASSERT0(remove->r_read_wanted);
ASSERT0(remove->lr_count);
ASSERT0(remove->lr_write_wanted);
ASSERT0(remove->lr_read_wanted);
/*
* Find start proxy representing this reader lock,
* then decrement ref count on all proxies
* that make up this range, freeing them as needed.
*/
rl = avl_find(tree, remove, NULL);
ASSERT(rl);
ASSERT(rl->r_cnt);
ASSERT(rl->r_type == RL_READER);
for (len = remove->r_len; len != 0; rl = next) {
len -= rl->r_len;
if (len) {
next = AVL_NEXT(tree, rl);
ASSERT(next);
ASSERT(rl->r_off + rl->r_len == next->r_off);
ASSERT(next->r_cnt);
ASSERT(next->r_type == RL_READER);
locked_range_t *lr = avl_find(tree, remove, NULL);
ASSERT3P(lr, !=, NULL);
ASSERT3U(lr->lr_count, !=, 0);
ASSERT3U(lr->lr_type, ==, RL_READER);
locked_range_t *next = NULL;
for (len = remove->lr_length; len != 0; lr = next) {
len -= lr->lr_length;
if (len != 0) {
next = AVL_NEXT(tree, lr);
ASSERT3P(next, !=, NULL);
ASSERT3U(lr->lr_offset + lr->lr_length, ==,
next->lr_offset);
ASSERT3U(next->lr_count, !=, 0);
ASSERT3U(next->lr_type, ==, RL_READER);
}
rl->r_cnt--;
if (rl->r_cnt == 0) {
avl_remove(tree, rl);
if (rl->r_write_wanted) {
cv_broadcast(&rl->r_wr_cv);
cv_destroy(&rl->r_wr_cv);
lr->lr_count--;
if (lr->lr_count == 0) {
avl_remove(tree, lr);
if (lr->lr_write_wanted) {
cv_broadcast(&lr->lr_write_cv);
cv_destroy(&lr->lr_write_cv);
}
if (rl->r_read_wanted) {
cv_broadcast(&rl->r_rd_cv);
cv_destroy(&rl->r_rd_cv);
if (lr->lr_read_wanted) {
cv_broadcast(&lr->lr_read_cv);
cv_destroy(&lr->lr_read_cv);
}
kmem_free(rl, sizeof (rl_t));
kmem_free(lr, sizeof (locked_range_t));
}
}
}
kmem_free(remove, sizeof (rl_t));
kmem_free(remove, sizeof (locked_range_t));
}
/*
* Unlock range and destroy range lock structure.
*/
void
zfs_range_unlock(rl_t *rl)
rangelock_exit(locked_range_t *lr)
{
znode_t *zp = rl->r_zp;
rangelock_t *rl = lr->lr_rangelock;

ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
ASSERT(!rl->r_proxy);
ASSERT(lr->lr_type == RL_WRITER || lr->lr_type == RL_READER);
ASSERT(lr->lr_count == 1 || lr->lr_count == 0);
ASSERT(!lr->lr_proxy);

mutex_enter(&zp->z_range_lock);
if (rl->r_type == RL_WRITER) {
mutex_enter(&rl->rl_lock);
if (lr->lr_type == RL_WRITER) {
/* writer locks can't be shared or split */
avl_remove(&zp->z_range_avl, rl);
mutex_exit(&zp->z_range_lock);
if (rl->r_write_wanted) {
cv_broadcast(&rl->r_wr_cv);
cv_destroy(&rl->r_wr_cv);
avl_remove(&rl->rl_tree, lr);
mutex_exit(&rl->rl_lock);
if (lr->lr_write_wanted) {
cv_broadcast(&lr->lr_write_cv);
cv_destroy(&lr->lr_write_cv);
}
if (rl->r_read_wanted) {
cv_broadcast(&rl->r_rd_cv);
cv_destroy(&rl->r_rd_cv);
if (lr->lr_read_wanted) {
cv_broadcast(&lr->lr_read_cv);
cv_destroy(&lr->lr_read_cv);
}
kmem_free(rl, sizeof (rl_t));
kmem_free(lr, sizeof (locked_range_t));
} else {
/*
* lock may be shared, let zfs_range_unlock_reader()
* lock may be shared, let rangelock_exit_reader()
* release the lock and free the rl_t
*/
zfs_range_unlock_reader(zp, rl);
mutex_exit(&zp->z_range_lock);
rangelock_exit_reader(rl, lr);
mutex_exit(&rl->rl_lock);
}
}
/*
* Reduce range locked as RL_WRITER from whole file to specified range.
* Asserts the whole file is exclusivly locked and so there's only one
* Asserts the whole file is exclusively locked and so there's only one
* entry in the tree.
*/
void
zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
rangelock_reduce(locked_range_t *lr, uint64_t off, uint64_t len)
{
znode_t *zp = rl->r_zp;
rangelock_t *rl = lr->lr_rangelock;

/* Ensure there are no other locks */
ASSERT(avl_numnodes(&zp->z_range_avl) == 1);
ASSERT(rl->r_off == 0);
ASSERT(rl->r_type == RL_WRITER);
ASSERT(!rl->r_proxy);
ASSERT3U(rl->r_len, ==, UINT64_MAX);
ASSERT3U(rl->r_cnt, ==, 1);
ASSERT3U(avl_numnodes(&rl->rl_tree), ==, 1);
ASSERT3U(lr->lr_offset, ==, 0);
ASSERT3U(lr->lr_type, ==, RL_WRITER);
ASSERT(!lr->lr_proxy);
ASSERT3U(lr->lr_length, ==, UINT64_MAX);
ASSERT3U(lr->lr_count, ==, 1);

mutex_enter(&zp->z_range_lock);
rl->r_off = off;
rl->r_len = len;
mutex_exit(&zp->z_range_lock);
if (rl->r_write_wanted)
cv_broadcast(&rl->r_wr_cv);
if (rl->r_read_wanted)
cv_broadcast(&rl->r_rd_cv);
}

/*
* AVL comparison function used to order range locks
* Locks are ordered on the start offset of the range.
*/
int
zfs_range_compare(const void *arg1, const void *arg2)
{
const rl_t *rl1 = (const rl_t *)arg1;
const rl_t *rl2 = (const rl_t *)arg2;

return (AVL_CMP(rl1->r_off, rl2->r_off));
mutex_enter(&rl->rl_lock);
lr->lr_offset = off;
lr->lr_length = len;
mutex_exit(&rl->rl_lock);
if (lr->lr_write_wanted)
cv_broadcast(&lr->lr_write_cv);
if (lr->lr_read_wanted)
cv_broadcast(&lr->lr_read_cv);
}
@@ -655,7 +655,6 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
ssize_t n, nbytes;
int error = 0;
rl_t *rl;
xuio_t *xuio = NULL;

ZFS_ENTER(zfsvfs);

@@ -703,7 +702,8 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
/*
* Lock the range against changes.
*/
rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
locked_range_t *lr = rangelock_enter(&zp->z_rangelock,
uio->uio_loffset, uio->uio_resid, RL_READER);

/*
* If we are reading past end-of-file we can skip

@@ -773,7 +773,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
n -= nbytes;
}
out:
zfs_range_unlock(rl);
rangelock_exit(lr);

ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
ZFS_EXIT(zfsvfs);
@@ -813,7 +813,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
zilog_t *zilog;
offset_t woff;
ssize_t n, nbytes;
rl_t *rl;
int max_blksz = zfsvfs->z_max_blksz;
int error = 0;
arc_buf_t *abuf;

@@ -881,7 +880,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
}

/*
* Check for mandatory locks before calling zfs_range_lock()
* Check for mandatory locks before calling rangelock_enter()
* in order to prevent a deadlock with locks set via fcntl().
*/
if (MANDMODE((mode_t)zp->z_mode) &&
@@ -906,14 +905,15 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
/*
* If in append mode, set the io offset pointer to eof.
*/
locked_range_t *lr;
if (ioflag & FAPPEND) {
/*
* Obtain an appending range lock to guarantee file append
* semantics. We reset the write offset once we have the lock.
*/
rl = zfs_range_lock(zp, 0, n, RL_APPEND);
woff = rl->r_off;
if (rl->r_len == UINT64_MAX) {
lr = rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
woff = lr->lr_offset;
if (lr->lr_length == UINT64_MAX) {
/*
* We overlocked the file because this write will cause
* the file block size to increase.

@@ -928,17 +928,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
* this write, then this range lock will lock the entire file
* so that we can re-write the block safely.
*/
rl = zfs_range_lock(zp, woff, n, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
}

if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
zfs_range_unlock(rl);
rangelock_exit(lr);
ZFS_EXIT(zfsvfs);
return (EFBIG);
}

if (woff >= limit) {
zfs_range_unlock(rl);
rangelock_exit(lr);
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EFBIG));
}
@@ -1019,12 +1019,12 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
}

/*
* If zfs_range_lock() over-locked we grow the blocksize
* If rangelock_enter() over-locked we grow the blocksize
* and then reduce the lock range. This will only happen
* on the first iteration since zfs_range_reduce() will
* shrink down r_len to the appropriate size.
* on the first iteration since rangelock_reduce() will
* shrink down lr_length to the appropriate size.
*/
if (rl->r_len == UINT64_MAX) {
if (lr->lr_length == UINT64_MAX) {
uint64_t new_blksz;

if (zp->z_blksz > max_blksz) {

@@ -1040,7 +1040,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
new_blksz = MIN(end_size, max_blksz);
}
zfs_grow_blocksize(zp, new_blksz, tx);
zfs_range_reduce(rl, woff, n);
rangelock_reduce(lr, woff, n);
}

/*

@@ -1166,7 +1166,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
#endif
}

zfs_range_unlock(rl);
rangelock_exit(lr);

/*
* If we're in replay mode, or we made no progress, return error.
@@ -1206,7 +1206,7 @@ zfs_get_done(zgd_t *zgd, int error)
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);

zfs_range_unlock(zgd->zgd_rl);
rangelock_exit(zgd->zgd_lr);

/*
* Release the vnode asynchronously as we currently have the

@@ -1268,7 +1268,8 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
* we don't have to write the data twice.
*/
if (buf != NULL) { /* immediate write */
zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
zgd->zgd_lr = rangelock_enter(&zp->z_rangelock,
offset, size, RL_READER);
/* test for truncation needs to be done while range locked */
if (offset >= zp->z_size) {
error = SET_ERROR(ENOENT);
@@ -1289,12 +1290,12 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
size = zp->z_blksz;
blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
offset -= blkoff;
zgd->zgd_rl = zfs_range_lock(zp, offset, size,
RL_READER);
zgd->zgd_lr = rangelock_enter(&zp->z_rangelock,
offset, size, RL_READER);
if (zp->z_blksz == size)
break;
offset += blkoff;
zfs_range_unlock(zgd->zgd_rl);
rangelock_exit(zgd->zgd_lr);
}
/* test for truncation needs to be done while range locked */
if (lr->lr_offset >= zp->z_size)
@@ -4484,7 +4485,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
objset_t *os = zp->z_zfsvfs->z_os;
rl_t *rl;
locked_range_t *lr;
vm_object_t object;
off_t start, end, obj_size;
uint_t blksz;

@@ -4503,11 +4504,11 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
*/
for (;;) {
blksz = zp->z_blksz;
rl = zfs_range_lock(zp, rounddown(start, blksz),
lr = rangelock_enter(&zp->z_rangelock, rounddown(start, blksz),
roundup(end, blksz) - rounddown(start, blksz), RL_READER);
if (blksz == zp->z_blksz)
break;
zfs_range_unlock(rl);
rangelock_exit(lr);
}

object = ma[0]->object;

@@ -4515,7 +4516,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
obj_size = object->un_pager.vnp.vnp_size;
zfs_vmobject_wunlock(object);
if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
ZFS_EXIT(zfsvfs);
return (zfs_vm_pagerret_bad);
}

@@ -4543,7 +4544,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
MIN(end, obj_size) - (end - PAGE_SIZE));

zfs_range_unlock(rl);
rangelock_exit(lr);
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
ZFS_EXIT(zfsvfs);
@@ -4580,7 +4581,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
rl_t *rl;
locked_range_t *lr;
dmu_tx_t *tx;
struct sf_buf *sf;
vm_object_t object;

@@ -4613,7 +4614,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
blksz = zp->z_blksz;
lo_off = rounddown(off, blksz);
lo_len = roundup(len + (off - lo_off), blksz);
rl = zfs_range_lock(zp, lo_off, lo_len, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);

zfs_vmobject_wlock(object);
if (len + off > object->un_pager.vnp.vnp_size) {

@@ -4707,7 +4708,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
dmu_tx_commit(tx);

out:
zfs_range_unlock(rl);
rangelock_exit(lr);
if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zfsvfs->z_log, zp->z_id);
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -116,6 +116,38 @@ extern struct vop_vector zfs_vnodeops;
extern struct vop_vector zfs_fifoops;
extern struct vop_vector zfs_shareops;

/*
* This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
* z_rangelock. It will modify the offset and length of the lock to reflect
* znode-specific information, and convert RL_APPEND to RL_WRITER. This is
* called with the rangelock_t's rl_lock held, which avoids races.
*/
static void
zfs_rangelock_cb(locked_range_t *new, void *arg)
{
znode_t *zp = arg;

/*
* If in append mode, convert to writer and lock starting at the
* current end of file.
*/
if (new->lr_type == RL_APPEND) {
new->lr_offset = zp->z_size;
new->lr_type = RL_WRITER;
}

/*
* If we need to grow the block size then lock the whole file range.
*/
uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
new->lr_offset = 0;
new->lr_length = UINT64_MAX;
}
}

/*ARGSUSED*/
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
@@ -127,9 +159,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)

mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zp->z_range_avl, zfs_range_compare,
sizeof (rl_t), offsetof(rl_t, r_node));
rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);

zp->z_acl_cached = NULL;
zp->z_vnode = NULL;

@@ -147,8 +177,7 @@ zfs_znode_cache_destructor(void *buf, void *arg)
ASSERT3P(zp->z_vnode, ==, NULL);
ASSERT(!list_link_active(&zp->z_link_node));
mutex_destroy(&zp->z_acl_lock);
avl_destroy(&zp->z_range_avl);
mutex_destroy(&zp->z_range_lock);
rangelock_fini(&zp->z_rangelock);

ASSERT(zp->z_acl_cached == NULL);
}

@@ -183,7 +212,6 @@ zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)

nzp->z_id = ozp->z_id;
ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
nzp->z_unlinked = ozp->z_unlinked;
nzp->z_atime_dirty = ozp->z_atime_dirty;
nzp->z_zn_prefetch = ozp->z_zn_prefetch;
@@ -1569,20 +1597,20 @@ zfs_extend(znode_t *zp, uint64_t end)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
dmu_tx_t *tx;
rl_t *rl;
locked_range_t *lr;
uint64_t newblksz;
int error;

/*
* We will change zp_size, lock the whole file.
*/
rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);

/*
* Nothing to do if file already at desired length.
*/
if (end <= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}
tx = dmu_tx_create(zfsvfs->z_os);

@@ -1612,7 +1640,7 @@ zfs_extend(znode_t *zp, uint64_t end)
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}

@@ -1626,7 +1654,7 @@ zfs_extend(znode_t *zp, uint64_t end)

vnode_pager_setsize(ZTOV(zp), end);

zfs_range_unlock(rl);
rangelock_exit(lr);

dmu_tx_commit(tx);
@@ -1646,19 +1674,19 @@ static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
rl_t *rl;
locked_range_t *lr;
int error;

/*
* Lock the range being freed.
*/
rl = zfs_range_lock(zp, off, len, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);

/*
* Nothing to do if file already at desired length.
*/
if (off >= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}

@@ -1676,7 +1704,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
vnode_pager_setsize(ZTOV(zp), off);
}

zfs_range_unlock(rl);
rangelock_exit(lr);

return (error);
}
@@ -1695,7 +1723,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
vnode_t *vp = ZTOV(zp);
dmu_tx_t *tx;
rl_t *rl;
locked_range_t *lr;
int error;
sa_bulk_attr_t bulk[2];
int count = 0;

@@ -1703,20 +1731,20 @@ zfs_trunc(znode_t *zp, uint64_t end)
/*
* We will change zp_size, lock the whole file.
*/
rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);

/*
* Nothing to do if file already at desired length.
*/
if (end >= zp->z_size) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (0);
}

error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
DMU_OBJECT_END);
if (error) {
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}
tx = dmu_tx_create(zfsvfs->z_os);

@@ -1726,7 +1754,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
zfs_range_unlock(rl);
rangelock_exit(lr);
return (error);
}

@@ -1751,7 +1779,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
*/
vnode_pager_setsize(vp, end);

zfs_range_unlock(rl);
rangelock_exit(lr);

return (0);
}
@@ -94,6 +94,7 @@
#include <sys/zio_checksum.h>
#include <sys/zil_impl.h>
#include <sys/filio.h>
#include <sys/zfs_rlock.h>

#include <geom/geom.h>

@@ -173,7 +174,7 @@ typedef struct zvol_state {
uint32_t zv_sync_cnt; /* synchronous open count */
zilog_t *zv_zilog; /* ZIL handle */
list_t zv_extents; /* List of extents for dump */
znode_t zv_znode; /* for range locking */
rangelock_t zv_rangelock;
dnode_t *zv_dn; /* dnode hold */
#ifndef illumos
int zv_state;
@@ -737,9 +738,7 @@ zvol_create_minor(const char *name)
zv->zv_objset = os;
if (dmu_objset_is_snapshot(os) || !spa_writeable(dmu_objset_spa(os)))
zv->zv_flags |= ZVOL_RDONLY;
mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
sizeof (rl_t), offsetof(rl_t, r_node));
rangelock_init(&zv->zv_rangelock, NULL, NULL);
list_create(&zv->zv_extents, sizeof (zvol_extent_t),
offsetof(zvol_extent_t, ze_node));
#ifdef illumos

@@ -809,8 +808,7 @@ zvol_remove_zv(zvol_state_t *zv)
}
#endif

avl_destroy(&zv->zv_znode.z_range_avl);
mutex_destroy(&zv->zv_znode.z_range_lock);
rangelock_fini(&zv->zv_rangelock);

kmem_free(zv, sizeof (zvol_state_t));
#ifdef illumos
@@ -1328,7 +1326,7 @@ zvol_get_done(zgd_t *zgd, int error)
if (zgd->zgd_db)
dmu_buf_rele(zgd->zgd_db, zgd);

zfs_range_unlock(zgd->zgd_rl);
rangelock_exit(zgd->zgd_lr);

kmem_free(zgd, sizeof (zgd_t));
}

@@ -1361,7 +1359,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
* we don't have to write the data twice.
*/
if (buf != NULL) { /* immediate write */
zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size,
RL_READER);
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
DMU_READ_NO_PREFETCH);

@@ -1374,7 +1372,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
*/
size = zv->zv_volblocksize;
offset = P2ALIGN(offset, size);
zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
zgd->zgd_lr = rangelock_enter(&zv->zv_rangelock, offset, size,
RL_READER);
error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
DMU_READ_NO_PREFETCH);
@@ -1582,7 +1580,6 @@ zvol_strategy(struct bio *bp)
size_t resid;
char *addr;
objset_t *os;
rl_t *rl;
int error = 0;
#ifdef illumos
boolean_t doread = bp->b_flags & B_READ;

@@ -1688,7 +1685,7 @@ zvol_strategy(struct bio *bp)
* There must be no buffer changes when doing a dmu_sync() because
* we can't change the data whilst calculating the checksum.
*/
rl = zfs_range_lock(&zv->zv_znode, off, resid,
locked_range_t *lr = rangelock_enter(&zv->zv_rangelock, off, resid,
doread ? RL_READER : RL_WRITER);

#ifndef illumos

@@ -1745,7 +1742,7 @@ zvol_strategy(struct bio *bp)
#ifndef illumos
unlock:
#endif
zfs_range_unlock(rl);
rangelock_exit(lr);

#ifdef illumos
if ((bp->b_resid = resid) == bp->b_bcount)
@@ -1836,7 +1833,6 @@ zvol_read(struct cdev *dev, struct uio *uio, int ioflag)
#endif /* illumos */
zvol_state_t *zv;
uint64_t volsize;
rl_t *rl;
int error = 0;

#ifdef illumos

@@ -1861,8 +1857,8 @@ zvol_read(struct cdev *dev, struct uio *uio, int ioflag)
}
#endif

rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
RL_READER);
locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
uio->uio_loffset, uio->uio_resid, RL_READER);
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);

@@ -1878,7 +1874,8 @@ zvol_read(struct cdev *dev, struct uio *uio, int ioflag)
break;
}
}
zfs_range_unlock(rl);
rangelock_exit(lr);

return (error);
}
@@ -1895,7 +1892,6 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
#endif /* illumos */
zvol_state_t *zv;
uint64_t volsize;
rl_t *rl;
int error = 0;
boolean_t sync;

@@ -1926,8 +1922,8 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
#endif
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
RL_WRITER);
locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
uio->uio_loffset, uio->uio_resid, RL_WRITER);
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
uint64_t off = uio->uio_loffset;

@@ -1950,7 +1946,8 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
if (error)
break;
}
zfs_range_unlock(rl);
rangelock_exit(lr);

if (sync)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
return (error);
@@ -2042,7 +2039,7 @@ zvol_get_volume_params(minor_t minor, uint64_t *blksize,
*minor_hdl = zv;
*objset_hdl = zv->zv_objset;
*zil_hdl = zv->zv_zilog;
*rl_hdl = &zv->zv_znode;
*rl_hdl = &zv->zv_rangelock;
*dnode_hdl = zv->zv_dn;
return (0);
}

@@ -2123,7 +2120,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
zvol_state_t *zv;
struct dk_callback *dkc;
int error = 0;
rl_t *rl;
locked_range_t *lr;

mutex_enter(&zfsdev_state_lock);
@@ -2240,19 +2237,19 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
break;

case DKIOCDUMPINIT:
rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
lr = rangelock_enter(&zv->zv_rangelock, 0, zv->zv_volsize,
RL_WRITER);
error = zvol_dumpify(zv);
zfs_range_unlock(rl);
rangelock_exit(lr);
break;

case DKIOCDUMPFINI:
if (!(zv->zv_flags & ZVOL_DUMPIFIED))
break;
rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize,
lr = rangelock_enter(&zv->zv_rangelock, 0, zv->zv_volsize,
RL_WRITER);
error = zvol_dump_fini(zv);
zfs_range_unlock(rl);
rangelock_exit(lr);
break;

case DKIOCFREE:

@@ -2295,7 +2292,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
length = end - start;
}

rl = zfs_range_lock(&zv->zv_znode, start, length,
lr = rangelock_enter(&zv->zv_rangelock, start, length,
RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
error = dmu_tx_assign(tx, TXG_WAIT);

@@ -2309,7 +2306,7 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
ZVOL_OBJ, start, length);
}

zfs_range_unlock(rl);
rangelock_exit(lr);

if (error != 0)
break;
@@ -3166,7 +3163,7 @@ static int
zvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
{
zvol_state_t *zv;
rl_t *rl;
locked_range_t *lr;
off_t offset, length;
int i, error;
boolean_t sync;

@@ -3203,7 +3200,8 @@ zvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct threa
break;
}

rl = zfs_range_lock(&zv->zv_znode, offset, length, RL_WRITER);
lr = rangelock_enter(&zv->zv_rangelock, offset, length,
RL_WRITER);
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {

@@ -3216,7 +3214,7 @@ zvol_d_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct threa
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
offset, length);
}
zfs_range_unlock(rl);
rangelock_exit(lr);
if (sync)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
break;