Mirror of https://git.FreeBSD.org/src.git, synced 2024-12-17 10:26:15 +00:00
5008 lock contention (rrw_exit) while running a read only load
Reviewed by: Matthew Ahrens <matthew.ahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
Approved by: Garrett D'Amore <garrett@damore.org>
illumos/illumos-gate@c9030f6c93
parent 747c0e4125
commit 4fa00fc6d6
Notes:
svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor-sys/illumos/dist/; revision=268852
@@ -286,3 +286,91 @@ rrw_tsd_destroy(void *arg)
 		    (void *)curthread, (void *)rn->rn_rrl);
 	}
 }
+
+/*
+ * A reader-mostly lock implementation, tuned above reader-writer locks
+ * for highly parallel read acquisitions, while pessimizing writes.
+ *
+ * The idea is to split a single busy lock into an array of locks, so that
+ * each reader can lock only one of them for read, depending on the result
+ * of a simple hash function.  That proportionally reduces lock congestion.
+ * A writer at the same time has to sequentially acquire write on all the
+ * locks.  That makes write acquisition proportionally slower, but in places
+ * where it is used (filesystem unmount) performance is not critical.
+ *
+ * All the functions below are direct wrappers around the functions above.
+ */
+void
+rrm_init(rrmlock_t *rrl, boolean_t track_all)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_init(&rrl->locks[i], track_all);
+}
+
+void
+rrm_destroy(rrmlock_t *rrl)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_destroy(&rrl->locks[i]);
+}
+
+void
+rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag)
+{
+	if (rw == RW_READER)
+		rrm_enter_read(rrl, tag);
+	else
+		rrm_enter_write(rrl);
+}
+
+/*
+ * This maps the current thread to a specific lock.  Note that the lock
+ * must be released by the same thread that acquired it.  We do this
+ * mapping by taking the thread pointer mod a prime number.  We examine
+ * only the low 32 bits of the thread pointer, because 32-bit division
+ * is faster than 64-bit division, and the high 32 bits have little
+ * entropy anyway.
+ */
+#define	RRM_TD_LOCK()	(((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS)
+
+void
+rrm_enter_read(rrmlock_t *rrl, void *tag)
+{
+	rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag);
+}
+
+void
+rrm_enter_write(rrmlock_t *rrl)
+{
+	int i;
+
+	for (i = 0; i < RRM_NUM_LOCKS; i++)
+		rrw_enter_write(&rrl->locks[i]);
+}
+
+void
+rrm_exit(rrmlock_t *rrl, void *tag)
+{
+	int i;
+
+	if (rrl->locks[0].rr_writer == curthread) {
+		for (i = 0; i < RRM_NUM_LOCKS; i++)
+			rrw_exit(&rrl->locks[i], tag);
+	} else {
+		rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag);
+	}
+}
+
+boolean_t
+rrm_held(rrmlock_t *rrl, krw_t rw)
+{
+	if (rw == RW_WRITER) {
+		return (rrw_held(&rrl->locks[0], rw));
+	} else {
+		return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw));
+	}
+}
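
[Editor's sketch] The striping technique added above translates directly to user space. The following is a minimal sketch using POSIX rwlocks rather than the kernel rrwlock_t; every name in it (rm_lock_t, rm_self, RM_NUM_LOCKS) is illustrative and not part of this commit, and the pthread_self() cast assumes pthread_t is an integer or pointer type, as it is on common platforms.

/*
 * Illustrative user-space analogue of rrmlock, NOT from this commit.
 * Build with -lpthread.
 */
#include <pthread.h>
#include <stdint.h>

#define	RM_NUM_LOCKS	17	/* prime, mirroring RRM_NUM_LOCKS */

typedef struct rm_lock {
	pthread_rwlock_t locks[RM_NUM_LOCKS];
} rm_lock_t;

/*
 * Hash the calling thread to one stripe; readers touch only that one.
 * Assumes pthread_t converts to an integer (true on common platforms).
 */
static unsigned
rm_self(void)
{
	return ((uint32_t)(uintptr_t)pthread_self() % RM_NUM_LOCKS);
}

static void
rm_init(rm_lock_t *rm)
{
	for (int i = 0; i < RM_NUM_LOCKS; i++)
		pthread_rwlock_init(&rm->locks[i], NULL);
}

static void
rm_destroy(rm_lock_t *rm)
{
	for (int i = 0; i < RM_NUM_LOCKS; i++)
		pthread_rwlock_destroy(&rm->locks[i]);
}

/* Readers contend only on their own stripe, like rrm_enter_read(). */
static void
rm_enter_read(rm_lock_t *rm)
{
	pthread_rwlock_rdlock(&rm->locks[rm_self()]);
}

/* Must run on the thread that called rm_enter_read(), as with rrmlock. */
static void
rm_exit_read(rm_lock_t *rm)
{
	pthread_rwlock_unlock(&rm->locks[rm_self()]);
}

/*
 * Writers take every stripe in index order, like rrm_enter_write();
 * the fixed order keeps two concurrent writers from deadlocking.
 */
static void
rm_enter_write(rm_lock_t *rm)
{
	for (int i = 0; i < RM_NUM_LOCKS; i++)
		pthread_rwlock_wrlock(&rm->locks[i]);
}

static void
rm_exit_write(rm_lock_t *rm)
{
	for (int i = 0; i < RM_NUM_LOCKS; i++)
		pthread_rwlock_unlock(&rm->locks[i]);
}

One design point carried over from rrm_exit() above: a reader must release from the same thread that acquired, because the stripe index is recomputed from the thread identity. rrm_exit() itself distinguishes the two paths by checking rr_writer on stripe 0, releasing either all stripes (writer) or just the hashed one (reader).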
@@ -80,6 +80,31 @@ void rrw_tsd_destroy(void *arg);
 #define	RRW_LOCK_HELD(x) \
 	(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
+
+/*
+ * A reader-mostly lock implementation, tuned above reader-writer locks
+ * for highly parallel read acquisitions, pessimizing write acquisitions.
+ *
+ * This should be a prime number.  See the comment in rrwlock.c near
+ * RRM_TD_LOCK() for details.
+ */
+#define	RRM_NUM_LOCKS		17
+typedef struct rrmlock {
+	rrwlock_t	locks[RRM_NUM_LOCKS];
+} rrmlock_t;
+
+void rrm_init(rrmlock_t *rrl, boolean_t track_all);
+void rrm_destroy(rrmlock_t *rrl);
+void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
+void rrm_enter_read(rrmlock_t *rrl, void *tag);
+void rrm_enter_write(rrmlock_t *rrl);
+void rrm_exit(rrmlock_t *rrl, void *tag);
+boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
+
+#define	RRM_READ_HELD(x)	rrm_held(x, RW_READER)
+#define	RRM_WRITE_HELD(x)	rrm_held(x, RW_WRITER)
+#define	RRM_LOCK_HELD(x) \
+	(rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
+
 #ifdef	__cplusplus
 }
 #endif
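
[Editor's sketch] Why must RRM_NUM_LOCKS be prime? Thread structures are typically allocated at power-of-two-aligned addresses, so reducing thread pointers modulo a power of two would reuse only a few stripes, while a prime modulus spreads them out. A small self-contained illustration, using made-up thread addresses and a made-up allocation stride:

/* Illustrative only; the base address and stride are invented. */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Eight fake thread pointers, 1024 bytes apart (power-of-two stride). */
	for (int t = 0; t < 8; t++) {
		uint32_t td = 0x80000000u + 1024u * (uint32_t)t;
		printf("thread %d: mod 16 -> %2u, mod 17 -> %2u\n",
		    t, (unsigned)(td % 16), (unsigned)(td % 17));
	}
	return (0);
}

Here every fake thread collides on stripe 0 under a modulus of 16, while the prime 17 assigns all eight threads to distinct stripes.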
@@ -64,7 +64,7 @@ struct zfsvfs {
 	int		z_norm;		/* normalization flags */
 	boolean_t	z_atime;	/* enable atimes mount option */
 	boolean_t	z_unmounted;	/* unmounted */
-	rrwlock_t	z_teardown_lock;
+	rrmlock_t	z_teardown_lock;
 	krwlock_t	z_teardown_inactive_lock;
 	list_t		z_all_znodes;	/* all vnodes in the fs */
 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
@@ -238,7 +238,7 @@ typedef struct znode {
 /* Called on entry to each ZFS vnode and vfs operation */
 #define	ZFS_ENTER(zfsvfs) \
 	{ \
-		rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
+		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
 		if ((zfsvfs)->z_unmounted) { \
 			ZFS_EXIT(zfsvfs); \
 			return (EIO); \
@@ -246,7 +246,7 @@ typedef struct znode {
 }
 
 /* Must be called before exiting the vop */
-#define	ZFS_EXIT(zfsvfs)	rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+#define	ZFS_EXIT(zfsvfs)	rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 /* Verifies the znode is valid */
 #define	ZFS_VERIFY_ZP(zp) \
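
[Editor's sketch] To see the updated macros in context, here is a hypothetical vnode operation in the usual ZFS_ENTER/ZFS_EXIT shape; zfs_example_getattr and its body are illustrative only, not code from this commit:

/* Illustrative kernel-context fragment, assumes the ZFS headers above. */
static int
zfs_example_getattr(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ZFS_ENTER(zfsvfs);	/* now rrm_enter_read(): one stripe, or EIO */
	ZFS_VERIFY_ZP(zp);

	/* ... read attributes while teardown is held off ... */

	ZFS_EXIT(zfsvfs);	/* rrm_exit(): releases this thread's stripe */
	return (0);
}

Because ZFS_ENTER() now expands to rrm_enter_read(), concurrent vnode operations on different threads mostly take different stripes, which is exactly the rrw_exit() contention that issue 5008 reports.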
@@ -1420,7 +1420,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
 	if (getzfsvfs(name, zfvp) != 0)
 		error = zfsvfs_create(name, zfvp);
 	if (error == 0) {
-		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
+		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
 		    RW_READER, tag);
 		if ((*zfvp)->z_unmounted) {
 			/*
@@ -1428,7 +1428,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
 			 * thread should be just about to disassociate the
 			 * objset from the zfsvfs.
 			 */
-			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
+			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
 			return (SET_ERROR(EBUSY));
 		}
 	}
@@ -1438,7 +1438,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
 static void
 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 {
-	rrw_exit(&zfsvfs->z_teardown_lock, tag);
+	rrm_exit(&zfsvfs->z_teardown_lock, tag);
 
 	if (zfsvfs->z_vfs) {
 		VFS_RELE(zfsvfs->z_vfs);
@@ -1004,7 +1004,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
-	rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
+	rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1119,7 +1119,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_lock);
 	list_destroy(&zfsvfs->z_all_znodes);
-	rrw_destroy(&zfsvfs->z_teardown_lock);
+	rrm_destroy(&zfsvfs->z_teardown_lock);
 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
 	rw_destroy(&zfsvfs->z_fuid_lock);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1784,7 +1784,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 {
 	znode_t	*zp;
 
-	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
 
 	if (!unmounting) {
 		/*
@@ -1814,7 +1814,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 	 */
 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
-		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		return (SET_ERROR(EIO));
 	}
 
@@ -1841,7 +1841,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 	 */
 	if (unmounting) {
 		zfsvfs->z_unmounted = B_TRUE;
-		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 	}
 
@@ -2073,7 +2073,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 	znode_t *zp;
 	uint64_t sa_obj = 0;
 
-	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
+	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
 
 	/*
@@ -2129,7 +2129,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 bail:
 	/* release the VOPs */
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
-	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 
 	if (err) {
 		/*
@@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
 	 * can safely ensure that the filesystem is not and will not be
 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
 	 */
-	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
+	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
 	if (zfsvfs->z_unmounted) {
 		ZFS_EXIT(zfsvfs);
 		rw_exit(&zfsvfs_lock);