1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-10-18 02:19:39 +00:00

libthr: Patch to reduce latency to acquire+release a pthread mutex.

The acquisition and release of an uncontended default/normal pthread
mutex on FreeBSD is surprisingly slow, e.g., pthread wrlocks and binary
semaphores both exhibit roughly 33% lower latency, while default/normal
mutexes on Linux exhibit roughly 67% lower latency than FreeBSD. This is
likely explained by the fact that AFAICT in the best case to acquire an
uncontended mutex on Linux one need touch only 1 page and read+modify
only 1 cacheline, whereas on FreeBSD we need to touch at least 4 pages,
read 6 cachelines, and modify at least 4 cachelines.

This patch does not address the pthread mutex architecture. Instead,
it improves performance by adding the __always_inline attribute to
mutex_lock_common() and mutex_unlock_common() to encourage constant
folding and propagation, thereby lowering the latency to acquire and
release a mutex due to a shorter code path with fewer compares, jumps,
and mispredicts.

With this patch on a stock build I see a reduction in latency of roughly
7% for default/normal mutexes, and 17% for robust mutexes. When built
without PTHREADS_ASSERTIONS enabled I see a reduction in latency of
roughly 15% and 26%, respectively. Surprisingly, I see similar reductions
in latency for heavily contended mutexes.

By default, this patch increases the size of libthr.so.3 by 2448 bytes,
but when built without PTHREADS_ASSERTIONS enabled it only increases by
448 bytes.

Reviewed by:	jhb (previous version), kib
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D40912
This commit is contained in:
Greg Becker 2023-07-07 17:03:14 -05:00 committed by Konstantin Belousov
parent bfa51318de
commit b370ef156a

View File

@ -596,7 +596,7 @@ check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
*m = *mutex;
ret = 0;
if (*m == THR_PSHARED_PTR) {
if (__predict_false(*m == THR_PSHARED_PTR)) {
*m = __thr_pshared_offpage(mutex, 0);
if (*m == NULL)
ret = EINVAL;
@ -714,7 +714,7 @@ mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
return (ret);
}
static inline int
static __always_inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
bool cvattach, bool rb_onlist)
{
@ -728,7 +728,7 @@ mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
if (!rb_onlist)
robust = _mutex_enter_robust(curthread, m);
ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
if (ret == 0 || ret == EOWNERDEAD) {
if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
enqueue_mutex(curthread, m, ret);
if (ret == EOWNERDEAD)
m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
@ -951,7 +951,7 @@ mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
return (ret);
}
static int
static __always_inline int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
struct pthread *curthread;