Mirror of https://git.FreeBSD.org/src.git (synced 2024-12-29 12:03:03 +00:00)
Import CK as of commit b19ed4c6a56ec93215ab567ba18ba61bf1cfbac8.

This should fix ck_pr_[load|store]_ptr on mips and riscv, ensure no *fence
instructions are used on i386 (older CPUs don't support them), and ensure we
don't rely on gcc builtins that can lead to calls to libatomic when linked
with -O0.

MFC after:	1 week
This commit is contained in: commit 271ce40239

Notes (svn2git, 2020-12-20 02:59:44 +00:00):
	svn path=/head/; revision=331898
@@ -8,4 +8,5 @@
 */regressions
 */tools
 */include/ck_md.h.in
+*/include/freebsd/ck_md.h.in
 */src/Makefile.in
@@ -104,41 +104,35 @@
 #define CK_CC_TYPEOF(X, DEFAULT) (DEFAULT)
 #endif
 
+#define CK_F_CC_FFS_G(L, T)				\
+CK_CC_INLINE static int					\
+ck_cc_##L(T v)						\
+{							\
+	unsigned int i;					\
+							\
+	if (v == 0)					\
+		return 0;				\
+							\
+	for (i = 1; (v & 1) == 0; i++, v >>= 1);	\
+	return i;					\
+}
+
 #ifndef CK_F_CC_FFS
 #define CK_F_CC_FFS
-CK_CC_INLINE static int
-ck_cc_ffs(unsigned int x)
-{
-	unsigned int i;
-
-	if (x == 0)
-		return 0;
-
-	for (i = 1; (x & 1) == 0; i++, x >>= 1);
-
-	return i;
-}
-#endif
+CK_F_CC_FFS_G(ffs, unsigned int)
+#endif /* CK_F_CC_FFS */
 
-#ifndef CK_F_CC_CLZ
-#define CK_F_CC_CLZ
-#include <ck_limits.h>
+#ifndef CK_F_CC_FFSL
+#define CK_F_CC_FFSL
+CK_F_CC_FFS_G(ffsl, unsigned long)
+#endif /* CK_F_CC_FFSL */
 
-CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
-{
-	unsigned int count, i;
-
-	for (count = 0, i = sizeof(unsigned int) * CHAR_BIT; i > 0; count++) {
-		unsigned int bit = 1U << --i;
-
-		if (x & bit)
-			break;
-	}
-
-	return count;
-}
-#endif
+#ifndef CK_F_CC_FFSLL
+#define CK_F_CC_FFSLL
+CK_F_CC_FFS_G(ffsll, unsigned long long)
+#endif /* CK_F_CC_FFSLL */
+
+#undef CK_F_CC_FFS_G
 
 #ifndef CK_F_CC_CTZ
 #define CK_F_CC_CTZ
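The CK_F_CC_FFS_G generator above stamps ck_cc_ffs, ck_cc_ffsl and
ck_cc_ffsll out of a single body. A minimal sketch of what
CK_F_CC_FFS_G(ffsl, unsigned long) expands to, modulo the CK_CC_INLINE
attribute and the real name; the test harness and expected values are mine,
not part of the commit:

#include <stdio.h>

static int
my_ffsl(unsigned long v)
{
	unsigned int i;

	if (v == 0)
		return 0;

	for (i = 1; (v & 1) == 0; i++, v >>= 1);
	return i;
}

int
main(void)
{

	/* Returns the 1-based index of the lowest set bit, 0 for 0. */
	printf("%d %d %d\n", my_ffsl(0), my_ffsl(1), my_ffsl(40)); /* 0 1 4 */
	return 0;
}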
@@ -151,7 +145,6 @@ ck_cc_ctz(unsigned int x)
 		return 0;
 
 	for (i = 0; (x & 1) == 0; i++, x >>= 1);
-
 	return i;
 }
 #endif
@@ -100,10 +100,11 @@ struct ck_hs_stat {
 struct ck_hs_iterator {
 	void **cursor;
 	unsigned long offset;
+	struct ck_hs_map *map;
 };
 typedef struct ck_hs_iterator ck_hs_iterator_t;
 
-#define CK_HS_ITERATOR_INITIALIZER { NULL, 0 }
+#define CK_HS_ITERATOR_INITIALIZER { NULL, 0, NULL }
 
 /* Convenience wrapper to table hash function. */
 #define CK_HS_HASH(T, F, K) F((K), (T)->seed)
@@ -112,6 +113,7 @@ typedef void *ck_hs_apply_fn_t(void *, void *);
 bool ck_hs_apply(ck_hs_t *, unsigned long, const void *, ck_hs_apply_fn_t *, void *);
 void ck_hs_iterator_init(ck_hs_iterator_t *);
 bool ck_hs_next(ck_hs_t *, ck_hs_iterator_t *, void **);
+bool ck_hs_next_spmc(ck_hs_t *, ck_hs_iterator_t *, void **);
 bool ck_hs_move(ck_hs_t *, ck_hs_t *, ck_hs_hash_cb_t *,
     ck_hs_compare_cb_t *, struct ck_malloc *);
 bool ck_hs_init(ck_hs_t *, unsigned int, ck_hs_hash_cb_t *,
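The new map field lets an iterator pin the ck_hs_map it started on, and
ck_hs_next_spmc() walks that pinned map so a concurrent grow cannot move the
walk onto a different map mid-iteration. A minimal usage sketch against the
declarations above; the visit callback is hypothetical:

#include <ck_hs.h>

static void
walk_spmc(ck_hs_t *hs, void (*visit)(void *))
{
	ck_hs_iterator_t it = CK_HS_ITERATOR_INITIALIZER;
	void *key;

	/* Safe for a reader alongside a single writer: the iterator
	 * latches hs->map on the first call. */
	while (ck_hs_next_spmc(hs, &it, &key) == true)
		visit(key);

	return;
}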
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2012 Samy Al Bahra.
+ * Copyright 2018 Samy Al Bahra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -23,45 +23,95 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: head/sys/contrib/ck/include/ck_md.h 329388 2018-02-16 17:50:06Z cognet $
+ */
+
+/*
+ * This header file is meant for use of Concurrency Kit in the FreeBSD kernel.
  */
 
 #ifndef CK_MD_H
 #define CK_MD_H
 
+#include <sys/param.h>
+
+#ifndef _KERNEL
+#error This header file is meant for the FreeBSD kernel.
+#endif /* _KERNEL */
+
 #ifndef CK_MD_CACHELINE
+/*
+ * FreeBSD's CACHE_LINE macro is a compile-time maximum cache-line size for an
+ * architecture, defined to be 128 bytes by default on x86*. Even in presence
+ * of adjacent sector prefetch, this doesn't make sense from a modeling
+ * perspective.
+ */
+#if defined(__amd64__) || defined(__i386__)
 #define CK_MD_CACHELINE (64)
-#endif
+#else
+#define CK_MD_CACHELINE (CACHE_LINE_SIZE)
+#endif /* !__amd64__ && !__i386__ */
+#endif /* CK_MD_CACHELINE */
 
 #ifndef CK_MD_PAGESIZE
-#define CK_MD_PAGESIZE (4096)
+#define CK_MD_PAGESIZE (PAGE_SIZE)
 #endif
 
+/*
+ * Once FreeBSD has a mechanism to detect RTM, this can be enabled and RTM
+ * facilities can be called. These facilities refer to TSX.
+ */
 #ifndef CK_MD_RTM_DISABLE
 #define CK_MD_RTM_DISABLE
 #endif /* CK_MD_RTM_DISABLE */
 
+/*
+ * Do not enable pointer-packing-related (VMA) optimizations in kernel-space.
+ */
 #ifndef CK_MD_POINTER_PACK_DISABLE
 #define CK_MD_POINTER_PACK_DISABLE
 #endif /* CK_MD_POINTER_PACK_DISABLE */
 
+/*
+ * The following would be used for pointer-packing tricks, disabled for the
+ * kernel.
+ */
 #ifndef CK_MD_VMA_BITS_UNKNOWN
 #define CK_MD_VMA_BITS_UNKNOWN
 #endif /* CK_MD_VMA_BITS_UNKNOWN */
 
+/*
+ * Do not enable double operations in kernel-space.
+ */
 #ifndef CK_PR_DISABLE_DOUBLE
 #define CK_PR_DISABLE_DOUBLE
 #endif /* CK_PR_DISABLE_DOUBLE */
 
-#define CK_VERSION "0.6.0"
-#define CK_GIT_SHA ""
+/*
+ * If building for a uni-processor target, then enable the uniprocessor
+ * feature flag. This, among other things, will remove the lock prefix.
+ */
+#ifndef SMP
+#define CK_MD_UMP
+#endif /* SMP */
+
+/*
+ * Disable the use of compiler builtin functions.
+ */
+#define CK_MD_CC_BUILTIN_DISABLE 1
+
 /*
  * CK expects those, which are normally defined by the build system.
  */
 #if defined(__i386__) && !defined(__x86__)
 #define __x86__
+/*
+ * If x86 becomes more relevant, we may want to consider importing in
+ * __mbk() to avoid potential issues around false sharing.
+ */
 #define CK_MD_TSO
+#define CK_MD_SSE_DISABLE 1
 #elif defined(__amd64__)
 #define CK_MD_TSO
 #elif defined(__sparc64__) && !defined(__sparcv9__)
@@ -73,8 +123,12 @@
 #define __ppc__
 #endif
 
+/* If no memory model has been defined, assume RMO. */
 #if !defined(CK_MD_RMO) && !defined(CK_MD_TSO) && !defined(CK_MD_PSO)
 #define CK_MD_RMO
 #endif
 
+#define CK_VERSION "0.7.0"
+#define CK_GIT_SHA "db5db44"
+
 #endif /* CK_MD_H */
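For context on the CK_MD_CACHELINE choice above: the constant exists so hot,
independently written data can be padded out to whole cache lines, keeping
SMP writers from false sharing. An illustrative standalone sketch; the
struct and the hardcoded 64 are mine, not the header's:

#include <stdint.h>

#define MY_CACHELINE 64	/* stand-in for CK_MD_CACHELINE / CACHE_LINE_SIZE */

/* One counter per CPU, each owning a full line, so concurrent increments
 * on different CPUs never contend for the same cache line. */
struct pcpu_counter {
	uint64_t value;
	char pad[MY_CACHELINE - sizeof(uint64_t)];
} __attribute__((aligned(MY_CACHELINE)));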
@@ -43,6 +43,8 @@
 #include "gcc/sparcv9/ck_pr.h"
 #elif defined(__ppc64__)
 #include "gcc/ppc64/ck_pr.h"
+#elif defined(__s390x__)
+#include "gcc/s390x/ck_pr.h"
 #elif defined(__ppc__)
 #include "gcc/ppc/ck_pr.h"
 #elif defined(__arm__)
@@ -235,7 +235,7 @@ struct {							\
  * Singly-linked Tail queue functions.
  */
 #define	CK_STAILQ_CONCAT(head1, head2) do {				\
-	if ((head2)->stqh_first == NULL) {				\
+	if ((head2)->stqh_first != NULL) {				\
 		ck_pr_store_ptr((head1)->stqh_last, (head2)->stqh_first);\
 		ck_pr_fence_store();					\
 		(head1)->stqh_last = (head2)->stqh_last;		\
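The old test was inverted: with == NULL the splice body ran exactly when
head2 was empty, publishing a NULL first pointer through head1's tail. A
plain, non-atomic rendering of the corrected logic, with illustrative types
rather than CK's macros (the source-queue reset is part of the full macro,
not shown in this hunk):

struct node { struct node *next; };
struct sq { struct node *first; struct node **last; };

static void
sq_concat(struct sq *h1, struct sq *h2)
{

	if (h2->first != NULL) {	/* only splice a non-empty source */
		*h1->last = h2->first;	/* link source after target's tail */
		h1->last = h2->last;	/* target's tail is now source's */
		h2->first = NULL;	/* leave the source empty */
		h2->last = &h2->first;
	}
	return;
}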
@@ -176,23 +176,54 @@ _ck_ring_enqueue_mp(struct ck_ring *ring,
 
 	producer = ck_pr_load_uint(&ring->p_head);
 
-	do {
+	for (;;) {
 		/*
-		 * The snapshot of producer must be up to date with
-		 * respect to consumer.
+		 * The snapshot of producer must be up to date with respect to
+		 * consumer.
 		 */
 		ck_pr_fence_load();
 		consumer = ck_pr_load_uint(&ring->c_head);
 
 		delta = producer + 1;
-		if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) {
-			r = false;
-			goto leave;
+
+		/*
+		 * Only try to CAS if the producer is not clearly stale (not
+		 * less than consumer) and the buffer is definitely not full.
+		 */
+		if (CK_CC_LIKELY((producer - consumer) < mask)) {
+			if (ck_pr_cas_uint_value(&ring->p_head,
+			    producer, delta, &producer) == true) {
+				break;
+			}
+		} else {
+			unsigned int new_producer;
+
+			/*
+			 * Slow path. Either the buffer is full or we have a
+			 * stale snapshot of p_head. Execute a second read of
+			 * p_read that must be ordered wrt the snapshot of
+			 * c_head.
+			 */
+			ck_pr_fence_load();
+			new_producer = ck_pr_load_uint(&ring->p_head);
+
+			/*
+			 * Only fail if we haven't made forward progress in
+			 * production: the buffer must have been full when we
+			 * read new_producer (or we wrapped around UINT_MAX
+			 * during this iteration).
+			 */
+			if (producer == new_producer) {
+				r = false;
+				goto leave;
+			}
+
+			/*
+			 * p_head advanced during this iteration. Try again.
+			 */
+			producer = new_producer;
 		}
-	} while (ck_pr_cas_uint_value(&ring->p_head,
-	    producer,
-	    delta,
-	    &producer) == false);
+	}
 
 	buffer = (char *)buffer + ts * (producer & mask);
 	memcpy(buffer, entry, ts);
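The fast-path test above leans on free-running unsigned counters:
producer - consumer is the wrap-safe number of in-flight slots, so a result
below mask means the CAS is worth attempting. A standalone check of the wrap
behavior; the concrete values are mine:

#include <assert.h>

int
main(void)
{
	unsigned int mask = 7;			/* ring of 8 slots */
	unsigned int consumer = 4294967290u;	/* near UINT_MAX */
	unsigned int producer = consumer + 6;	/* wraps past zero */

	assert(producer - consumer == 6);	/* occupancy survives the wrap */
	assert(producer - consumer < mask);	/* still room to enqueue */
	return 0;
}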
@@ -103,28 +103,26 @@
 #define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X)
 
 /*
- * Portability wrappers for bitwise ops.
+ * Portability wrappers for bitwise operations.
  */
-
+#ifndef CK_MD_CC_BUILTIN_DISABLE
 #define CK_F_CC_FFS
-#define CK_F_CC_CLZ
-#define CK_F_CC_CTZ
-#define CK_F_CC_POPCOUNT
-
 CK_CC_INLINE static int
 ck_cc_ffs(unsigned int x)
 {
 
-	return __builtin_ffs(x);
+	return __builtin_ffsl(x);
 }
 
+#define CK_F_CC_FFSL
 CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
+ck_cc_ffsl(unsigned long x)
 {
 
-	return __builtin_clz(x);
+	return __builtin_ffsll(x);
 }
 
+#define CK_F_CC_CTZ
 CK_CC_INLINE static int
 ck_cc_ctz(unsigned int x)
 {
@@ -132,11 +130,12 @@ ck_cc_ctz(unsigned int x)
 	return __builtin_ctz(x);
 }
 
+#define CK_F_CC_POPCOUNT
 CK_CC_INLINE static int
 ck_cc_popcount(unsigned int x)
 {
 
 	return __builtin_popcount(x);
 }
-
+#endif /* CK_MD_CC_BUILTIN_DISABLE */
 #endif /* CK_GCC_CC_H */
@@ -80,7 +80,7 @@ ck_pr_md_load_ptr(const void *target)
 	void *r;
 
 	ck_pr_barrier();
-	r = CK_CC_DECONST_PTR(CK_PR_ACCESS(target));
+	r = CK_CC_DECONST_PTR(*(volatile void *const*)(target));
 	ck_pr_barrier();
 
 	return r;
@@ -91,7 +91,7 @@ ck_pr_md_store_ptr(void *target, const void *v)
 {
 
 	ck_pr_barrier();
-	CK_PR_ACCESS(target) = CK_CC_DECONST_PTR(v);
+	*(volatile void **)target = CK_CC_DECONST_PTR(v);
 	ck_pr_barrier();
 	return;
 }
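Per the commit message, the generic CK_PR_ACCESS path could end up in
compiler builtins that some targets lower to libatomic calls at -O0, while a
volatile dereference always compiles to a plain load or store. The bare shape
of the two accessors above, stripped of CK's naming and barriers (a sketch,
not the library's API):

static void *
load_ptr(const void *target)
{

	return *(void *const volatile *)target;
}

static void
store_ptr(void *target, void *value)
{

	*(void *volatile *)target = value;
}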
@@ -76,7 +76,7 @@ CK_PR_FENCE(store, "membar #StoreStore")
 CK_PR_FENCE(store_load, "membar #StoreLoad")
 CK_PR_FENCE(load, "membar #LoadLoad")
 CK_PR_FENCE(load_store, "membar #LoadStore")
-CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+CK_PR_FENCE(memory, "membar #MemIssue")
 CK_PR_FENCE(acquire, "membar #LoadLoad | #LoadStore")
 CK_PR_FENCE(release, "membar #LoadStore | #StoreStore")
 CK_PR_FENCE(acqrel, "membar #LoadLoad | #LoadStore | #StoreStore")
@@ -45,15 +45,9 @@
 /* Minimum requirements for the CK_PR interface are met. */
 #define CK_F_PR
 
-#ifdef CK_MD_UMP
-#define CK_PR_LOCK_PREFIX
-#else
-#define CK_PR_LOCK_PREFIX "lock "
-#endif
-
 /*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
+ * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
+ * delay".
  */
 CK_CC_INLINE static void
 ck_pr_stall(void)
@@ -62,28 +56,52 @@ ck_pr_stall(void)
 	return;
 }
 
+#ifdef CK_MD_UMP
+#define CK_PR_LOCK_PREFIX
+#define CK_PR_FENCE(T, I)				\
+	CK_CC_INLINE static void			\
+	ck_pr_fence_strict_##T(void)			\
+	{						\
+		__asm__ __volatile__("" ::: "memory");	\
+		return;					\
+	}
+#else
+#define CK_PR_LOCK_PREFIX "lock "
 #define CK_PR_FENCE(T, I)				\
 	CK_CC_INLINE static void			\
 	ck_pr_fence_strict_##T(void)			\
 	{						\
 		__asm__ __volatile__(I ::: "memory");	\
+		return;					\
 	}
+#endif /* CK_MD_UMP */
 
-CK_PR_FENCE(atomic, "sfence")
-CK_PR_FENCE(atomic_store, "sfence")
-CK_PR_FENCE(atomic_load, "mfence")
-CK_PR_FENCE(store_atomic, "sfence")
-CK_PR_FENCE(load_atomic, "mfence")
-CK_PR_FENCE(load, "lfence")
-CK_PR_FENCE(load_store, "mfence")
-CK_PR_FENCE(store, "sfence")
-CK_PR_FENCE(store_load, "mfence")
-CK_PR_FENCE(memory, "mfence")
-CK_PR_FENCE(release, "mfence")
-CK_PR_FENCE(acquire, "mfence")
-CK_PR_FENCE(acqrel, "mfence")
-CK_PR_FENCE(lock, "mfence")
-CK_PR_FENCE(unlock, "mfence")
+#if defined(CK_MD_SSE_DISABLE)
+/* If SSE is disabled, then use atomic operations for serialization. */
+#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
+#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
+#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
+#else
+#define CK_MD_X86_SFENCE "sfence"
+#define CK_MD_X86_LFENCE "lfence"
+#define CK_MD_X86_MFENCE "mfence"
+#endif /* !CK_MD_SSE_DISABLE */
+
+CK_PR_FENCE(atomic, "")
+CK_PR_FENCE(atomic_store, "")
+CK_PR_FENCE(atomic_load, "")
+CK_PR_FENCE(store_atomic, "")
+CK_PR_FENCE(load_atomic, "")
+CK_PR_FENCE(load, CK_MD_X86_LFENCE)
+CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
+CK_PR_FENCE(store, CK_MD_X86_SFENCE)
+CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
+CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
+CK_PR_FENCE(release, CK_MD_X86_MFENCE)
+CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
+CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
+CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
+CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)
 
 #undef CK_PR_FENCE
 
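The CK_MD_SSE_DISABLE branch above works because any locked read-modify-write
is a full barrier on x86, so lock addl $0, (%%esp) gives pre-SSE2 CPUs mfence
semantics without the mfence instruction. The same idiom as a standalone
inline, assuming GCC extended asm on an i386 target:

static inline void
x86_full_fence(void)
{

	/* A locked no-op RMW on the top of the stack orders all prior
	 * loads and stores against all later ones. */
	__asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory", "cc");
}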
@@ -58,8 +58,8 @@
 #endif
 
 /*
- * Prevent speculative execution in busy-wait loops (P4 <=)
- * or "predefined delay".
+ * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
+ * delay".
  */
 CK_CC_INLINE static void
 ck_pr_stall(void)
@@ -75,18 +75,39 @@ ck_pr_stall(void)
 		__asm__ __volatile__(I ::: "memory");	\
 	}
 
-CK_PR_FENCE(atomic, "sfence")
-CK_PR_FENCE(atomic_store, "sfence")
-CK_PR_FENCE(atomic_load, "mfence")
-CK_PR_FENCE(store_atomic, "sfence")
-CK_PR_FENCE(load_atomic, "mfence")
+/* Atomic operations are always serializing. */
+CK_PR_FENCE(atomic, "")
+CK_PR_FENCE(atomic_store, "")
+CK_PR_FENCE(atomic_load, "")
+CK_PR_FENCE(store_atomic, "")
+CK_PR_FENCE(load_atomic, "")
+
+/* Traditional fence interface. */
 CK_PR_FENCE(load, "lfence")
 CK_PR_FENCE(load_store, "mfence")
 CK_PR_FENCE(store, "sfence")
 CK_PR_FENCE(store_load, "mfence")
 CK_PR_FENCE(memory, "mfence")
+
+/* Below are stdatomic-style fences. */
+
+/*
+ * Provides load-store and store-store ordering. However, Intel specifies that
+ * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
+ * particular, stores are not re-ordered with respect to prior loads and it is
+ * really just the stores that are subject to re-ordering). However, we take
+ * the conservative route as the manuals are too ambiguous for my taste.
+ */
 CK_PR_FENCE(release, "mfence")
+
+/*
+ * Provides load-load and load-store ordering. The lfence instruction ensures
+ * all prior load operations are complete before any subsequent instructions
+ * actually begin execution. However, the manual also ends up going to describe
+ * WC memory as a relaxed model.
+ */
 CK_PR_FENCE(acquire, "mfence")
+
 CK_PR_FENCE(acqrel, "mfence")
 CK_PR_FENCE(lock, "mfence")
 CK_PR_FENCE(unlock, "mfence")
@@ -111,7 +111,8 @@ ck_spinlock_dec_lock_eb(struct ck_spinlock_dec *lock)
 		if (r == true)
 			break;
 
-		ck_backoff_eb(&backoff);
+		while (ck_pr_load_uint(&lock->value) != 1)
+			ck_backoff_eb(&backoff);
 	}
 
 	ck_pr_fence_lock();
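The retry path now spins on a plain load until the lock word reads as free
(1 for this decrement lock) before attempting the expensive atomic again,
the classic test-and-test-and-set shape that keeps waiters on a shared cache
line. The same shape sketched with C11 atomics rather than CK's primitives:

#include <stdatomic.h>
#include <stdbool.h>

static void
ttas_lock(atomic_bool *lock)
{

	for (;;) {
		if (atomic_exchange(lock, true) == false)
			return;				/* acquired */
		while (atomic_load(lock) == true)
			;	/* read-only spin, no bus-locked traffic */
	}
}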
@@ -105,21 +105,10 @@ ck_hs_map_signal(struct ck_hs_map *map, unsigned long h)
 		return;
 }
 
-void
-ck_hs_iterator_init(struct ck_hs_iterator *iterator)
-{
-
-	iterator->cursor = NULL;
-	iterator->offset = 0;
-	return;
-}
-
-bool
-ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
-{
-	struct ck_hs_map *map = hs->map;
+static bool
+_ck_hs_next(struct ck_hs *hs, struct ck_hs_map *map, struct ck_hs_iterator *i, void **key)
+{
 	void *value;
 
 	if (i->offset >= map->capacity)
 		return false;
 
|
|||||||
#ifdef CK_HS_PP
|
#ifdef CK_HS_PP
|
||||||
if (hs->mode & CK_HS_MODE_OBJECT)
|
if (hs->mode & CK_HS_MODE_OBJECT)
|
||||||
value = CK_HS_VMA(value);
|
value = CK_HS_VMA(value);
|
||||||
|
#else
|
||||||
|
(void)hs; /* Avoid unused parameter warning. */
|
||||||
#endif
|
#endif
|
||||||
i->offset++;
|
i->offset++;
|
||||||
*key = value;
|
*key = value;
|
||||||
@ -139,6 +130,32 @@ ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ck_hs_iterator_init(struct ck_hs_iterator *iterator)
|
||||||
|
{
|
||||||
|
|
||||||
|
iterator->cursor = NULL;
|
||||||
|
iterator->offset = 0;
|
||||||
|
iterator->map = NULL;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
|
||||||
|
{
|
||||||
|
return _ck_hs_next(hs, hs->map, i, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ck_hs_next_spmc(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
|
||||||
|
{
|
||||||
|
struct ck_hs_map *m = i->map;
|
||||||
|
if (m == NULL) {
|
||||||
|
m = i->map = ck_pr_load_ptr(&hs->map);
|
||||||
|
}
|
||||||
|
return _ck_hs_next(hs, m, i, key);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ck_hs_stat(struct ck_hs *hs, struct ck_hs_stat *st)
|
ck_hs_stat(struct ck_hs *hs, struct ck_hs_stat *st)
|
||||||
{
|
{
|
||||||
@@ -206,7 +223,7 @@ ck_hs_map_create(struct ck_hs *hs, unsigned long entries)
 	map->probe_limit = (unsigned int)limit;
 	map->probe_maximum = 0;
 	map->capacity = n_entries;
-	map->step = ck_internal_bsf(n_entries);
+	map->step = ck_cc_ffsl(n_entries);
 	map->mask = n_entries - 1;
 	map->n_entries = 0;
 
@@ -171,7 +171,7 @@ ck_ht_map_create(struct ck_ht *table, CK_HT_TYPE entries)
 	map->deletions = 0;
 	map->probe_maximum = 0;
 	map->capacity = n_entries;
-	map->step = ck_internal_bsf_64(map->capacity);
+	map->step = ck_cc_ffsll(map->capacity);
 	map->mask = map->capacity - 1;
 	map->n_entries = 0;
 	map->entries = (struct ck_ht_entry *)(((uintptr_t)&map[1] + prefix +
@@ -88,7 +88,15 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r )
 
 FORCE_INLINE static uint32_t getblock ( const uint32_t * p, int i )
 {
+#ifdef __s390x__
+	uint32_t res;
+
+	__asm__ ("	lrv	%0,%1\n"
+	   : "=r" (res) : "Q" (p[i]) : "cc", "mem");
+	return res;
+#else
 	return p[i];
+#endif /* !__s390x__ */
 }
 
 //-----------------------------------------------------------------------------
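MurmurHash consumes blocks as little-endian 32-bit words; on big-endian
s390x the lrv instruction performs a byte-reversed load to preserve that. A
portable equivalent of the new block, with a made-up function name:

#include <stdint.h>
#include <string.h>

static uint32_t
load32_le(const void *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));	/* alignment-safe load */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	v = __builtin_bswap32(v);	/* byte-reverse on big-endian hosts */
#endif
	return v;
}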
@@ -147,7 +155,9 @@ static inline void MurmurHash3_x86_32 ( const void * key, int len,
 	switch(len & 3)
 	{
 	case 3: k1 ^= tail[2] << 16;
+	/* fall through */
 	case 2: k1 ^= tail[1] << 8;
+	/* fall through */
 	case 1: k1 ^= tail[0];
 		k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
 	};
@@ -196,11 +206,17 @@ static inline uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed
 	switch(len & 7)
 	{
 	case 7: h ^= (uint64_t)(data2[6]) << 48;
+	/* fall through */
 	case 6: h ^= (uint64_t)(data2[5]) << 40;
+	/* fall through */
 	case 5: h ^= (uint64_t)(data2[4]) << 32;
+	/* fall through */
 	case 4: h ^= (uint64_t)(data2[3]) << 24;
+	/* fall through */
 	case 3: h ^= (uint64_t)(data2[2]) << 16;
+	/* fall through */
 	case 2: h ^= (uint64_t)(data2[1]) << 8;
+	/* fall through */
 	case 1: h ^= (uint64_t)(data2[0]);
 		h *= m;
 	};
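The added /* fall through */ comments are load-bearing: GCC's
-Wimplicit-fallthrough (enabled by -Wextra since GCC 7) recognizes such
comments and stays quiet for these intentional cascades. For example:

static int
classify(int n)
{
	int score = 0;

	switch (n) {
	case 2: score += 2;
		/* fall through */
	case 1: score += 1;
		/* fall through */
	default:
		break;
	}
	return score;	/* classify(2) == 3: both cases ran */
}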
@@ -249,7 +265,9 @@ static inline uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed
 	switch(len)
 	{
 	case 3: h2 ^= ((const unsigned char*)data)[2] << 16;
+	/* fall through */
 	case 2: h2 ^= ((const unsigned char*)data)[1] << 8;
+	/* fall through */
 	case 1: h2 ^= ((const unsigned char*)data)[0];
 		h2 *= m;
 	};
@@ -80,40 +80,3 @@ ck_internal_max_32(uint32_t x, uint32_t y)
 
 	return x ^ ((x ^ y) & -(x < y));
 }
-
-CK_CC_INLINE static unsigned long
-ck_internal_bsf(unsigned long v)
-{
-#if defined(__GNUC__)
-	return __builtin_ffs(v);
-#else
-	unsigned int i;
-	const unsigned int s = sizeof(unsigned long) * 8 - 1;
-
-	for (i = 0; i < s; i++) {
-		if (v & (1UL << (s - i)))
-			return sizeof(unsigned long) * 8 - i;
-	}
-
-	return 1;
-#endif /* !__GNUC__ */
-}
-
-CK_CC_INLINE static uint64_t
-ck_internal_bsf_64(uint64_t v)
-{
-#if defined(__GNUC__)
-	return __builtin_ffs(v);
-#else
-	unsigned int i;
-	const unsigned int s = sizeof(unsigned long) * 8 - 1;
-
-	for (i = 0; i < s; i++) {
-		if (v & (1ULL << (63U - i)))
-			return i;
-	}
-#endif /* !__GNUC__ */
-
-	return 1;
-}
@@ -366,7 +366,7 @@ ck_rhs_map_create(struct ck_rhs *hs, unsigned long entries)
 	map->probe_limit = (unsigned int)limit;
 	map->probe_maximum = 0;
 	map->capacity = n_entries;
-	map->step = ck_internal_bsf(n_entries);
+	map->step = ck_cc_ffsl(n_entries);
 	map->mask = n_entries - 1;
 	map->n_entries = 0;
 