ExtendedAtomicOps-clang-gcc.h
#ifndef UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
# define UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION 1
#endif
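// When non-zero (the default), the typedefs and asserts below require every atomic type used here to be lock-free.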
namespace detail
{
#if UNITY_ATOMIC_USE_CLANG_ATOMICS && UNITY_ATOMIC_USE_GCC_ATOMICS
# error Cannot use both Clang and GCC atomic built-ins
#elif UNITY_ATOMIC_USE_CLANG_ATOMICS
# if !__has_feature(c_atomic) && !__has_extension(c_atomic)
# error "missing atomic built-in functions"
# endif
# define INTERNAL_UNITY_ATOMIC_THREAD_FENCE(memorder) __c11_atomic_thread_fence(memorder)
# define INTERNAL_UNITY_ATOMIC_LOAD(ptr, memorder) __c11_atomic_load(ptr, memorder)
# define INTERNAL_UNITY_ATOMIC_STORE(ptr, value, memorder) __c11_atomic_store(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_EXCHANGE(ptr, value, memorder) __c11_atomic_exchange(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(ptr, oldval, newval, success, fail) __c11_atomic_compare_exchange_strong(ptr, oldval, newval, success, fail)
# define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, oldval, newval, success, fail) __c11_atomic_compare_exchange_weak(ptr, oldval, newval, success, fail)
# define INTERNAL_UNITY_ATOMIC_FETCH_ADD(ptr, value, memorder) __c11_atomic_fetch_add(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_FETCH_SUB(ptr, value, memorder) __c11_atomic_fetch_sub(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_TYPE(type) _Atomic(type)
# define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) __c11_atomic_is_lock_free(sizeof(type))
#elif UNITY_ATOMIC_USE_GCC_ATOMICS
# if (!PLATFORM_PS4) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 7))
# error "__atomic built-in functions not supported on GCC versions older than 4.7"
# endif
# if UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
# if __GCC_ATOMIC_INT_LOCK_FREE + 0 != 2 || __GCC_ATOMIC_LLONG_LOCK_FREE + 0 != 2
# error "atomic ops are not lock-free for some required data types"
# endif
# endif
# define INTERNAL_UNITY_ATOMIC_THREAD_FENCE(memorder) __atomic_thread_fence(memorder)
# define INTERNAL_UNITY_ATOMIC_LOAD(ptr, memorder) __atomic_load_n(ptr, memorder)
# define INTERNAL_UNITY_ATOMIC_STORE(ptr, value, memorder) __atomic_store_n(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_EXCHANGE(ptr, value, memorder) __atomic_exchange_n(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(ptr, oldval, newval, success, fail) __atomic_compare_exchange_n(ptr, oldval, newval, false, success, fail)
# define INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(ptr, oldval, newval, success, fail) __atomic_compare_exchange_n(ptr, oldval, newval, true, success, fail)
# define INTERNAL_UNITY_ATOMIC_FETCH_ADD(ptr, value, memorder) __atomic_fetch_add(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_FETCH_SUB(ptr, value, memorder) __atomic_fetch_sub(ptr, value, memorder)
# define INTERNAL_UNITY_ATOMIC_TYPE(type) type
# if __GNUC__ >= 5
// GCC prior to 5 did not allow __atomic_always_lock_free in constant expressions such as CompileTimeAssert
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62024
# define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) __atomic_always_lock_free(sizeof(type), 0)
# else
# define INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(type) true
# endif
#else
# error One of UNITY_ATOMIC_USE_CLANG_ATOMICS or UNITY_ATOMIC_USE_GCC_ATOMICS must be defined to 1
#endif
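// Map the memory-order tag types onto the compiler's __ATOMIC_* constants.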
inline int MemOrder(memory_order_relaxed_t) { return __ATOMIC_RELAXED; }
inline int MemOrder(memory_order_release_t) { return __ATOMIC_RELEASE; }
inline int MemOrder(memory_order_acquire_t) { return __ATOMIC_ACQUIRE; }
inline int MemOrder(memory_order_acq_rel_t) { return __ATOMIC_ACQ_REL; }
inline int MemOrder(memory_order_seq_cst_t) { return __ATOMIC_SEQ_CST; }
void MemOrder(...); // selecting this overload generates a compile error for unsupported memory-order types
#define INTERNAL_UNITY_ATOMIC_TYPEDEF(nonatomic, atomic) \
typedef INTERNAL_UNITY_ATOMIC_TYPE(nonatomic) atomic; \
CompileTimeAssert(!UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION || INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE(atomic), #atomic " is not lock-free on this platform")
INTERNAL_UNITY_ATOMIC_TYPEDEF(non_atomic_word, native_atomic_word);
INTERNAL_UNITY_ATOMIC_TYPEDEF(non_atomic_word2, native_atomic_word2);
INTERNAL_UNITY_ATOMIC_TYPEDEF(int, native_atomic_int);
#if UNITY_ATOMIC_FORCE_LOCKFREE_IMPLEMENTATION
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 + 0, "requires 32bit CAS");
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 + 0, "requires 64bit CAS");
// arm64 gets a special 128-bit implementation further below
#if __SIZEOF_POINTER__ == 8 && !defined(__arm64__) && !defined(__aarch64__)
CompileTimeAssert(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 + 0, "requires 128bit CAS");
#endif
#endif
#undef INTERNAL_UNITY_ATOMIC_TYPEDEF
inline native_atomic_word* AtomicPtr(atomic_word* p) { return reinterpret_cast<native_atomic_word*>(p); }
inline volatile native_atomic_word* AtomicPtr(volatile atomic_word* p) { return reinterpret_cast<volatile native_atomic_word*>(p); }
inline native_atomic_word2* AtomicPtr(atomic_word2* p) { return reinterpret_cast<native_atomic_word2*>(&p->v); }
inline volatile native_atomic_word2* AtomicPtr(volatile atomic_word2* p) { return reinterpret_cast<volatile native_atomic_word2*>(&p->v); }
inline non_atomic_word* NonAtomicPtr(atomic_word* v) { return v; }
// same as above: inline non_atomic_word* NonAtomicPtr(non_atomic_word* v) { return v; }
inline non_atomic_word2* NonAtomicPtr(atomic_word2* v) { return &v->v; }
inline non_atomic_word2* NonAtomicPtr(non_atomic_word2* v) { return v; }
inline non_atomic_word NonAtomicValue(atomic_word v) { return v; }
// same as above: inline non_atomic_word NonAtomicValue(non_atomic_word v) { return v; }
inline non_atomic_word2 NonAtomicValue(atomic_word2 v) { return v.v; }
inline non_atomic_word2 NonAtomicValue(non_atomic_word2 v) { return v; }
inline atomic_word UnityAtomicValue(non_atomic_word v) { return v; }
inline atomic_word2 UnityAtomicValue(non_atomic_word2 v) { atomic_word2 r; r.v = v; return r; }
#ifdef UNITY_ATOMIC_INT_OVERLOAD
inline native_atomic_int* AtomicPtr(int* p) { return reinterpret_cast<native_atomic_int*>(p); }
inline volatile native_atomic_int* AtomicPtr(volatile int* p) { return reinterpret_cast<volatile native_atomic_int*>(p); }
inline int* NonAtomicPtr(int* v) { return v; }
inline int NonAtomicValue(int v) { return v; }
inline int UnityAtomicValue(int v) { return v; }
#endif
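// Identity<T> puts the value parameter in a non-deduced context, so T is deduced from the pointer
// argument only and the value argument (e.g. an integer literal) simply converts to T.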
template<typename T> struct Identity { typedef T type; };
} // namespace detail
template<typename MemOrder>
static inline void atomic_thread_fence(MemOrder memOrder)
{
INTERNAL_UNITY_ATOMIC_THREAD_FENCE(detail::MemOrder(memOrder));
}
template<typename T, typename MemOrder>
static inline T atomic_load_explicit(const volatile T* p, MemOrder memOrder)
{
return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_LOAD(detail::AtomicPtr(const_cast<T*>(p)), detail::MemOrder(memOrder)));
}
template<typename T, typename MemOrder>
static inline void atomic_store_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
INTERNAL_UNITY_ATOMIC_STORE(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder));
}
template<typename T, typename MemOrder>
static inline T atomic_exchange_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_EXCHANGE(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}
template<typename T, typename MemOrderSuccess, typename MemOrderFail>
static inline bool atomic_compare_exchange_weak_explicit(volatile T* p, T* oldval, typename detail::Identity<T>::type newval,
MemOrderSuccess memOrderSuccess, MemOrderFail memOrderFail)
{
return INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK(detail::AtomicPtr(p), detail::NonAtomicPtr(oldval), detail::NonAtomicValue(newval),
detail::MemOrder(memOrderSuccess), detail::MemOrder(memOrderFail));
}
template<typename T, typename MemOrderSuccess, typename MemOrderFail>
static inline bool atomic_compare_exchange_strong_explicit(volatile T* p, T* oldval, typename detail::Identity<T>::type newval,
MemOrderSuccess memOrderSuccess, MemOrderFail memOrderFail)
{
return INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG(detail::AtomicPtr(p), detail::NonAtomicPtr(oldval), detail::NonAtomicValue(newval),
detail::MemOrder(memOrderSuccess), detail::MemOrder(memOrderFail));
}
template<typename T, typename MemOrder>
static inline T atomic_fetch_add_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_FETCH_ADD(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}
template<typename T, typename MemOrder>
static inline T atomic_fetch_sub_explicit(volatile T* p, typename detail::Identity<T>::type v, MemOrder memOrder)
{
return detail::UnityAtomicValue(INTERNAL_UNITY_ATOMIC_FETCH_SUB(detail::AtomicPtr(p), detail::NonAtomicValue(v), detail::MemOrder(memOrder)));
}
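// Illustrative sketch (not part of the original header; the function and variable names are
// hypothetical): typical use of the explicit-order templates above on an atomic_word -
// a relaxed increment plus a weak-CAS retry loop.
static inline atomic_word UnityAtomicUsageSketch(volatile atomic_word* counter)
{
    // statistics-style increment: no ordering required
    atomic_fetch_add_explicit(counter, 1, ::memory_order_relaxed);

    // CAS retry loop: on failure 'expected' is refreshed with the currently stored value
    atomic_word expected = atomic_load_explicit(counter, ::memory_order_relaxed);
    while (!atomic_compare_exchange_weak_explicit(counter, &expected, expected + 1,
        ::memory_order_acq_rel, ::memory_order_relaxed))
    {
    }
    return expected; // the value observed just before the successful increment
}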
/*
* extensions
*/
static inline void atomic_retain(volatile int* p)
{
atomic_fetch_add_explicit(p, 1, ::memory_order_relaxed);
}
static inline bool atomic_release(volatile int* p)
{
// Both paths here should be correct on any platform.
// On architectures where a read-modify-write with memory_order_acq_rel is more expensive than memory_order_release,
// the idea is to issue a standalone memory_order_acquire fence instead, and only when the reference count drops to 0.
// Only then is acquire/release synchronization needed, to make sure everything prior to atomic_release happens-before running the destructor.
#if defined(__arm__) || defined(__arm64__)
bool res = atomic_fetch_sub_explicit(p, 1, ::memory_order_release) == 1;
if (res)
{
atomic_thread_fence(::memory_order_acquire);
}
return res;
#else
return atomic_fetch_sub_explicit(p, 1, ::memory_order_acq_rel) == 1;
#endif
}
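// Illustrative sketch (not part of the original header; the type and function names below are
// hypothetical): the intended retain/release pattern. Whoever gets 'true' back from atomic_release
// owns the last reference and is responsible for destroying the object.
struct HypotheticalRefCounted
{
    volatile int refCount; // starts at 1 for the creating reference
};

static inline void HypotheticalAddRef(HypotheticalRefCounted* obj)
{
    atomic_retain(&obj->refCount);
}

static inline bool HypotheticalReleaseRef(HypotheticalRefCounted* obj)
{
    // returns true exactly once, when the count drops to zero - run the destructor / free at that point
    return atomic_release(&obj->refCount);
}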
#undef INTERNAL_UNITY_ATOMIC_THREAD_FENCE
#undef INTERNAL_UNITY_ATOMIC_LOAD
#undef INTERNAL_UNITY_ATOMIC_STORE
#undef INTERNAL_UNITY_ATOMIC_EXCHANGE
#undef INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_STRONG
#undef INTERNAL_UNITY_ATOMIC_COMPARE_EXCHANGE_WEAK
#undef INTERNAL_UNITY_ATOMIC_FETCH_ADD
#undef INTERNAL_UNITY_ATOMIC_FETCH_SUB
#undef INTERNAL_UNITY_ATOMIC_TYPE
#undef INTERNAL_UNITY_ATOMIC_IS_LOCK_FREE
// The only way to get atomic 128-bit memory accesses on ARM64 is to use ld(r|a)ex/st(r|l)ex in a loop.
// Going forward we want to get rid of most of this by undefining ATOMIC_HAS_DCAS and providing a custom implementation of AtomicQueue and friends.
#if __SIZEOF_POINTER__ == 8 && (defined(__arm64__) || defined(__aarch64__))
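// Note: the 128-bit loads below write the loaded value straight back with a store-exclusive. On ARMv8.0
// (without LSE) a bare 128-bit load is not guaranteed to be single-copy atomic; it is the successful
// load-exclusive/store-exclusive pair that makes the access atomic.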
static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_relaxed_t)
{
non_atomic_word2 v; volatile non_atomic_word2* pv = (volatile non_atomic_word2*)&p->v;
do
{
v = __builtin_arm_ldrex(pv);
}
while (__builtin_arm_strex(v, pv));
return (atomic_word2) {.v = v};
}
static inline atomic_word2 atomic_load_explicit(const volatile atomic_word2* p, memory_order_acquire_t)
{
non_atomic_word2 v; volatile non_atomic_word2* pv = (volatile non_atomic_word2*)&p->v;
do
{
v = __builtin_arm_ldaex(pv);
}
while (__builtin_arm_strex(v, pv));
return (atomic_word2) {.v = v};
}
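// In the store loops below the value returned by the exclusive load is intentionally unused: the load
// only arms the exclusive monitor so that the paired store-exclusive publishes the whole 128-bit value atomically.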
static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_relaxed_t)
{
non_atomic_word2 tmp; volatile non_atomic_word2* pv = &p->v;
do
{
tmp = __builtin_arm_ldrex(pv);
}
while (__builtin_arm_strex(v.v, pv));
}
static inline void atomic_store_explicit(volatile atomic_word2* p, atomic_word2 v, memory_order_release_t)
{
non_atomic_word2 tmp; volatile non_atomic_word2* pv = &p->v;
do
{
tmp = __builtin_arm_ldrex(pv);
}
while (__builtin_arm_stlex(v.v, pv));
}
static inline atomic_word2 atomic_exchange_explicit(volatile atomic_word2* p, atomic_word2 val, memory_order_acq_rel_t)
{
non_atomic_word2 ret; volatile non_atomic_word2* pv = &p->v;
do
{
ret = __builtin_arm_ldaex(pv);
}
while (__builtin_arm_stlex(val.v, pv));
return (atomic_word2) {.v = ret};
}
// The story behind this: in the arm64 asm implementation header we had overloads for memory_order_acquire_t,
// memory_order_release_t and int, with the int overload taken in all other cases - that was the
// memory_order_acq_rel_t implementation below.
// Now that they have moved here, an int overload loses overload resolution to the template above and would
// never be taken, which is why explicit overloads per memory-order tag are needed.
// As for seq_cst being treated the same as acq_rel:
// first, that was already the case in the asm implementation (and it worked for quite some time);
// second, Apple itself appears to use ldaxr/stlxr for that case (without an extra dmb) - both in some
// Apple open-source code and in the asm generated for OSAtomicAdd32Barrier and friends.
#define COMPARE_EXCHANGE_IMPL(LOAD_FUNC, STORE_FUNC) \
const non_atomic_word2 cmp = oldval->v; volatile non_atomic_word2* pv = &p->v; bool success = false; \
do \
{ \
non_atomic_word2 cur = oldval->v = LOAD_FUNC(pv); \
success = (cur == cmp); \
if (!success) \
{ \
__builtin_arm_clrex(); \
break; \
} \
} \
while (STORE_FUNC(newval.v, pv)); \
return success;
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acquire_t, memory_order_relaxed_t)
{
COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_strex);
}
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_release_t, memory_order_relaxed_t)
{
COMPARE_EXCHANGE_IMPL(__builtin_arm_ldrex, __builtin_arm_stlex);
}
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_acq_rel_t, memory_order_relaxed_t)
{
COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_stlex);
}
static inline bool atomic_compare_exchange_strong_explicit(volatile atomic_word2* p, atomic_word2* oldval, atomic_word2 newval, memory_order_seq_cst_t, memory_order_relaxed_t)
{
COMPARE_EXCHANGE_IMPL(__builtin_arm_ldaex, __builtin_arm_stlex);
}
#undef COMPARE_EXCHANGE_IMPL
#endif // __SIZEOF_POINTER__ == 8 && (defined(__arm64__) || defined(__aarch64__))
// When implementing atomic operations in an ARM-specific way we need to account for ARMv7/ARMv8 differences:
// ARMv8 has ldaex/stlex, which carry acquire/release semantics;
// on ARMv7 we need to insert the fence ourselves.
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
#if defined(__arm64__) || defined(__aarch64__)
#define UNITY_ATOMIC_ARMV7_DMB_ISH
#define UNITY_ATOMIC_ARMV8_LDAEX __builtin_arm_ldaex
#define UNITY_ATOMIC_ARMV8_STLEX __builtin_arm_stlex
#else
#define UNITY_ATOMIC_ARMV7_DMB_ISH __builtin_arm_dmb(11);
#define UNITY_ATOMIC_ARMV8_LDAEX __builtin_arm_ldrex
#define UNITY_ATOMIC_ARMV8_STLEX __builtin_arm_strex
#endif
#endif
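// Illustrative sketch (not from the original header) of how these helpers are meant to combine -
// an acquire-release 32-bit exchange might look like:
//
//     UNITY_ATOMIC_ARMV7_DMB_ISH                  // ARMv7: release fence before the loop; expands to nothing on ARMv8
//     int old;
//     do
//     {
//         old = UNITY_ATOMIC_ARMV8_LDAEX(p);      // ldaex (acquire) on ARMv8, plain ldrex on ARMv7
//     }
//     while (UNITY_ATOMIC_ARMV8_STLEX(v, p));     // stlex (release) on ARMv8, plain strex on ARMv7
//     UNITY_ATOMIC_ARMV7_DMB_ISH                  // ARMv7: acquire fence after the loop; expands to nothing on ARMv8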