/*
 * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Justin Haygood ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <atomic>
#include <wtf/StdLibExtras.h>

#if OS(WINDOWS)
#if !COMPILER(GCC_COMPATIBLE)
extern "C" void _ReadWriteBarrier(void);
#pragma intrinsic(_ReadWriteBarrier)
#endif
#include <windows.h>
#include <intrin.h>
#endif

namespace WTF {

ALWAYS_INLINE bool hasFence(std::memory_order order)
{
    return order != std::memory_order_relaxed;
}

// Atomic wraps around std::atomic with the sole purpose of making the compare_exchange
// operations not alter the expected value. This is more in line with how we typically
// use CAS in our code.
//
// Atomic is a struct without explicitly defined constructors so that it can be
// initialized at compile time.
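//
// Illustrative usage (a sketch; `counter` is hypothetical), showing that a failed CAS does not
// write back into `expected`; instead, compareExchangeStrong() returns the value that was
// actually observed:
//
//     Atomic<unsigned> counter;
//     unsigned expected = counter.load();
//     for (;;) {
//         unsigned actual = counter.compareExchangeStrong(expected, expected + 1);
//         if (actual == expected)
//             break;
//         expected = actual;
//     }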

template<typename T>
struct Atomic {
    WTF_MAKE_STRUCT_FAST_ALLOCATED;

    // Don't pass a non-default value for the order parameter unless you really know
    // what you are doing and have thought about it very hard. The cost of seq_cst
    // is usually not high enough to justify the risk.

    ALWAYS_INLINE T load(std::memory_order order = std::memory_order_seq_cst) const { return value.load(order); }

    ALWAYS_INLINE T loadRelaxed() const { return load(std::memory_order_relaxed); }

    // This is a load that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE T loadFullyFenced() const
    {
        Atomic<T>* ptr = const_cast<Atomic<T>*>(this);
        return ptr->exchangeAdd(T());
    }

    ALWAYS_INLINE void store(T desired, std::memory_order order = std::memory_order_seq_cst) { value.store(desired, order); }

    ALWAYS_INLINE void storeRelaxed(T desired) { store(desired, std::memory_order_relaxed); }

    // This is a store that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE void storeFullyFenced(T desired)
    {
        exchange(desired);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order);
    }

    ALWAYS_INLINE bool compareExchangeWeakRelaxed(T expected, T desired)
    {
        return compareExchangeWeak(expected, desired, std::memory_order_relaxed);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order_success, order_failure);
    }

    // WARNING: This does not have strong fencing guarantees when it fails. For example, stores could
    // sink below it in that case.
    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order);
        return expectedOrActual;
    }

    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order_success, order_failure);
        return expectedOrActual;
    }

    template<typename U>
    ALWAYS_INLINE T exchangeAdd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_add(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeAnd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_and(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeOr(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_or(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeSub(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_sub(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeXor(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_xor(operand, order); }

    ALWAYS_INLINE T exchange(T newValue, std::memory_order order = std::memory_order_seq_cst) { return value.exchange(newValue, order); }

    template<typename Func>
    ALWAYS_INLINE bool transaction(const Func& func, std::memory_order order = std::memory_order_seq_cst)
    {
        for (;;) {
            T oldValue = load(std::memory_order_relaxed);
            T newValue = oldValue;
            if (!func(newValue))
                return false;
            if (compareExchangeWeak(oldValue, newValue, order))
                return true;
        }
    }

    template<typename Func>
    ALWAYS_INLINE bool transactionRelaxed(const Func& func)
    {
        return transaction(func, std::memory_order_relaxed);
    }
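
    // Illustrative use of transaction() (a sketch; `flags` and `dirtyBit` are hypothetical):
    // func() mutates a scratch copy of the current value and returns whether to commit it.
    // Returning false aborts without touching the atomic; returning true commits via a CAS
    // loop that retries on contention.
    //
    //     Atomic<uint8_t> flags;
    //     bool didSet = flags.transaction([] (uint8_t& value) {
    //         if (value & dirtyBit)
    //             return false;
    //         value |= dirtyBit;
    //         return true;
    //     });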

    Atomic() = default;
    constexpr Atomic(T initial)
        : value(std::forward<T>(initial))
    {
    }

    std::atomic<T> value;
};

template<typename T>
inline T atomicLoad(T* location, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->load(order);
}

template<typename T>
inline T atomicLoadFullyFenced(T* location)
{
    return bitwise_cast<Atomic<T>*>(location)->loadFullyFenced();
}

template<typename T>
inline void atomicStore(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    bitwise_cast<Atomic<T>*>(location)->store(newValue, order);
}

template<typename T>
inline void atomicStoreFullyFenced(T* location, T newValue)
{
    bitwise_cast<Atomic<T>*>(location)->storeFullyFenced(newValue);
}

template<typename T>
inline bool atomicCompareExchangeWeak(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeak(expected, newValue, order);
}

template<typename T>
inline bool atomicCompareExchangeWeakRelaxed(T* location, T expected, T newValue)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeakRelaxed(expected, newValue);
}

template<typename T>
inline T atomicCompareExchangeStrong(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeStrong(expected, newValue, order);
}

template<typename T, typename U>
inline T atomicExchangeAdd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAdd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeAnd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAnd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeOr(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeOr(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeSub(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeSub(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeXor(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeXor(operand, order);
}

template<typename T>
inline T atomicExchange(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchange(newValue, order);
}
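
// Illustrative use of the free functions above (a sketch; `object` and `m_count` are
// hypothetical): they reinterpret a plain location as an Atomic<T> and forward to the
// corresponding member function, which is handy for fields that are not declared Atomic
// but are accessed atomically in some paths.
//
//     unsigned oldCount = atomicExchangeAdd(&object->m_count, 1u);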

// Just a compiler fence. Has no effect on the hardware, but tells the compiler
// not to move things around this call. Should not affect the compiler's ability
// to do things like register allocation and code motion over pure operations.
inline void compilerFence()
{
#if OS(WINDOWS) && !COMPILER(GCC_COMPATIBLE)
    _ReadWriteBarrier();
#else
    asm volatile("" ::: "memory");
#endif
}

#if CPU(ARM_THUMB2) || CPU(ARM64)

// Full memory fence. No accesses will float above this, and no accesses will sink
// below it.
inline void arm_dmb()
{
    asm volatile("dmb ish" ::: "memory");
}

// Like the above, but only affects stores.
inline void arm_dmb_st()
{
    asm volatile("dmb ishst" ::: "memory");
}

inline void arm_isb()
{
    asm volatile("isb" ::: "memory");
}

inline void loadLoadFence() { arm_dmb(); }
inline void loadStoreFence() { arm_dmb(); }
inline void storeLoadFence() { arm_dmb(); }
inline void storeStoreFence() { arm_dmb_st(); }
inline void memoryBarrierAfterLock() { arm_dmb(); }
inline void memoryBarrierBeforeUnlock() { arm_dmb(); }
inline void crossModifyingCodeFence() { arm_isb(); }

#elif CPU(X86) || CPU(X86_64)

inline void x86_ortop()
{
#if OS(WINDOWS)
    MemoryBarrier();
#elif CPU(X86_64)
    // This has acqrel semantics and is much cheaper than mfence. For example, in the JSC GC, using
    // mfence as a store-load fence was a 9% slow-down on Octane/splay while using this was neutral.
    asm volatile("lock; orl $0, (%%rsp)" ::: "memory");
#else
    asm volatile("lock; orl $0, (%%esp)" ::: "memory");
#endif
}

inline void x86_cpuid()
{
#if OS(WINDOWS)
    int info[4];
    __cpuid(info, 0);
#else
    intptr_t a = 0, b, c, d;
    asm volatile(
        "cpuid"
        : "+a"(a), "=b"(b), "=c"(c), "=d"(d)
        :
        : "memory");
#endif
}

inline void loadLoadFence() { compilerFence(); }
inline void loadStoreFence() { compilerFence(); }
inline void storeLoadFence() { x86_ortop(); }
inline void storeStoreFence() { compilerFence(); }
inline void memoryBarrierAfterLock() { compilerFence(); }
inline void memoryBarrierBeforeUnlock() { compilerFence(); }
inline void crossModifyingCodeFence() { x86_cpuid(); }

#else

inline void loadLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void loadStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierAfterLock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierBeforeUnlock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void crossModifyingCodeFence() { std::atomic_thread_fence(std::memory_order_seq_cst); } // Probably not strong enough.

#endif
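
// Illustrative use of storeLoadFence() (a sketch; `myFlag` and `theirFlag` are hypothetical
// Atomic<bool> flags): a Dekker-style handshake in which each thread raises its own flag and
// then checks the other thread's flag. With the fence between the store and the load on both
// sides, at most one thread can observe the other's flag as unset.
//
//     myFlag.storeRelaxed(true);
//     storeLoadFence();
//     if (!theirFlag.loadRelaxed()) {
//         // The other thread's flag was not yet visible; proceed.
//     }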

typedef unsigned InternalDependencyType;

inline InternalDependencyType opaqueMixture()
{
    return 0;
}

template<typename... Arguments, typename T>
inline InternalDependencyType opaqueMixture(T value, Arguments... arguments)
{
    union {
        InternalDependencyType copy;
        T value;
    } u;
    u.copy = 0;
    u.value = value;
    return opaqueMixture(arguments...) + u.copy;
}

class Dependency {
    WTF_MAKE_FAST_ALLOCATED;
public:
    Dependency()
        : m_value(0)
    {
    }

    // On TSO architectures, this is a load-load fence and the value it returns is not meaningful (it's
    // zero). The load-load fence is usually just a compiler fence. On ARM, this is a self-xor that
    // produces zero, but it's concealed from the compiler. The CPU understands this dummy op to be a
    // phantom dependency.
    template<typename... Arguments>
    static Dependency fence(Arguments... arguments)
    {
        InternalDependencyType input = opaqueMixture(arguments...);
        InternalDependencyType output;
#if CPU(ARM64)
        // Create a magical zero value through inline assembly, whose computation
        // isn't visible to the optimizer. This zero is then usable as an offset in
        // further address computations: adding zero does nothing, but the compiler
        // doesn't know it. It's magical because it creates an address dependency
        // from the load of `location` to the uses of the dependency, which triggers
        // the ARM ISA's address dependency rule, a.k.a. the mythical C++ consume
        // ordering. This forces weak memory order CPUs to observe `location` and
        // dependent loads in their store order without the reader using a barrier
        // or an acquire load.
        asm("eor %w[out], %w[in], %w[in]"
            : [out] "=r"(output)
            : [in] "r"(input));
#elif CPU(ARM)
        asm("eor %[out], %[in], %[in]"
            : [out] "=r"(output)
            : [in] "r"(input));
#else
        // No dependency is needed for this architecture.
        loadLoadFence();
        output = 0;
        UNUSED_PARAM(input);
#endif
        Dependency result;
        result.m_value = output;
        return result;
    }

    // On TSO architectures, this just returns the pointer you pass it. On ARM, this produces a new
    // pointer that is dependent on this dependency and the input pointer.
    template<typename T>
    T* consume(T* pointer)
    {
#if CPU(ARM64) || CPU(ARM)
        return bitwise_cast<T*>(bitwise_cast<char*>(pointer) + m_value);
#else
        UNUSED_PARAM(m_value);
        return pointer;
#endif
    }

private:
    InternalDependencyType m_value;
};
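
// Illustrative use of Dependency (a sketch; `m_widget` and `Widget` are hypothetical, and the
// writer is assumed to publish the pointer with a store-store fence): the reader loads the
// published pointer, derives a dependency from it, and chases the pointer through consume(),
// so dependent loads are ordered after the pointer load even on weakly ordered CPUs, without
// an acquire load or an explicit barrier.
//
//     Widget* widget = m_widget.load(std::memory_order_relaxed);
//     Dependency dependency = Dependency::fence(widget);
//     int size = dependency.consume(widget)->size;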

template<typename InputType, typename ValueType>
struct InputAndValue {
    WTF_MAKE_STRUCT_FAST_ALLOCATED;

    InputAndValue() { }

    InputAndValue(InputType input, ValueType value)
        : input(input)
        , value(value)
    {
    }

    InputType input;
    ValueType value;
};

template<typename InputType, typename ValueType>
InputAndValue<InputType, ValueType> inputAndValue(InputType input, ValueType value)
{
    return InputAndValue<InputType, ValueType>(input, value);
}

template<typename T, typename Func>
ALWAYS_INLINE T& ensurePointer(Atomic<T*>& pointer, const Func& func)
{
    for (;;) {
        T* oldValue = pointer.load(std::memory_order_relaxed);
        if (oldValue) {
            // On all sensible CPUs, we get an implicit dependency-based load-load barrier when
            // loading this.
            return *oldValue;
        }
        T* newValue = func();
        if (pointer.compareExchangeWeak(oldValue, newValue))
            return *newValue;
        delete newValue;
    }
}
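
// Illustrative use of ensurePointer() (a sketch; `s_sharedWidget` and `Widget` are hypothetical):
// lazily create a shared object. Racing threads may each run func(), but only the winning
// pointer is kept; losers delete the object they just created, so func() should have no side
// effects beyond constructing the returned object.
//
//     static Atomic<Widget*> s_sharedWidget;
//     Widget& widget = ensurePointer(s_sharedWidget, [] { return new Widget; });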

} // namespace WTF

using WTF::Atomic;
using WTF::Dependency;
using WTF::InputAndValue;
using WTF::inputAndValue;
using WTF::ensurePointer;
using WTF::opaqueMixture;