/*
 * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Justin Haygood ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <atomic>
#include <wtf/StdLibExtras.h>

#if OS(WINDOWS)
#if !COMPILER(GCC_COMPATIBLE)
extern "C" void _ReadWriteBarrier(void);
#pragma intrinsic(_ReadWriteBarrier)
#endif
#include <windows.h>
#include <intrin.h>
#endif

namespace WTF {

ALWAYS_INLINE bool hasFence(std::memory_order order)
{
    return order != std::memory_order_relaxed;
}

// Atomic wraps around std::atomic with the sole purpose of making the compare_exchange
// operations not alter the expected value. This is more in line with how we typically
// use CAS in our code.
//
// Atomic is a struct without explicitly defined constructors so that it can be
// initialized at compile time.
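
// Example (illustrative usage, not part of this header): unlike
// std::atomic::compare_exchange_weak, the expected value here is passed by value
// and is never overwritten on failure, so the caller decides when to reload it.
//
//     Atomic<int> counter { 0 };
//     int expected = counter.load();
//     while (!counter.compareExchangeWeak(expected, expected + 1))
//         expected = counter.load();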

template<typename T>
struct Atomic {
    // Don't pass a non-default value for the order parameter unless you really know
    // what you are doing and have thought about it very hard. The cost of seq_cst
    // is usually not high enough to justify the risk.

    ALWAYS_INLINE T load(std::memory_order order = std::memory_order_seq_cst) const { return value.load(order); }

    ALWAYS_INLINE T loadRelaxed() const { return load(std::memory_order_relaxed); }

    // This is a load that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE T loadFullyFenced() const
    {
        Atomic<T>* ptr = const_cast<Atomic<T>*>(this);
        return ptr->exchangeAdd(T());
    }

    ALWAYS_INLINE void store(T desired, std::memory_order order = std::memory_order_seq_cst) { value.store(desired, order); }

    ALWAYS_INLINE void storeRelaxed(T desired) { store(desired, std::memory_order_relaxed); }

    // This is a store that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE void storeFullyFenced(T desired)
    {
        exchange(desired);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order);
    }

    ALWAYS_INLINE bool compareExchangeWeakRelaxed(T expected, T desired)
    {
        return compareExchangeWeak(expected, desired, std::memory_order_relaxed);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order_success, order_failure);
    }

    // WARNING: This does not have strong fencing guarantees when it fails. For example, stores could
    // sink below it in that case.
    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order);
        return expectedOrActual;
    }

    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order_success, order_failure);
        return expectedOrActual;
    }
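
    // Example (illustrative; the lock-word use is hypothetical): because
    // compareExchangeStrong() returns the value it actually observed rather than
    // a bool, a caller can test for success and learn the current value in one call.
    //
    //     Atomic<unsigned> lockWord { 0 };
    //     unsigned observed = lockWord.compareExchangeStrong(0u, 1u);
    //     if (!observed) {
    //         // We observed 0 and stored 1, so we took ownership.
    //     } else {
    //         // `observed` holds whatever value was there instead.
    //     }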

    template<typename U>
    ALWAYS_INLINE T exchangeAdd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_add(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeAnd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_and(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeOr(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_or(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeSub(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_sub(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeXor(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_xor(operand, order); }

    ALWAYS_INLINE T exchange(T newValue, std::memory_order order = std::memory_order_seq_cst) { return value.exchange(newValue, order); }

    template<typename Func>
    ALWAYS_INLINE bool transaction(const Func& func, std::memory_order order = std::memory_order_seq_cst)
    {
        for (;;) {
            T oldValue = load(std::memory_order_relaxed);
            T newValue = oldValue;
            if (!func(newValue))
                return false;
            if (compareExchangeWeak(oldValue, newValue, order))
                return true;
        }
    }

    template<typename Func>
    ALWAYS_INLINE bool transactionRelaxed(const Func& func)
    {
        return transaction(func, std::memory_order_relaxed);
    }
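
    // Example (illustrative): transaction() retries a read-modify-write until the
    // CAS succeeds, or bails without storing anything if the functor returns false.
    //
    //     Atomic<unsigned> flags { 0 };
    //     bool didSet = flags.transaction([](unsigned& bits) {
    //         if (bits & 0x1)
    //             return false; // Already set; nothing is stored.
    //         bits |= 0x1;
    //         return true;
    //     });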

    Atomic() = default;
    constexpr Atomic(T initial)
        : value(std::forward<T>(initial))
    {
    }

    std::atomic<T> value;
};

template<typename T>
inline T atomicLoad(T* location, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->load(order);
}

template<typename T>
inline T atomicLoadFullyFenced(T* location)
{
    return bitwise_cast<Atomic<T>*>(location)->loadFullyFenced();
}

template<typename T>
inline void atomicStore(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    bitwise_cast<Atomic<T>*>(location)->store(newValue, order);
}

template<typename T>
inline void atomicStoreFullyFenced(T* location, T newValue)
{
    bitwise_cast<Atomic<T>*>(location)->storeFullyFenced(newValue);
}

template<typename T>
inline bool atomicCompareExchangeWeak(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeak(expected, newValue, order);
}

template<typename T>
inline bool atomicCompareExchangeWeakRelaxed(T* location, T expected, T newValue)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeakRelaxed(expected, newValue);
}

template<typename T>
inline T atomicCompareExchangeStrong(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeStrong(expected, newValue, order);
}

template<typename T, typename U>
inline T atomicExchangeAdd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAdd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeAnd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAnd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeOr(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeOr(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeSub(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeSub(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeXor(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeXor(operand, order);
}

template<typename T>
inline T atomicExchange(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchange(newValue, order);
}
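
// Example (illustrative; `sharedCounter` is a hypothetical variable): the atomic*
// free functions above let plain storage be accessed atomically without declaring
// it as Atomic<T>, as long as every thread accesses it through these helpers and
// the storage is suitably aligned.
//
//     unsigned sharedCounter = 0;
//     WTF::atomicExchangeAdd(&sharedCounter, 1u);
//     unsigned current = WTF::atomicLoad(&sharedCounter);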

// Just a compiler fence. Has no effect on the hardware, but tells the compiler
// not to move things around this call. Should not affect the compiler's ability
// to do things like register allocation and code motion over pure operations.
inline void compilerFence()
{
#if OS(WINDOWS) && !COMPILER(GCC_COMPATIBLE)
    _ReadWriteBarrier();
#else
    asm volatile("" ::: "memory");
#endif
}

#if CPU(ARM_THUMB2) || CPU(ARM64)

// Full memory fence. No accesses will float above this, and no accesses will sink
// below it.
inline void arm_dmb()
{
    asm volatile("dmb ish" ::: "memory");
}

// Like the above, but only affects stores.
inline void arm_dmb_st()
{
    asm volatile("dmb ishst" ::: "memory");
}

inline void arm_isb()
{
    asm volatile("isb" ::: "memory");
}

inline void loadLoadFence() { arm_dmb(); }
inline void loadStoreFence() { arm_dmb(); }
inline void storeLoadFence() { arm_dmb(); }
inline void storeStoreFence() { arm_dmb_st(); }
inline void memoryBarrierAfterLock() { arm_dmb(); }
inline void memoryBarrierBeforeUnlock() { arm_dmb(); }
inline void crossModifyingCodeFence() { arm_isb(); }

#elif CPU(X86) || CPU(X86_64)

inline void x86_ortop()
{
#if OS(WINDOWS)
    MemoryBarrier();
#elif CPU(X86_64)
    // This has acqrel semantics and is much cheaper than mfence. For example, in the JSC GC, using
    // mfence as a store-load fence was a 9% slow-down on Octane/splay while using this was neutral.
    asm volatile("lock; orl $0, (%%rsp)" ::: "memory");
#else
    asm volatile("lock; orl $0, (%%esp)" ::: "memory");
#endif
}

inline void x86_cpuid()
{
#if OS(WINDOWS)
    int info[4];
    __cpuid(info, 0);
#else
    intptr_t a = 0, b, c, d;
    asm volatile(
        "cpuid"
        : "+a"(a), "=b"(b), "=c"(c), "=d"(d)
        :
        : "memory");
#endif
}

inline void loadLoadFence() { compilerFence(); }
inline void loadStoreFence() { compilerFence(); }
inline void storeLoadFence() { x86_ortop(); }
inline void storeStoreFence() { compilerFence(); }
inline void memoryBarrierAfterLock() { compilerFence(); }
inline void memoryBarrierBeforeUnlock() { compilerFence(); }
inline void crossModifyingCodeFence() { x86_cpuid(); }

#else

inline void loadLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void loadStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierAfterLock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierBeforeUnlock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void crossModifyingCodeFence() { std::atomic_thread_fence(std::memory_order_seq_cst); } // Probably not strong enough.

#endif
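
// Example (illustrative): storeLoadFence() is the barrier a Dekker-style handshake
// needs when the surrounding accesses are relaxed. If both threads run the pattern
// below (with the roles of `a` and `b` swapped in the other thread), at least one
// of them should observe the other's store as 1; without the fence, both could
// read 0. `a` and `b` are hypothetical shared unsigned variables.
//
//     atomicStore(&a, 1u, std::memory_order_relaxed);
//     storeLoadFence(); // Keep the store from being reordered past the load below.
//     unsigned sawOther = atomicLoad(&b, std::memory_order_relaxed);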

typedef unsigned InternalDependencyType;

inline InternalDependencyType opaqueMixture()
{
    return 0;
}

template<typename... Arguments, typename T>
inline InternalDependencyType opaqueMixture(T value, Arguments... arguments)
{
    union {
        InternalDependencyType copy;
        T value;
    } u;
    u.copy = 0;
    u.value = value;
    return opaqueMixture(arguments...) + u.copy;
}

class Dependency {
public:
    Dependency()
        : m_value(0)
    {
    }

    // On TSO architectures, this is a load-load fence and the value it returns is not meaningful (it's
    // zero). The load-load fence is usually just a compiler fence. On ARM, this is a self-xor that
    // produces zero, but it's concealed from the compiler. The CPU understands this dummy op to be a
    // phantom dependency.
    template<typename... Arguments>
    static Dependency fence(Arguments... arguments)
    {
        InternalDependencyType input = opaqueMixture(arguments...);
        InternalDependencyType output;
#if CPU(ARM64)
        // Create a magical zero value through inline assembly, whose computation
        // isn't visible to the optimizer. This zero is then usable as an offset in
        // further address computations: adding zero does nothing, but the compiler
        // doesn't know it. It's magical because it creates an address dependency
        // from the load of `location` to the uses of the dependency, which triggers
        // the ARM ISA's address dependency rule, a.k.a. the mythical C++ consume
        // ordering. This forces weak memory order CPUs to observe `location` and
        // dependent loads in their store order without the reader using a barrier
        // or an acquire load.
        asm("eor %w[out], %w[in], %w[in]"
            : [out] "=r"(output)
            : [in] "r"(input));
#elif CPU(ARM)
        asm("eor %[out], %[in], %[in]"
            : [out] "=r"(output)
            : [in] "r"(input));
#else
        // No dependency is needed for this architecture.
        loadLoadFence();
        output = 0;
        UNUSED_PARAM(input);
#endif
        Dependency result;
        result.m_value = output;
        return result;
    }

    // On TSO architectures, this just returns the pointer you pass it. On ARM, this produces a new
    // pointer that is dependent on this dependency and the input pointer.
    template<typename T>
    T* consume(T* pointer)
    {
#if CPU(ARM64) || CPU(ARM)
        return bitwise_cast<T*>(bitwise_cast<char*>(pointer) + m_value);
#else
        UNUSED_PARAM(m_value);
        return pointer;
#endif
    }

private:
    InternalDependencyType m_value;
};
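
// Example (illustrative; `Thing`, `thingPointer`, and `payload` are hypothetical
// names): a reader that wants a dependent load ordered after a pointer load can
// thread a Dependency through the address computation instead of paying for an
// acquire fence.
//
//     Thing* thing = thingPointer.load(std::memory_order_relaxed);
//     Dependency dependency = Dependency::fence(thing);
//     int payload = dependency.consume(thing)->payload;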

template<typename InputType, typename ValueType>
struct InputAndValue {
    InputAndValue() { }

    InputAndValue(InputType input, ValueType value)
        : input(input)
        , value(value)
    {
    }

    InputType input;
    ValueType value;
};

template<typename InputType, typename ValueType>
InputAndValue<InputType, ValueType> inputAndValue(InputType input, ValueType value)
{
    return InputAndValue<InputType, ValueType>(input, value);
}

template<typename T, typename Func>
ALWAYS_INLINE T& ensurePointer(Atomic<T*>& pointer, const Func& func)
{
    for (;;) {
        T* oldValue = pointer.load(std::memory_order_relaxed);
        if (oldValue) {
            // On all sensible CPUs, we get an implicit dependency-based load-load barrier when
            // loading this.
            return *oldValue;
        }
        T* newValue = func();
        if (pointer.compareExchangeWeak(oldValue, newValue))
            return *newValue;
        delete newValue;
    }
}
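
// Example (illustrative; `Config` and `sharedConfig` are hypothetical names):
// ensurePointer() implements racy lazy initialization. Several threads may each
// construct a candidate, but only one candidate is published; the losers are
// deleted.
//
//     static Atomic<Config*> sharedConfig;
//     Config& config = ensurePointer(sharedConfig, [] { return new Config; });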

} // namespace WTF

using WTF::Atomic;
using WTF::Dependency;
using WTF::InputAndValue;
using WTF::inputAndValue;
using WTF::ensurePointer;
using WTF::opaqueMixture;