/*
 * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Justin Haygood ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <atomic>
#include <wtf/StdLibExtras.h>

#if OS(WINDOWS)
#if !COMPILER(GCC_COMPATIBLE)
extern "C" void _ReadWriteBarrier(void);
#pragma intrinsic(_ReadWriteBarrier)
#endif
#include <windows.h>
#include <intrin.h>
#endif

namespace WTF {

ALWAYS_INLINE bool hasFence(std::memory_order order)
{
    return order != std::memory_order_relaxed;
}

// Atomic wraps around std::atomic with the sole purpose of making the compare_exchange
// operations not alter the expected value. This is more in line with how we typically
// use CAS in our code.
//
// Atomic is a struct without explicitly defined constructors so that it can be
// initialized at compile time.
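//
// A minimal usage sketch (the names below are illustrative, not part of this header):
//
//     static Atomic<unsigned> s_refCount; // statically zero-initialized
//
//     unsigned expected = s_refCount.load(std::memory_order_relaxed);
//     // Unlike std::atomic's compare_exchange, a failed CAS leaves `expected` untouched
//     // and just returns false, so callers typically re-load and retry in a loop.
//     bool didIncrement = s_refCount.compareExchangeWeak(expected, expected + 1);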

template<typename T>
struct Atomic {
    // Don't pass a non-default value for the order parameter unless you really know
    // what you are doing and have thought about it very hard. The cost of seq_cst
    // is usually not high enough to justify the risk.

    ALWAYS_INLINE T load(std::memory_order order = std::memory_order_seq_cst) const { return value.load(order); }

    ALWAYS_INLINE T loadRelaxed() const { return load(std::memory_order_relaxed); }

    // This is a load that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE T loadFullyFenced() const
    {
        Atomic<T>* ptr = const_cast<Atomic<T>*>(this);
        return ptr->exchangeAdd(T());
    }

    ALWAYS_INLINE void store(T desired, std::memory_order order = std::memory_order_seq_cst) { value.store(desired, order); }

    ALWAYS_INLINE void storeRelaxed(T desired) { store(desired, std::memory_order_relaxed); }

    // This is a store that simultaneously does a full fence - neither loads nor stores will move
    // above or below it.
    ALWAYS_INLINE void storeFullyFenced(T desired)
    {
        exchange(desired);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order);
    }

    ALWAYS_INLINE bool compareExchangeWeakRelaxed(T expected, T desired)
    {
        return compareExchangeWeak(expected, desired, std::memory_order_relaxed);
    }

    ALWAYS_INLINE bool compareExchangeWeak(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        return value.compare_exchange_weak(expectedOrActual, desired, order_success, order_failure);
    }

    // WARNING: This does not have strong fencing guarantees when it fails. For example, stores could
    // sink below it in that case.
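    //
    // Also note that, unlike std::atomic, success is reported through the return value rather than
    // by mutating `expected`: the returned value is whatever was actually in memory, so a caller
    // checks `compareExchangeStrong(expected, desired) == expected`. A sketch with an illustrative
    // member name:
    //
    //     if (m_state.compareExchangeStrong(Unlocked, Locked) == Unlocked) {
    //         // We took the lock.
    //     }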
    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order);
        return expectedOrActual;
    }

    ALWAYS_INLINE T compareExchangeStrong(T expected, T desired, std::memory_order order_success, std::memory_order order_failure)
    {
        T expectedOrActual = expected;
        value.compare_exchange_strong(expectedOrActual, desired, order_success, order_failure);
        return expectedOrActual;
    }

    template<typename U>
    ALWAYS_INLINE T exchangeAdd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_add(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeAnd(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_and(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeOr(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_or(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeSub(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_sub(operand, order); }

    template<typename U>
    ALWAYS_INLINE T exchangeXor(U operand, std::memory_order order = std::memory_order_seq_cst) { return value.fetch_xor(operand, order); }

    ALWAYS_INLINE T exchange(T newValue, std::memory_order order = std::memory_order_seq_cst) { return value.exchange(newValue, order); }

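    // transaction() repeatedly applies `func` to a copy of the current value and tries to install
    // the result with a weak CAS, retrying on contention; `func` returns false to abort without
    // writing anything. A sketch with illustrative names:
    //
    //     m_flags.transaction([] (uint8_t& flags) {
    //         if (flags & NeedsVisitBit)
    //             return false; // Already set; nothing to store.
    //         flags |= NeedsVisitBit;
    //         return true;
    //     });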
    template<typename Func>
    ALWAYS_INLINE bool transaction(const Func& func, std::memory_order order = std::memory_order_seq_cst)
    {
        for (;;) {
            T oldValue = load(std::memory_order_relaxed);
            T newValue = oldValue;
            if (!func(newValue))
                return false;
            if (compareExchangeWeak(oldValue, newValue, order))
                return true;
        }
    }

    template<typename Func>
    ALWAYS_INLINE bool transactionRelaxed(const Func& func)
    {
        return transaction(func, std::memory_order_relaxed);
    }

    Atomic() = default;
    constexpr Atomic(T initial)
        : value(std::forward<T>(initial))
    {
    }

    std::atomic<T> value;
};

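// The helpers below perform atomic operations on ordinary objects by reinterpreting a plain T*
// as an Atomic<T>*. The object should be aligned as std::atomic<T> requires, and all concurrent
// accesses to it should go through atomic operations. An illustrative sketch:
//
//     unsigned m_counter; // a field that other threads also touch
//     unsigned oldCount = atomicExchangeAdd(&m_counter, 1u);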
template<typename T>
inline T atomicLoad(T* location, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->load(order);
}

template<typename T>
inline T atomicLoadFullyFenced(T* location)
{
    return bitwise_cast<Atomic<T>*>(location)->loadFullyFenced();
}

template<typename T>
inline void atomicStore(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    bitwise_cast<Atomic<T>*>(location)->store(newValue, order);
}

template<typename T>
inline void atomicStoreFullyFenced(T* location, T newValue)
{
    bitwise_cast<Atomic<T>*>(location)->storeFullyFenced(newValue);
}

template<typename T>
inline bool atomicCompareExchangeWeak(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeak(expected, newValue, order);
}

template<typename T>
inline bool atomicCompareExchangeWeakRelaxed(T* location, T expected, T newValue)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeWeakRelaxed(expected, newValue);
}

template<typename T>
inline T atomicCompareExchangeStrong(T* location, T expected, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->compareExchangeStrong(expected, newValue, order);
}

template<typename T, typename U>
inline T atomicExchangeAdd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAdd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeAnd(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeAnd(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeOr(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeOr(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeSub(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeSub(operand, order);
}

template<typename T, typename U>
inline T atomicExchangeXor(T* location, U operand, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchangeXor(operand, order);
}

template<typename T>
inline T atomicExchange(T* location, T newValue, std::memory_order order = std::memory_order_seq_cst)
{
    return bitwise_cast<Atomic<T>*>(location)->exchange(newValue, order);
}

// Just a compiler fence. Has no effect on the hardware, but tells the compiler
// not to move things around this call. Should not affect the compiler's ability
// to do things like register allocation and code motion over pure operations.
inline void compilerFence()
{
#if OS(WINDOWS) && !COMPILER(GCC_COMPATIBLE)
    _ReadWriteBarrier();
#else
    asm volatile("" ::: "memory");
#endif
}

#if CPU(ARM_THUMB2) || CPU(ARM64)

// Full memory fence. No accesses will float above this, and no accesses will sink
// below it.
inline void arm_dmb()
{
    asm volatile("dmb ish" ::: "memory");
}

// Like the above, but only affects stores.
inline void arm_dmb_st()
{
    asm volatile("dmb ishst" ::: "memory");
}

inline void arm_isb()
{
    asm volatile("isb" ::: "memory");
}

inline void loadLoadFence() { arm_dmb(); }
inline void loadStoreFence() { arm_dmb(); }
inline void storeLoadFence() { arm_dmb(); }
inline void storeStoreFence() { arm_dmb_st(); }
inline void memoryBarrierAfterLock() { arm_dmb(); }
inline void memoryBarrierBeforeUnlock() { arm_dmb(); }
inline void crossModifyingCodeFence() { arm_isb(); }

#elif CPU(X86) || CPU(X86_64)

inline void x86_ortop()
{
#if OS(WINDOWS)
    MemoryBarrier();
#elif CPU(X86_64)
    // This has acqrel semantics and is much cheaper than mfence. For example, in the JSC GC, using
    // mfence as a store-load fence was a 9% slow-down on Octane/splay while using this was neutral.
    asm volatile("lock; orl $0, (%%rsp)" ::: "memory");
#else
    asm volatile("lock; orl $0, (%%esp)" ::: "memory");
#endif
}

inline void x86_cpuid()
{
#if OS(WINDOWS)
    int info[4];
    __cpuid(info, 0);
#else
    intptr_t a = 0, b, c, d;
    asm volatile(
        "cpuid"
        : "+a" (a), "=b" (b), "=c" (c), "=d" (d)
        :
        : "memory");
#endif
}

inline void loadLoadFence() { compilerFence(); }
inline void loadStoreFence() { compilerFence(); }
inline void storeLoadFence() { x86_ortop(); }
inline void storeStoreFence() { compilerFence(); }
inline void memoryBarrierAfterLock() { compilerFence(); }
inline void memoryBarrierBeforeUnlock() { compilerFence(); }
inline void crossModifyingCodeFence() { x86_cpuid(); }

#else

inline void loadLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void loadStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeLoadFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void storeStoreFence() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierAfterLock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void memoryBarrierBeforeUnlock() { std::atomic_thread_fence(std::memory_order_seq_cst); }
inline void crossModifyingCodeFence() { std::atomic_thread_fence(std::memory_order_seq_cst); } // Probably not strong enough.

#endif
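
// storeLoadFence() is the one ordering that TSO hardware such as x86 does not provide by default,
// which is why it is the only fence above (besides crossModifyingCodeFence()) that emits a real
// instruction there. The classic pattern that needs it is Dekker-style signalling, where each
// thread stores its own flag and then loads the other thread's flag. A sketch with illustrative
// variables:
//
//     atomicStore(&m_myFlag, true, std::memory_order_relaxed);
//     storeLoadFence();
//     if (!atomicLoad(&m_otherFlag, std::memory_order_relaxed)) {
//         // The other thread has not announced itself yet.
//     }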

typedef unsigned InternalDependencyType;

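// opaqueMixture() folds any number of values into a single InternalDependencyType by type-punning
// each argument through a union and summing the results. Dependency::fence() uses it so that the
// returned dependency is data-dependent on every argument passed in.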
inline InternalDependencyType opaqueMixture()
{
    return 0;
}

template<typename... Arguments, typename T>
inline InternalDependencyType opaqueMixture(T value, Arguments... arguments)
{
    union {
        InternalDependencyType copy;
        T value;
    } u;
    u.copy = 0;
    u.value = value;
    return opaqueMixture(arguments...) + u.copy;
}

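// Dependency encapsulates a value-carried (address) dependency: it can stand in for a load-acquire
// when the only accesses that need ordering are ones data-dependent on the loaded value. A typical
// consume-ordered read, using illustrative names:
//
//     Bucket* bucket = m_table[index].load(std::memory_order_relaxed);
//     Dependency dependency = Dependency::fence(bucket);
//     Value* value = dependency.consume(bucket)->value;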
class Dependency {
public:
    Dependency()
        : m_value(0)
    {
    }

    // On TSO architectures, this is a load-load fence and the value it returns is not meaningful (it's
    // zero). The load-load fence is usually just a compiler fence. On ARM, this is a self-xor that
    // produces zero, but it's concealed from the compiler. The CPU understands this dummy op to be a
    // phantom dependency.
    template<typename... Arguments>
    static Dependency fence(Arguments... arguments)
    {
        InternalDependencyType input = opaqueMixture(arguments...);
        InternalDependencyType output;
#if CPU(ARM64)
        // Create a magical zero value through inline assembly, whose computation
        // isn't visible to the optimizer. This zero is then usable as an offset in
        // further address computations: adding zero does nothing, but the compiler
        // doesn't know it. It's magical because it creates an address dependency
        // from the loads that produced the fence's arguments to the uses of the
        // dependency, which triggers the ARM ISA's address dependency rule, a.k.a.
        // the mythical C++ consume ordering. This forces weak memory order CPUs to
        // observe those loads and the loads dependent on them in their store order
        // without the reader using a barrier or an acquire load.
        asm("eor %w[out], %w[in], %w[in]"
            : [out] "=r" (output)
            : [in] "r" (input));
#elif CPU(ARM)
        asm("eor %[out], %[in], %[in]"
            : [out] "=r" (output)
            : [in] "r" (input));
#else
        // No dependency is needed for this architecture.
        loadLoadFence();
        output = 0;
        UNUSED_PARAM(input);
#endif
        Dependency result;
        result.m_value = output;
        return result;
    }

    // On TSO architectures, this just returns the pointer you pass it. On ARM, this produces a new
    // pointer that is dependent on this dependency and the input pointer.
    template<typename T>
    T* consume(T* pointer)
    {
#if CPU(ARM64) || CPU(ARM)
        return bitwise_cast<T*>(bitwise_cast<char*>(pointer) + m_value);
#else
        UNUSED_PARAM(m_value);
        return pointer;
#endif
    }

private:
    InternalDependencyType m_value;
};

template<typename InputType, typename ValueType>
struct InputAndValue {
    InputAndValue() { }

    InputAndValue(InputType input, ValueType value)
        : input(input)
        , value(value)
    {
    }

    InputType input;
    ValueType value;
};

template<typename InputType, typename ValueType>
InputAndValue<InputType, ValueType> inputAndValue(InputType input, ValueType value)
{
    return InputAndValue<InputType, ValueType>(input, value);
}

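// ensurePointer() lazily initializes an atomic pointer: if it is already non-null, the existing
// object is returned; otherwise `func` is called to make a new one, and whichever thread wins the
// CAS publishes its object while the losers delete theirs. `func` must therefore be safe to call
// (and have its result discarded) from multiple threads at once. An illustrative sketch:
//
//     static Atomic<Config*> s_config;
//     Config& config = ensurePointer(s_config, [] { return new Config; });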
template<typename T, typename Func>
ALWAYS_INLINE T& ensurePointer(Atomic<T*>& pointer, const Func& func)
{
    for (;;) {
        T* oldValue = pointer.load(std::memory_order_relaxed);
        if (oldValue) {
            // On all sensible CPUs, we get an implicit dependency-based load-load barrier when
            // loading this.
            return *oldValue;
        }
        T* newValue = func();
        if (pointer.compareExchangeWeak(oldValue, newValue))
            return *newValue;
        delete newValue;
    }
}

} // namespace WTF

using WTF::Atomic;
using WTF::Dependency;
using WTF::InputAndValue;
using WTF::inputAndValue;
using WTF::ensurePointer;
using WTF::opaqueMixture;