/*
 * Copyright (C) 2011-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFGCSEPhase.h"

#if ENABLE(DFG_JIT)

#include "DFGAbstractHeap.h"
#include "DFGBlockMapInlines.h"
#include "DFGClobberSet.h"
#include "DFGClobberize.h"
#include "DFGDominators.h"
#include "DFGGraph.h"
#include "DFGPhase.h"
#include "JSCInlines.h"
#include <array>

namespace JSC { namespace DFG {

// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. when we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.

namespace {

namespace DFGCSEPhaseInternal {
static constexpr bool verbose = false;
}

class ImpureDataSlot {
    WTF_MAKE_NONCOPYABLE(ImpureDataSlot);
    WTF_MAKE_FAST_ALLOCATED;
public:
    ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash)
        : key(key), value(value), hash(hash)
    { }

    HeapLocation key;
    LazyNode value;
    unsigned hash;
};

struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>>::Hash {
    static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key)
    {
        return key->hash;
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b)
    {
        // ImpureDataSlots are unique per table per HeapLocation. This lets us compare the keys
        // by just comparing the pointers of the unique ImpureDataSlots.
        ASSERT(a != b || a->key == b->key);
        return a == b;
    }
};

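// ImpureDataTranslator lets the slot sets below be queried and populated directly by
// HeapLocation (via add<ImpureDataTranslator>() and find<ImpureDataTranslator>() further down),
// so a lookup does not need to materialize a temporary ImpureDataSlot just to probe the table.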
struct ImpureDataTranslator {
    static unsigned hash(const HeapLocation& key)
    {
        return key.hash();
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key)
    {
        if (!slot)
            return false;
        if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot))
            return false;
        return slot->key == key;
    }

    static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode)
    {
        new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode});
    }
};

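// ImpureMap tracks which HeapLocations currently have a known value (a LazyNode). Stack
// locations with a concrete payload get their own O(1) map (m_abstractHeapStackMap); everything
// else lands in hash sets keyed by HeapLocation. See the comments above the data members below.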
class ImpureMap {
    WTF_MAKE_FAST_ALLOCATED;
    WTF_MAKE_NONCOPYABLE(ImpureMap);
public:
    ImpureMap() = default;

    ImpureMap(ImpureMap&& other)
    {
        m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap);
        m_fallbackStackMap.swap(other.m_fallbackStackMap);
        m_heapMap.swap(other.m_heapMap);
#if !defined(NDEBUG)
        m_debugImpureData.swap(other.m_debugImpureData);
#endif
    }

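    // Returns nullptr if the location was not yet mapped (and records the new value), or the
    // existing slot if some earlier node already made this location available. The debug map
    // below asserts that contract in debug builds.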
    const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node)
    {
        const ImpureDataSlot* result = addImpl(location, node);

#if !defined(NDEBUG)
        auto addResult = m_debugImpureData.add(location, node);
        ASSERT(!!result == !addResult.isNewEntry);
#endif
        return result;
    }

    LazyNode get(const HeapLocation& location) const
    {
        LazyNode result = getImpl(location);
#if !defined(NDEBUG)
        ASSERT(result == m_debugImpureData.get(location));
#endif
        return result;
    }

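    // Removes every mapping whose heap overlaps the given AbstractHeap. When
    // clobberConservatively is true (used by HugeMaps below), the overlapping sub-maps are
    // cleared wholesale instead of being filtered entry by entry, trading precision for speed.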
    void clobber(AbstractHeap heap, bool clobberConservatively)
    {
        switch (heap.kind()) {
        case World: {
            clear();
            break;
        }
        case SideState:
            break;
        case Stack: {
            ASSERT(!heap.payload().isTop());
            ASSERT(heap.payload().value() == heap.payload().value32());
            m_abstractHeapStackMap.remove(heap.payload().value32());
            if (clobberConservatively)
                m_fallbackStackMap.clear();
            else
                clobber(m_fallbackStackMap, heap);
            break;
        }
        default:
            if (clobberConservatively)
                m_heapMap.clear();
            else
                clobber(m_heapMap, heap);
            break;
        }
#if !defined(NDEBUG)
        m_debugImpureData.removeIf([heap, clobberConservatively, this](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool {
            switch (heap.kind()) {
            case World:
            case SideState:
                break;
            case Stack: {
                if (!clobberConservatively)
                    break;
                if (pair.key.heap().kind() == Stack) {
                    auto iterator = m_abstractHeapStackMap.find(pair.key.heap().payload().value32());
                    if (iterator != m_abstractHeapStackMap.end() && iterator->value->key == pair.key)
                        return false;
                    return true;
                }
                break;
            }
            default: {
                if (!clobberConservatively)
                    break;
                AbstractHeapKind kind = pair.key.heap().kind();
                if (kind != World && kind != SideState && kind != Stack)
                    return true;
                break;
            }
            }
            return heap.overlaps(pair.key.heap());
        });
        ASSERT(m_debugImpureData.size()
            == (m_heapMap.size()
                + m_abstractHeapStackMap.size()
                + m_fallbackStackMap.size()));

        const bool verifyClobber = false;
        if (verifyClobber) {
            for (auto& pair : m_debugImpureData)
                ASSERT(!!get(pair.key));
        }
#endif
    }

    void clear()
    {
        m_abstractHeapStackMap.clear();
        m_fallbackStackMap.clear();
        m_heapMap.clear();
#if !defined(NDEBUG)
        m_debugImpureData.clear();
#endif
    }

private:
    typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map;

    const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node)
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            AbstractHeap abstractHeap = location.heap();
            if (abstractHeap.payload().isTop())
                return add(m_fallbackStackMap, location, node);
            ASSERT(abstractHeap.payload().value() == abstractHeap.payload().value32());
            auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value32(), nullptr);
            if (addResult.isNewEntry) {
                addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0});
                return nullptr;
            }
            if (addResult.iterator->value->key == location)
                return addResult.iterator->value.get();
            return add(m_fallbackStackMap, location, node);
        }
        default:
            return add(m_heapMap, location, node);
        }
        return nullptr;
    }

    LazyNode getImpl(const HeapLocation& location) const
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            ASSERT(location.heap().payload().value() == location.heap().payload().value32());
            auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value32());
            if (iterator != m_abstractHeapStackMap.end()
                && iterator->value->key == location)
                return iterator->value->value;
            return get(m_fallbackStackMap, location);
        }
        default:
            return get(m_heapMap, location);
        }
        return LazyNode();
    }

    static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node)
    {
        auto result = map.add<ImpureDataTranslator>(location);
        if (result.isNewEntry) {
            (*result.iterator)->value = node;
            return nullptr;
        }
        return result.iterator->get();
    }

    static LazyNode get(const Map& map, const HeapLocation& location)
    {
        auto iterator = map.find<ImpureDataTranslator>(location);
        if (iterator != map.end())
            return (*iterator)->value;
        return LazyNode();
    }

    static void clobber(Map& map, AbstractHeap heap)
    {
        map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool {
            return heap.overlaps(slot->key.heap());
        });
    }

    // The majority of impure stack slots are unique per value.
    // This is very useful for a fast clobber(): we can just remove the slot addressed by the
    // AbstractHeap in O(1).
    //
    // When there are conflicts, any additional HeapLocation is added to the fallback map.
    // This works well because m_fallbackStackMap remains tiny.
    //
    // One cannot assume a unique ImpureDataSlot is in m_abstractHeapStackMap. It may have been
    // a duplicate in the past and now only lives in m_fallbackStackMap.
    //
    // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value.
    HashMap<int32_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int32_t>::Hash, WTF::SignedWithZeroKeyHashTraits<int32_t>> m_abstractHeapStackMap;
    Map m_fallbackStackMap;

    Map m_heapMap;

#if !defined(NDEBUG)
    HashMap<HeapLocation, LazyNode> m_debugImpureData;
#endif
};

class LocalCSEPhase : public Phase {
public:
    LocalCSEPhase(Graph& graph)
        : Phase(graph, "local common subexpression elimination")
        , m_smallBlock(graph)
        , m_largeBlock(graph)
        , m_hugeBlock(graph)
    {
    }

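    // Each block is processed independently. The map implementation is picked per block based on
    // its size: list-based SmallMaps for tiny blocks, hash-based LargeMaps for typical ones, and
    // HugeMaps (which clobbers conservatively) once a block exceeds
    // Options::maxDFGNodesInBasicBlockForPreciseAnalysis().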
    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);

        bool changed = false;

        m_graph.clearReplacements();

        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;

            if (block->size() <= SmallMaps::capacity)
                changed |= m_smallBlock.run(block);
            else if (block->size() <= Options::maxDFGNodesInBasicBlockForPreciseAnalysis())
                changed |= m_largeBlock.run(block);
            else
                changed |= m_hugeBlock.run(block);
        }

        return changed;
    }

private:
    class SmallMaps {
    public:
        // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
        // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
        // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
        // in one of these "small" list-based maps. That number still seems largeish, except that
        // the overhead of HashMaps can be quite high currently: clearing them, or even removing
        // enough things from them, deletes (or resizes) their backing store eagerly. Hence
        // HashMaps induce a lot of malloc traffic.
        static constexpr unsigned capacity = 100;

        SmallMaps()
            : m_pureLength(0)
            , m_impureLength(0)
        {
        }

        void clear()
        {
            m_pureLength = 0;
            m_impureLength = 0;
        }

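        // Evict any recorded location whose heap overlaps the written heap. Eviction is a
        // swap-with-last removal; the "i--" makes the loop re-examine the entry that was just
        // swapped into slot i.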
        void write(AbstractHeap heap)
        {
            if (heap.kind() == SideState)
                return;

            for (unsigned i = 0; i < m_impureLength; ++i) {
                if (heap.overlaps(m_impureMap[i].key.heap()))
                    m_impureMap[i--] = m_impureMap[--m_impureLength];
            }
        }

        Node* addPure(PureValue value, Node* node)
        {
            for (unsigned i = m_pureLength; i--;) {
                if (m_pureMap[i].key == value)
                    return m_pureMap[i].value;
            }

            RELEASE_ASSERT(m_pureLength < capacity);
            m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
            return nullptr;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            for (unsigned i = m_impureLength; i--;) {
                if (m_impureMap[i].key == location)
                    return m_impureMap[i].value;
            }
            return nullptr;
        }

        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            // FIXME: If we are using small maps, we must not def() derived values.
            // For now the only derived values we def() are constant-based.
            if (location.index() && !location.index().isNode())
                return nullptr;
            if (LazyNode result = findReplacement(location))
                return result;
            RELEASE_ASSERT(m_impureLength < capacity);
            m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
            return nullptr;
        }

    private:
        WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
        WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
        unsigned m_pureLength;
        unsigned m_impureLength;
    };

    class LargeMaps {
    public:
        LargeMaps()
        {
        }

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = false;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    // This is used only for huge basic blocks. Our usual CSE has quadratic complexity in the number of DFG nodes in a basic block.
    // HugeMaps models clobbering conservatively to avoid an O(N^2) algorithm. In particular, we clear all the slots of the specified heap kind
    // in ImpureMap instead of iterating over the slots and removing the matching ones. This change makes the complexity O(N).
    // FIXME: We can make LargeMaps O(N) without introducing conservative behavior if we track clobbering by hierarchical epochs.
    // https://bugs.webkit.org/show_bug.cgi?id=200014
    class HugeMaps {
    public:
        HugeMaps() = default;

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = true;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

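    // BlockCSE runs the actual per-block pass. It is parameterized over one of the map classes
    // above, which all expose the same interface: clear(), write(), addPure(), findReplacement()
    // and addImpure(). clobberize() drives the pass by calling back into read()/write()/def()
    // for each node.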
    template<typename Maps>
    class BlockCSE {
    public:
        BlockCSE(Graph& graph)
            : m_graph(graph)
            , m_insertionSet(graph)
        {
        }

        bool run(BasicBlock* block)
        {
            m_maps.clear();
            m_changed = false;
            m_block = block;

            for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
                m_node = block->at(nodeIndex);
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else {
                    // This rule only makes sense for local CSE, since in SSA form we have already
                    // factored the bounds check out of the PutByVal. It's kind of gross, but we
                    // still have reason to believe that PutByValAlias is a good optimization and
                    // that it's better to do it with a single node rather than separating out the
                    // CheckInBounds.
                    if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
                        HeapLocation heap;

                        Node* base = m_graph.varArgChild(m_node, 0).node();
                        Node* index = m_graph.varArgChild(m_node, 1).node();
                        LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result());

                        ArrayMode mode = m_node->arrayMode();
                        switch (mode.type()) {
                        case Array::Int32:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index);
                            break;

                        case Array::Double: {
                            if (!mode.isInBounds())
                                break;
                            LocationKind kind = mode.isSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc;
                            heap = HeapLocation(kind, IndexedDoubleProperties, base, index);
                            break;
                        }

                        case Array::Contiguous:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index);
                            break;

                        case Array::Int8Array:
                        case Array::Int16Array:
                        case Array::Int32Array:
                        case Array::Uint8Array:
                        case Array::Uint8ClampedArray:
                        case Array::Uint16Array:
                        case Array::Uint32Array:
                        case Array::Float32Array:
                        case Array::Float64Array:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                indexedPropertyLoc, TypedArrayProperties, base, index);
                            break;

                        default:
                            break;
                        }

                        if (!!heap && m_maps.findReplacement(heap))
                            m_node->setOp(PutByValAlias);
                    }

                    clobberize(m_graph, m_node, *this);
                }
            }

            m_insertionSet.execute(block);

            return m_changed;
        }

        void read(AbstractHeap) { }

        void write(AbstractHeap heap)
        {
            m_maps.write(heap);
        }

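        // Called by clobberize() for nodes whose result depends only on their children (and the
        // PureValue's extra info). If an equivalent node was already recorded in this block, this
        // node is redundant and gets replaced by the earlier one.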
        void def(PureValue value)
        {
            Node* match = m_maps.addPure(value, m_node);
            if (!match)
                return;

            m_node->replaceWith(m_graph, match);
            m_changed = true;
        }

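        // Called by clobberize() when m_node makes the value of a heap location available. If the
        // location already had an available value earlier in the block, this node is redundant
        // and is replaced with that value.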
        void def(const HeapLocation& location, const LazyNode& value)
        {
            LazyNode match = m_maps.addImpure(location, value);
            if (!match)
                return;

            if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all the uses
                //   of this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.

                m_node->child1() = Edge();
                m_graph.dethread();
            }

            if (value.isNode() && value.asNode() == m_node) {
                match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
                ASSERT(match.isNode());
                m_node->replaceWith(m_graph, match.asNode());
                m_changed = true;
            }
        }

    private:
        Graph& m_graph;

        bool m_changed;
        Node* m_node;
        BasicBlock* m_block;

        Maps m_maps;

        InsertionSet m_insertionSet;
    };

    BlockCSE<SmallMaps> m_smallBlock;
    BlockCSE<LargeMaps> m_largeBlock;
    BlockCSE<HugeMaps> m_hugeBlock;
};

class GlobalCSEPhase : public Phase {
public:
    GlobalCSEPhase(Graph& graph)
        : Phase(graph, "global common subexpression elimination")
        , m_impureDataMap(graph)
        , m_insertionSet(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == SSA);

        m_graph.initializeNodeOwners();
        m_graph.ensureSSADominators();

        m_preOrder = m_graph.blocksInPreOrder();

        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
        for (unsigned i = m_preOrder.size(); i--;) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
                addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
        }

        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.

        bool changed = iterate();

        // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
        // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
        // this, we'd have to first reset all of our state.
        // https://bugs.webkit.org/show_bug.cgi?id=145853

        return changed;
    }

    bool iterate()
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog("Performing iteration.\n");

        m_changed = false;
        m_graph.clearReplacements();

        for (unsigned i = 0; i < m_preOrder.size(); ++i) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            m_writesSoFar.clear();

            if (DFGCSEPhaseInternal::verbose)
                dataLog("Processing block ", *m_block, ":\n");

            for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
                m_nodeIndex = nodeIndex;
                m_node = m_block->at(nodeIndex);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Looking at node ", m_node, ":\n");

                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else
                    clobberize(m_graph, m_node, *this);
            }

            m_insertionSet.execute(m_block);

            m_impureData->didVisit = true;
        }

        return m_changed;
    }

    void read(AbstractHeap) { }

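    // clobberize() calls this for every abstract heap the current node writes. We kill the
    // now-stale availability info at the block's tail and remember the write in m_writesSoFar so
    // findReplacement() can skip the global search once the location has been clobbered locally.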
    void write(AbstractHeap heap)
    {
        bool clobberConservatively = false;
        m_impureData->availableAtTail.clobber(heap, clobberConservatively);
        m_writesSoFar.add(heap);
    }

    void def(PureValue value)
    {
        // With pure values we do not have to worry about the possibility of some control flow path
        // clobbering the value. So, we just search for all of the like values that have been
        // computed. We pick one that is in a block that dominates ours. Note that this means that
        // a PureValue will map to a list of nodes, since there may be many places in the control
        // flow graph that compute a value but only one of them that dominates us. We may build up
        // a large list of nodes that compute some value in the case of gnarly control flow. This
        // is probably OK.

        auto result = m_pureValues.add(value, Vector<Node*>());
        if (result.isNewEntry) {
            result.iterator->value.append(m_node);
            return;
        }

        for (unsigned i = result.iterator->value.size(); i--;) {
            Node* candidate = result.iterator->value[i];
            if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                m_node->replaceWith(m_graph, candidate);
                m_changed = true;
                return;
            }
        }

        result.iterator->value.append(m_node);
    }

    LazyNode findReplacement(HeapLocation location)
    {
        // At this instant, our "availableAtTail" reflects the set of things that are available in
        // this block so far. We check this map to find block-local CSE opportunities before doing
        // a global search.
        LazyNode match = m_impureData->availableAtTail.get(location);
        if (!!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Found local match: ", match, "\n");
            return match;
        }

        // If it's not available at this point in the block, and at some prior point in the block
        // we have clobbered this heap location, then there is no point in doing a global search.
        if (m_writesSoFar.overlaps(location.heap())) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Not looking globally because of local clobber: ", m_writesSoFar, "\n");
            return nullptr;
        }

        // This performs a backward search over the control flow graph to find some possible
        // non-local def() that matches our heap location. Such a match is only valid if there does
        // not exist any path from that def() to our block that contains a write() that overlaps
        // our heap. This algorithm looks for both of these things (the matching def and the
        // overlapping writes) in one backwards DFS pass.
        //
        // This starts by looking at the starting block's predecessors, and then it continues along
        // their predecessors. As soon as this finds a possible def() - one that defines the heap
        // location we want while dominating our starting block - it assumes that this one must be
        // the match. It then lets the DFS over predecessors complete, but it doesn't add the
        // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
        // heap, it stops, assuming that there is no possible match. Note that the write() case may
        // trigger before we find a def(), or after. Either way, the write() case causes this
        // function to immediately return nullptr.
        //
        // If the write() is found before we find the def(), then we know that any def() we would
        // find would have a path to us that trips over the write() and hence becomes invalid. This
        // is just a direct outcome of us looking for a def() that dominates us. Given a block A
        // that dominates block B - so that A is the one that would have the def() and B is our
        // starting block - we know that any other block must either be on the path from A to B, or
        // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
        // we already have found a block C that has a write(), then C must be on some path from A
        // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
        // def(), stopping on write() is the right thing to do.
        //
        // Stopping on write() is also the right thing to do after we find the def(). After we find
        // the def(), we don't add that block's predecessors to the search worklist. That means
        // that henceforth the only blocks we will see in the search are blocks on the path from
        // the def() to us. If any such block has a write() that clobbers our heap then we should
        // give up.
        //
        // Hence this graph search algorithm ends up being deceptively simple: any overlapping
        // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.

        Vector<BasicBlock*, 8> worklist;
        Vector<BasicBlock*, 8> seenList;
        BitVector seen;

        for (unsigned i = m_block->predecessors.size(); i--;) {
            BasicBlock* predecessor = m_block->predecessors[i];
            if (!seen.get(predecessor->index)) {
                worklist.append(predecessor);
                seen.set(predecessor->index);
            }
        }

        while (!worklist.isEmpty()) {
            BasicBlock* block = worklist.takeLast();
            seenList.append(block);

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Searching in block ", *block, "\n");
            ImpureBlockData& data = m_impureDataMap[block];

            // We require strict domination because this would only see things in our own block if
            // they came *after* our position in the block. Clearly, while our block dominates
            // itself, the things in the block after us don't dominate us.
            if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" It strictly dominates.\n");
                DFG_ASSERT(m_graph, m_node, data.didVisit);
                DFG_ASSERT(m_graph, m_node, !match);
                match = data.availableAtTail.get(location);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Availability: ", match, "\n");
                if (!!match) {
                    // Don't examine the predecessors of a match. At this point we just want to
                    // establish that other blocks on the path from here to there don't clobber
                    // the location we're interested in.
                    continue;
                }
            }

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Dealing with write set ", data.writes, "\n");
            if (data.writes.overlaps(location.heap())) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Clobbered.\n");
                return nullptr;
            }

            for (unsigned i = block->predecessors.size(); i--;) {
                BasicBlock* predecessor = block->predecessors[i];
                if (!seen.get(predecessor->index)) {
                    worklist.append(predecessor);
                    seen.set(predecessor->index);
                }
            }
        }

        if (!match)
            return nullptr;

        // Cache the results for next time. We cache them both for this block and for all of our
        // predecessors, since even though we've already visited our predecessors, our predecessors
        // probably have successors other than us.
        // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
        // the reduction in compile time would warrant the increase in complexity, though.
        // https://bugs.webkit.org/show_bug.cgi?id=134876
        for (BasicBlock* block : seenList)
            m_impureDataMap[block].availableAtTail.add(location, match);
        m_impureData->availableAtTail.add(location, match);

        return match;
    }

    void def(HeapLocation location, LazyNode value)
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got heap location def: ", location, " -> ", value, "\n");

        LazyNode match = findReplacement(location);

        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got match: ", match, "\n");

        if (!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Adding at-tail mapping: ", location, " -> ", value, "\n");
            auto result = m_impureData->availableAtTail.add(location, value);
            ASSERT_UNUSED(result, !result);
            return;
        }

        if (value.isNode() && value.asNode() == m_node) {
            if (!match.isNode()) {
                // We need to properly record the constant in order to use an existing one if applicable.
                // This ensures that re-running GCSE will not find new optimizations.
                match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
                auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
                bool replaced = false;
                if (!result.isNewEntry) {
                    for (unsigned i = result.iterator->value.size(); i--;) {
                        Node* candidate = result.iterator->value[i];
                        if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                            ASSERT(candidate);
                            match->replaceWith(m_graph, candidate);
                            match.setNode(candidate);
                            replaced = true;
                            break;
                        }
                    }
                }
                if (!replaced)
                    result.iterator->value.append(match.asNode());
            }
            ASSERT(match.asNode());
            m_node->replaceWith(m_graph, match.asNode());
            m_changed = true;
        }
    }

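    // Per-block summary used by the global pass: "writes" is everything the block clobbers
    // (computed up front in run()), "availableAtTail" is what the block makes available at its
    // exit, and "didVisit" lets us assert that dominating blocks were processed first.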
    struct ImpureBlockData {
        ImpureBlockData()
            : didVisit(false)
        {
        }

        ClobberSet writes;
        ImpureMap availableAtTail;
        bool didVisit;
    };

    Vector<BasicBlock*> m_preOrder;

    PureMultiMap m_pureValues;
    BlockMap<ImpureBlockData> m_impureDataMap;

    BasicBlock* m_block;
    Node* m_node;
    unsigned m_nodeIndex;
    ImpureBlockData* m_impureData;
    ClobberSet m_writesSoFar;
    InsertionSet m_insertionSet;

    bool m_changed;
};

} // anonymous namespace

bool performLocalCSE(Graph& graph)
{
    return runPhase<LocalCSEPhase>(graph);
}

bool performGlobalCSE(Graph& graph)
{
    return runPhase<GlobalCSEPhase>(graph);
}

} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT)