/*
 * Copyright (C) 2011-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFGCSEPhase.h"

#if ENABLE(DFG_JIT)

#include "DFGAbstractHeap.h"
#include "DFGBlockMapInlines.h"
#include "DFGClobberSet.h"
#include "DFGClobberize.h"
#include "DFGDominators.h"
#include "DFGGraph.h"
#include "DFGPhase.h"
#include "JSCInlines.h"
#include <array>

namespace JSC { namespace DFG {

// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.

namespace {

namespace DFGCSEPhaseInternal {
static constexpr bool verbose = false;
}

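// An ImpureDataSlot holds a single (HeapLocation -> LazyNode) entry. Slots are allocated
// individually on the heap so their addresses stay stable while the owning hash tables grow,
// which lets ImpureMap hand out raw ImpureDataSlot* pointers to callers.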
class ImpureDataSlot {
    WTF_MAKE_NONCOPYABLE(ImpureDataSlot);
    WTF_MAKE_FAST_ALLOCATED;
public:
    ImpureDataSlot(HeapLocation key, LazyNode value, unsigned hash)
        : key(key), value(value), hash(hash)
    { }

    HeapLocation key;
    LazyNode value;
    unsigned hash;
};

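// The sets below store std::unique_ptr<ImpureDataSlot> but hash and compare slots by the
// HeapLocation they carry: the hash is precomputed into ImpureDataSlot::hash, and equality can
// be pointer identity because each table holds at most one slot per HeapLocation.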
struct ImpureDataSlotHash : public DefaultHash<std::unique_ptr<ImpureDataSlot>>::Hash {
    static unsigned hash(const std::unique_ptr<ImpureDataSlot>& key)
    {
        return key->hash;
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& a, const std::unique_ptr<ImpureDataSlot>& b)
    {
        // ImpureDataSlots are unique per table per HeapLocation. This lets us compare keys
        // by just comparing the pointers of the unique ImpureDataSlots.
        ASSERT(a != b || a->key == b->key);
        return a == b;
    }
};

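// The translator lets us query and insert into the set using a plain HeapLocation, so a lookup
// does not require constructing a throwaway ImpureDataSlot first; a slot is only allocated when
// an insertion actually creates a new entry.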
struct ImpureDataTranslator {
    static unsigned hash(const HeapLocation& key)
    {
        return key.hash();
    }

    static bool equal(const std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key)
    {
        if (!slot)
            return false;
        if (HashTraits<std::unique_ptr<ImpureDataSlot>>::isDeletedValue(slot))
            return false;
        return slot->key == key;
    }

    static void translate(std::unique_ptr<ImpureDataSlot>& slot, const HeapLocation& key, unsigned hashCode)
    {
        new (NotNull, std::addressof(slot)) std::unique_ptr<ImpureDataSlot>(new ImpureDataSlot {key, LazyNode(), hashCode});
    }
};

class ImpureMap {
    WTF_MAKE_FAST_ALLOCATED;
    WTF_MAKE_NONCOPYABLE(ImpureMap);
public:
    ImpureMap() = default;

    ImpureMap(ImpureMap&& other)
    {
        m_abstractHeapStackMap.swap(other.m_abstractHeapStackMap);
        m_fallbackStackMap.swap(other.m_fallbackStackMap);
        m_heapMap.swap(other.m_heapMap);
#if !defined(NDEBUG)
        m_debugImpureData.swap(other.m_debugImpureData);
#endif
    }

    const ImpureDataSlot* add(const HeapLocation& location, const LazyNode& node)
    {
        const ImpureDataSlot* result = addImpl(location, node);

#if !defined(NDEBUG)
        auto addResult = m_debugImpureData.add(location, node);
        ASSERT(!!result == !addResult.isNewEntry);
#endif
        return result;
    }

    LazyNode get(const HeapLocation& location) const
    {
        LazyNode result = getImpl(location);
#if !defined(NDEBUG)
        ASSERT(result == m_debugImpureData.get(location));
#endif
        return result;
    }

    void clobber(AbstractHeap heap, bool clobberConservatively)
    {
        switch (heap.kind()) {
        case World: {
            clear();
            break;
        }
        case SideState:
            break;
        case Stack: {
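            // Stack writes name a specific abstract-heap payload, so the matching slot in
            // m_abstractHeapStackMap can be dropped in O(1). Entries that had to spill into
            // m_fallbackStackMap are either cleared wholesale (conservative mode, used by
            // HugeMaps) or filtered by overlap (precise mode).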
            ASSERT(!heap.payload().isTop());
            ASSERT(heap.payload().value() == heap.payload().value32());
            m_abstractHeapStackMap.remove(heap.payload().value32());
            if (clobberConservatively)
                m_fallbackStackMap.clear();
            else
                clobber(m_fallbackStackMap, heap);
            break;
        }
        default:
            if (clobberConservatively)
                m_heapMap.clear();
            else
                clobber(m_heapMap, heap);
            break;
        }
#if !defined(NDEBUG)
        m_debugImpureData.removeIf([heap, clobberConservatively, this](const HashMap<HeapLocation, LazyNode>::KeyValuePairType& pair) -> bool {
            switch (heap.kind()) {
            case World:
            case SideState:
                break;
            case Stack: {
                if (!clobberConservatively)
                    break;
                if (pair.key.heap().kind() == Stack) {
                    auto iterator = m_abstractHeapStackMap.find(pair.key.heap().payload().value32());
                    if (iterator != m_abstractHeapStackMap.end() && iterator->value->key == pair.key)
                        return false;
                    return true;
                }
                break;
            }
            default: {
                if (!clobberConservatively)
                    break;
                AbstractHeapKind kind = pair.key.heap().kind();
                if (kind != World && kind != SideState && kind != Stack)
                    return true;
                break;
            }
            }
            return heap.overlaps(pair.key.heap());
        });
        ASSERT(m_debugImpureData.size()
            == (m_heapMap.size()
                + m_abstractHeapStackMap.size()
                + m_fallbackStackMap.size()));

        const bool verifyClobber = false;
        if (verifyClobber) {
            for (auto& pair : m_debugImpureData)
                ASSERT(!!get(pair.key));
        }
#endif
    }

    void clear()
    {
        m_abstractHeapStackMap.clear();
        m_fallbackStackMap.clear();
        m_heapMap.clear();
#if !defined(NDEBUG)
        m_debugImpureData.clear();
#endif
    }

private:
    typedef HashSet<std::unique_ptr<ImpureDataSlot>, ImpureDataSlotHash> Map;

    const ImpureDataSlot* addImpl(const HeapLocation& location, const LazyNode& node)
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
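            // Most stack locations get their own entry in m_abstractHeapStackMap, keyed by the
            // abstract heap's payload. Only TOP payloads and locations that collide on the same
            // payload with a different HeapLocation fall through to m_fallbackStackMap.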
            AbstractHeap abstractHeap = location.heap();
            if (abstractHeap.payload().isTop())
                return add(m_fallbackStackMap, location, node);
            ASSERT(abstractHeap.payload().value() == abstractHeap.payload().value32());
            auto addResult = m_abstractHeapStackMap.add(abstractHeap.payload().value32(), nullptr);
            if (addResult.isNewEntry) {
                addResult.iterator->value.reset(new ImpureDataSlot {location, node, 0});
                return nullptr;
            }
            if (addResult.iterator->value->key == location)
                return addResult.iterator->value.get();
            return add(m_fallbackStackMap, location, node);
        }
        default:
            return add(m_heapMap, location, node);
        }
        return nullptr;
    }

    LazyNode getImpl(const HeapLocation& location) const
    {
        switch (location.heap().kind()) {
        case World:
        case SideState:
            RELEASE_ASSERT_NOT_REACHED();
        case Stack: {
            ASSERT(location.heap().payload().value() == location.heap().payload().value32());
            auto iterator = m_abstractHeapStackMap.find(location.heap().payload().value32());
            if (iterator != m_abstractHeapStackMap.end()
                && iterator->value->key == location)
                return iterator->value->value;
            return get(m_fallbackStackMap, location);
        }
        default:
            return get(m_heapMap, location);
        }
        return LazyNode();
    }

    static const ImpureDataSlot* add(Map& map, const HeapLocation& location, const LazyNode& node)
    {
        auto result = map.add<ImpureDataTranslator>(location);
        if (result.isNewEntry) {
            (*result.iterator)->value = node;
            return nullptr;
        }
        return result.iterator->get();
    }

    static LazyNode get(const Map& map, const HeapLocation& location)
    {
        auto iterator = map.find<ImpureDataTranslator>(location);
        if (iterator != map.end())
            return (*iterator)->value;
        return LazyNode();
    }

    static void clobber(Map& map, AbstractHeap heap)
    {
        map.removeIf([heap](const std::unique_ptr<ImpureDataSlot>& slot) -> bool {
            return heap.overlaps(slot->key.heap());
        });
    }

    // The majority of impure stack slots are unique per value.
    // This is very useful for a fast clobber(): we can just remove the slot addressed by the
    // AbstractHeap in O(1).
    //
    // When there are conflicts, any additional HeapLocation is added to the fallback map.
    // This works well because m_fallbackStackMap remains tiny.
    //
    // One cannot assume a unique ImpureDataSlot is in m_abstractHeapStackMap. It may have been
    // a duplicate in the past and now only lives in m_fallbackStackMap.
    //
    // Obviously, TOP always goes into m_fallbackStackMap since it does not have a unique value.
    HashMap<int32_t, std::unique_ptr<ImpureDataSlot>, DefaultHash<int32_t>::Hash, WTF::SignedWithZeroKeyHashTraits<int32_t>> m_abstractHeapStackMap;
    Map m_fallbackStackMap;

    Map m_heapMap;

#if !defined(NDEBUG)
    HashMap<HeapLocation, LazyNode> m_debugImpureData;
#endif
};

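// LocalCSE runs over each basic block independently. It picks one of three map implementations
// based on block size: list-backed SmallMaps for blocks of up to SmallMaps::capacity nodes,
// hash-backed LargeMaps for blocks up to Options::maxDFGNodesInBasicBlockForPreciseAnalysis(),
// and HugeMaps (which clobbers conservatively) for anything bigger.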
class LocalCSEPhase : public Phase {
public:
    LocalCSEPhase(Graph& graph)
        : Phase(graph, "local common subexpression elimination")
        , m_smallBlock(graph)
        , m_largeBlock(graph)
        , m_hugeBlock(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);

        bool changed = false;

        m_graph.clearReplacements();

        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;

            if (block->size() <= SmallMaps::capacity)
                changed |= m_smallBlock.run(block);
            else if (block->size() <= Options::maxDFGNodesInBasicBlockForPreciseAnalysis())
                changed |= m_largeBlock.run(block);
            else
                changed |= m_hugeBlock.run(block);
        }

        return changed;
    }

private:
    class SmallMaps {
    public:
        // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
        // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
        // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
        // in one of these "small" list-based maps. That number still seems largeish, except that
        // the overhead of HashMaps can be quite high currently: clearing them, or even removing
        // enough things from them, deletes (or resizes) their backing store eagerly. Hence
        // HashMaps induce a lot of malloc traffic.
        static constexpr unsigned capacity = 100;

        SmallMaps()
            : m_pureLength(0)
            , m_impureLength(0)
        {
        }

        void clear()
        {
            m_pureLength = 0;
            m_impureLength = 0;
        }

        void write(AbstractHeap heap)
        {
            if (heap.kind() == SideState)
                return;

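            // Unordered removal: overwrite any overlapping entry with the last entry and shrink
            // the list, re-examining the same index since it now holds a different pair.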
            for (unsigned i = 0; i < m_impureLength; ++i) {
                if (heap.overlaps(m_impureMap[i].key.heap()))
                    m_impureMap[i--] = m_impureMap[--m_impureLength];
            }
        }

        Node* addPure(PureValue value, Node* node)
        {
            for (unsigned i = m_pureLength; i--;) {
                if (m_pureMap[i].key == value)
                    return m_pureMap[i].value;
            }

            RELEASE_ASSERT(m_pureLength < capacity);
            m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
            return nullptr;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            for (unsigned i = m_impureLength; i--;) {
                if (m_impureMap[i].key == location)
                    return m_impureMap[i].value;
            }
            return nullptr;
        }

        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            // FIXME: If we are using small maps, we must not def() derived values.
            // For now the only derived values we def() are constant-based.
            if (location.index() && !location.index().isNode())
                return nullptr;
            if (LazyNode result = findReplacement(location))
                return result;
            RELEASE_ASSERT(m_impureLength < capacity);
            m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
            return nullptr;
        }

    private:
        WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
        WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
        unsigned m_pureLength;
        unsigned m_impureLength;
    };

    class LargeMaps {
    public:
        LargeMaps()
        {
        }

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = false;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    // This is used only for huge basic blocks. Our usual CSE is quadratic in the number of DFG
    // nodes in a basic block. HugeMaps models results conservatively to avoid an O(N^2) algorithm.
    // In particular, we clear all the slots of the specified heap kind in ImpureMap instead of
    // iterating the slots and removing matched ones. This change makes the complexity O(N).
    // FIXME: We can make LargeMaps O(N) without introducing conservative behavior if we track
    // clobbering by hierarchical epochs.
    // https://bugs.webkit.org/show_bug.cgi?id=200014
    class HugeMaps {
    public:
        HugeMaps() = default;

        void clear()
        {
            m_pureMap.clear();
            m_impureMap.clear();
        }

        void write(AbstractHeap heap)
        {
            bool clobberConservatively = true;
            m_impureMap.clobber(heap, clobberConservatively);
        }

        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;
            return result.iterator->value;
        }

        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }

        LazyNode addImpure(const HeapLocation& location, const LazyNode& node)
        {
            if (const ImpureDataSlot* slot = m_impureMap.add(location, node))
                return slot->value;
            return LazyNode();
        }

    private:
        HashMap<PureValue, Node*> m_pureMap;
        ImpureMap m_impureMap;
    };

    template<typename Maps>
    class BlockCSE {
    public:
        BlockCSE(Graph& graph)
            : m_graph(graph)
            , m_insertionSet(graph)
        {
        }

        bool run(BasicBlock* block)
        {
            m_maps.clear();
            m_changed = false;
            m_block = block;

            for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
                m_node = block->at(nodeIndex);
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else {
                    // This rule only makes sense for local CSE, since in SSA form we have already
                    // factored the bounds check out of the PutByVal. It's kind of gross, but we
                    // still have reason to believe that PutByValAlias is a good optimization and
                    // that it's better to do it with a single node rather than separating out the
                    // CheckInBounds.
                    if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
                        HeapLocation heap;

                        Node* base = m_graph.varArgChild(m_node, 0).node();
                        Node* index = m_graph.varArgChild(m_node, 1).node();
                        LocationKind indexedPropertyLoc = indexedPropertyLocForResultType(m_node->result());

                        ArrayMode mode = m_node->arrayMode();
                        switch (mode.type()) {
                        case Array::Int32:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedInt32Properties, base, index);
                            break;

                        case Array::Double: {
                            if (!mode.isInBounds())
                                break;
                            LocationKind kind = mode.isSaneChain() ? IndexedPropertyDoubleSaneChainLoc : IndexedPropertyDoubleLoc;
                            heap = HeapLocation(kind, IndexedDoubleProperties, base, index);
                            break;
                        }

                        case Array::Contiguous:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(indexedPropertyLoc, IndexedContiguousProperties, base, index);
                            break;

                        case Array::Int8Array:
                        case Array::Int16Array:
                        case Array::Int32Array:
                        case Array::Uint8Array:
                        case Array::Uint8ClampedArray:
                        case Array::Uint16Array:
                        case Array::Uint32Array:
                        case Array::Float32Array:
                        case Array::Float64Array:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                indexedPropertyLoc, TypedArrayProperties, base, index);
                            break;

                        default:
                            break;
                        }

                        if (!!heap && m_maps.findReplacement(heap))
                            m_node->setOp(PutByValAlias);
                    }

                    clobberize(m_graph, m_node, *this);
                }
            }

            m_insertionSet.execute(block);

            return m_changed;
        }

        void read(AbstractHeap) { }

        void write(AbstractHeap heap)
        {
            m_maps.write(heap);
        }

        void def(PureValue value)
        {
            Node* match = m_maps.addPure(value, m_node);
            if (!match)
                return;

            m_node->replaceWith(m_graph, match);
            m_changed = true;
        }

        void def(const HeapLocation& location, const LazyNode& value)
        {
            LazyNode match = m_maps.addImpure(location, value);
            if (!match)
                return;

            if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all uses
                //   of this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.

                m_node->child1() = Edge();
                m_graph.dethread();
            }

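            // If the value this node defines is the node itself (rather than, say, a lazily-known
            // constant), then an earlier node already produces the same value: materialize the
            // match if needed and reroute all uses of this node to it.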
            if (value.isNode() && value.asNode() == m_node) {
                match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
                ASSERT(match.isNode());
                m_node->replaceWith(m_graph, match.asNode());
                m_changed = true;
            }
        }

    private:
        Graph& m_graph;

        bool m_changed;
        Node* m_node;
        BasicBlock* m_block;

        Maps m_maps;

        InsertionSet m_insertionSet;
    };

    BlockCSE<SmallMaps> m_smallBlock;
    BlockCSE<LargeMaps> m_largeBlock;
    BlockCSE<HugeMaps> m_hugeBlock;
};

class GlobalCSEPhase : public Phase {
public:
    GlobalCSEPhase(Graph& graph)
        : Phase(graph, "global common subexpression elimination")
        , m_impureDataMap(graph)
        , m_insertionSet(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == SSA);

        m_graph.initializeNodeOwners();
        m_graph.ensureSSADominators();

        m_preOrder = m_graph.blocksInPreOrder();

        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
        for (unsigned i = m_preOrder.size(); i--;) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
                addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
        }

        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.

        bool changed = iterate();

        // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
        // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
        // this, we'd have to first reset all of our state.
        // https://bugs.webkit.org/show_bug.cgi?id=145853

        return changed;
    }

    bool iterate()
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog("Performing iteration.\n");

        m_changed = false;
        m_graph.clearReplacements();

        for (unsigned i = 0; i < m_preOrder.size(); ++i) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            m_writesSoFar.clear();

            if (DFGCSEPhaseInternal::verbose)
                dataLog("Processing block ", *m_block, ":\n");

            for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
                m_nodeIndex = nodeIndex;
                m_node = m_block->at(nodeIndex);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Looking at node ", m_node, ":\n");

                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity || m_node->op() == IdentityWithProfile) {
                    m_node->replaceWith(m_graph, m_node->child1().node());
                    m_changed = true;
                } else
                    clobberize(m_graph, m_node, *this);
            }

            m_insertionSet.execute(m_block);

            m_impureData->didVisit = true;
        }

        return m_changed;
    }

    void read(AbstractHeap) { }

    void write(AbstractHeap heap)
    {
        bool clobberConservatively = false;
        m_impureData->availableAtTail.clobber(heap, clobberConservatively);
        m_writesSoFar.add(heap);
    }

    void def(PureValue value)
    {
        // With pure values we do not have to worry about the possibility of some control flow path
        // clobbering the value. So, we just search for all of the like values that have been
        // computed. We pick one that is in a block that dominates ours. Note that this means that
        // a PureValue will map to a list of nodes, since there may be many places in the control
        // flow graph that compute a value but only one of them that dominates us. We may build up
        // a large list of nodes that compute some value in the case of gnarly control flow. This
        // is probably OK.

        auto result = m_pureValues.add(value, Vector<Node*>());
        if (result.isNewEntry) {
            result.iterator->value.append(m_node);
            return;
        }

        for (unsigned i = result.iterator->value.size(); i--;) {
            Node* candidate = result.iterator->value[i];
            if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                m_node->replaceWith(m_graph, candidate);
                m_changed = true;
                return;
            }
        }

        result.iterator->value.append(m_node);
    }

    LazyNode findReplacement(HeapLocation location)
    {
        // At this instant, our "availableAtTail" reflects the set of things that are available in
        // this block so far. We check this map to find block-local CSE opportunities before doing
        // a global search.
        LazyNode match = m_impureData->availableAtTail.get(location);
        if (!!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Found local match: ", match, "\n");
            return match;
        }

        // If it's not available at this point in the block, and at some prior point in the block
        // we have clobbered this heap location, then there is no point in doing a global search.
        if (m_writesSoFar.overlaps(location.heap())) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Not looking globally because of local clobber: ", m_writesSoFar, "\n");
            return nullptr;
        }

        // This performs a backward search over the control flow graph to find some possible
        // non-local def() that matches our heap location. Such a match is only valid if there does
        // not exist any path from that def() to our block that contains a write() that overlaps
        // our heap. This algorithm looks for both of these things (the matching def and the
        // overlapping writes) in one backwards DFS pass.
        //
        // This starts by looking at the starting block's predecessors, and then it continues along
        // their predecessors. As soon as this finds a possible def() - one that defines the heap
        // location we want while dominating our starting block - it assumes that this one must be
        // the match. It then lets the DFS over predecessors complete, but it doesn't add the
        // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
        // heap, it stops, assuming that there is no possible match. Note that the write() case may
        // trigger before we find a def(), or after. Either way, the write() case causes this
        // function to immediately return nullptr.
        //
        // If the write() is found before we find the def(), then we know that any def() we would
        // find would have a path to us that trips over the write() and hence becomes invalid. This
        // is just a direct outcome of us looking for a def() that dominates us. Given a block A
        // that dominates block B - so that A is the one that would have the def() and B is our
        // starting block - we know that any other block must either be on the path from A to B, or
        // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
        // we already have found a block C that has a write(), then C must be on some path from A
        // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
        // def(), stopping on write() is the right thing to do.
        //
        // Stopping on write() is also the right thing to do after we find the def(). After we find
        // the def(), we don't add that block's predecessors to the search worklist. That means
        // that henceforth the only blocks we will see in the search are blocks on the path from
        // the def() to us. If any such block has a write() that clobbers our heap then we should
        // give up.
        //
        // Hence this graph search algorithm ends up being deceptively simple: any overlapping
        // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.

        Vector<BasicBlock*, 8> worklist;
        Vector<BasicBlock*, 8> seenList;
        BitVector seen;

        for (unsigned i = m_block->predecessors.size(); i--;) {
            BasicBlock* predecessor = m_block->predecessors[i];
            if (!seen.get(predecessor->index)) {
                worklist.append(predecessor);
                seen.set(predecessor->index);
            }
        }

        while (!worklist.isEmpty()) {
            BasicBlock* block = worklist.takeLast();
            seenList.append(block);

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Searching in block ", *block, "\n");
            ImpureBlockData& data = m_impureDataMap[block];

            // We require strict domination because this would only see things in our own block if
            // they came *after* our position in the block. Clearly, while our block dominates
            // itself, the things in the block after us don't dominate us.
            if (m_graph.m_ssaDominators->strictlyDominates(block, m_block)) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" It strictly dominates.\n");
                DFG_ASSERT(m_graph, m_node, data.didVisit);
                DFG_ASSERT(m_graph, m_node, !match);
                match = data.availableAtTail.get(location);
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Availability: ", match, "\n");
                if (!!match) {
                    // Don't examine the predecessors of a match. At this point we just want to
                    // establish that other blocks on the path from here to there don't clobber
                    // the location we're interested in.
                    continue;
                }
            }

            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Dealing with write set ", data.writes, "\n");
            if (data.writes.overlaps(location.heap())) {
                if (DFGCSEPhaseInternal::verbose)
                    dataLog(" Clobbered.\n");
                return nullptr;
            }

            for (unsigned i = block->predecessors.size(); i--;) {
                BasicBlock* predecessor = block->predecessors[i];
                if (!seen.get(predecessor->index)) {
                    worklist.append(predecessor);
                    seen.set(predecessor->index);
                }
            }
        }

        if (!match)
            return nullptr;

        // Cache the results for next time. We cache them both for this block and for all of our
        // predecessors, since even though we've already visited our predecessors, our predecessors
        // probably have successors other than us.
        // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
        // the reduction in compile time would warrant the increase in complexity, though.
        // https://bugs.webkit.org/show_bug.cgi?id=134876
        for (BasicBlock* block : seenList)
            m_impureDataMap[block].availableAtTail.add(location, match);
        m_impureData->availableAtTail.add(location, match);

        return match;
    }

    void def(HeapLocation location, LazyNode value)
    {
        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got heap location def: ", location, " -> ", value, "\n");

        LazyNode match = findReplacement(location);

        if (DFGCSEPhaseInternal::verbose)
            dataLog(" Got match: ", match, "\n");

        if (!match) {
            if (DFGCSEPhaseInternal::verbose)
                dataLog(" Adding at-tail mapping: ", location, " -> ", value, "\n");
            auto result = m_impureData->availableAtTail.add(location, value);
            ASSERT_UNUSED(result, !result);
            return;
        }

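        // There is a match, and this node is the node that would have provided the value: reroute
        // all uses of this node to the match. If the match is a lazily-materialized constant, it
        // is inserted first and also recorded in m_pureValues so that a later def() of the same
        // constant reuses it instead of creating a duplicate.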
        if (value.isNode() && value.asNode() == m_node) {
            if (!match.isNode()) {
                // We need to properly record the constant in order to use an existing one if applicable.
                // This ensures that re-running GCSE will not find new optimizations.
                match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
                auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
                bool replaced = false;
                if (!result.isNewEntry) {
                    for (unsigned i = result.iterator->value.size(); i--;) {
                        Node* candidate = result.iterator->value[i];
                        if (m_graph.m_ssaDominators->dominates(candidate->owner, m_block)) {
                            ASSERT(candidate);
                            match->replaceWith(m_graph, candidate);
                            match.setNode(candidate);
                            replaced = true;
                            break;
                        }
                    }
                }
                if (!replaced)
                    result.iterator->value.append(match.asNode());
            }
            ASSERT(match.asNode());
            m_node->replaceWith(m_graph, match.asNode());
            m_changed = true;
        }
    }

    struct ImpureBlockData {
        ImpureBlockData()
            : didVisit(false)
        {
        }

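        // writes summarizes every abstract heap written anywhere in the block (computed up front
        // in run()); availableAtTail maps heap locations to the values known to be available at
        // the end of the block; didVisit is set once iterate() has processed the block.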
        ClobberSet writes;
        ImpureMap availableAtTail;
        bool didVisit;
    };

    Vector<BasicBlock*> m_preOrder;

    PureMultiMap m_pureValues;
    BlockMap<ImpureBlockData> m_impureDataMap;

    BasicBlock* m_block;
    Node* m_node;
    unsigned m_nodeIndex;
    ImpureBlockData* m_impureData;
    ClobberSet m_writesSoFar;
    InsertionSet m_insertionSet;

    bool m_changed;
};

} // anonymous namespace

bool performLocalCSE(Graph& graph)
{
    return runPhase<LocalCSEPhase>(graph);
}

bool performGlobalCSE(Graph& graph)
{
    return runPhase<GlobalCSEPhase>(graph);
}

} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT)
