1/*
2 * Copyright (C) 2015-2018 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "B3ReduceStrength.h"
28
29#if ENABLE(B3_JIT)
30
31#include "B3AtomicValue.h"
32#include "B3BasicBlockInlines.h"
33#include "B3BlockInsertionSet.h"
34#include "B3ComputeDivisionMagic.h"
35#include "B3Dominators.h"
36#include "B3EliminateDeadCode.h"
37#include "B3InsertionSetInlines.h"
38#include "B3MemoryValueInlines.h"
39#include "B3PhaseScope.h"
40#include "B3PhiChildren.h"
41#include "B3ProcedureInlines.h"
42#include "B3PureCSE.h"
43#include "B3SlotBaseValue.h"
44#include "B3StackSlot.h"
45#include "B3UpsilonValue.h"
46#include "B3ValueKeyInlines.h"
47#include "B3ValueInlines.h"
48#include "B3Variable.h"
49#include "B3VariableValue.h"
50#include <wtf/GraphNodeWorklist.h>
51#include <wtf/HashMap.h>
52#include <wtf/IndexSet.h>
53
54namespace JSC { namespace B3 {
55
56namespace {
57
58// The goal of this phase is to:
59//
60// - Replace operations with less expensive variants. This includes constant folding and classic
61// strength reductions like turning Mul(x, 1 << k) into Shl(x, k).
62//
63// - Reassociate constant operations. For example, Load(Add(x, c)) is turned into Load(x, offset = c)
64// and Add(Add(x, c), d) is turned into Add(x, c + d).
65//
66// - Canonicalize operations. There are some cases where it's not at all obvious which kind of
67// operation is less expensive, but it's useful for subsequent phases - particularly LowerToAir -
68// to have only one way of representing things.
69//
70// This phase runs to fixpoint. Therefore, the canonicalizations must be designed to be monotonic.
71// For example, if we had a canonicalization that said that Add(x, -c) should be Sub(x, c) and
72// another canonicalization that said that Sub(x, d) should be Add(x, -d), then this phase would end
73// up running forever. We don't want that.
74//
75// Therefore, we need to prioritize certain canonical forms over others. Naively, we want strength
76// reduction to reduce the number of values, and so a form involving fewer total values is more
77// canonical. But we might break this, for example when reducing strength of Mul(x, 9). This could be
78// better written as Add(Shl(x, 3), x), which also happens to be representable using a single
79// instruction on x86.
80//
81// Here are some of the rules we have:
82//
83// Canonical form of logical not: BitXor(value, 1). We may have to avoid using this form if we don't
84// know for sure that 'value' is 0-or-1 (i.e. returnsBool). In that case we fall back on
85// Equal(value, 0).
86//
87// Canonical form of commutative operations: if the operation involves a constant, the constant must
88// come second. Add(x, constant) is canonical, while Add(constant, x) is not. If there are no
89// constants then the canonical form involves the lower-indexed value first. Given Add(x, y), it's
90// canonical if x->index() <= y->index().
91
92namespace B3ReduceStrengthInternal {
93static const bool verbose = false;
94}
95
96// FIXME: This IntRange stuff should be refactored into a general constant propagator. It's weird
97// that it's just sitting here in this file.
class IntRange {
public:
    // Default: the degenerate range [0, 0] (from the member initializers).
    IntRange()
    {
    }

    // Range covering [min, max]; both bounds are inclusive.
    IntRange(int64_t min, int64_t max)
        : m_min(min)
        , m_max(max)
    {
    }

    // The widest possible range for the integer type T.
    template<typename T>
    static IntRange top()
    {
        return IntRange(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    }

    // The widest possible range for the given B3 integer type.
    static IntRange top(Type type)
    {
        switch (type) {
        case Int32:
            return top<int32_t>();
        case Int64:
            return top<int64_t>();
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of a value known to be masked by 'mask': [0, mask]. If mask is
    // all-ones (mask + 1 == 0), masking constrains nothing, so return top.
    // NOTE(review): for signed T this assumes mask is either all-ones or
    // non-negative; a negative, non-all-ones mask would yield min > max.
    template<typename T>
    static IntRange rangeForMask(T mask)
    {
        if (!(mask + 1))
            return top<T>();
        return IntRange(0, mask);
    }

    // Typed dispatcher for rangeForMask; truncates the mask for Int32.
    static IntRange rangeForMask(int64_t mask, Type type)
    {
        switch (type) {
        case Int32:
            return rangeForMask<int32_t>(static_cast<int32_t>(mask));
        case Int64:
            return rangeForMask<int64_t>(mask);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of any value after an unsigned right shift by shiftAmount:
    // the result fits under the all-ones-shifted-down mask. For shiftAmount
    // of zero the mask stays all-ones and rangeForMask returns top.
    template<typename T>
    static IntRange rangeForZShr(int32_t shiftAmount)
    {
        typename std::make_unsigned<T>::type mask = 0;
        mask--; // Unsigned wraparound: 0 - 1 == all ones.
        mask >>= shiftAmount;
        return rangeForMask<T>(static_cast<T>(mask));
    }

    // Typed dispatcher for rangeForZShr.
    static IntRange rangeForZShr(int32_t shiftAmount, Type type)
    {
        switch (type) {
        case Int32:
            return rangeForZShr<int32_t>(shiftAmount);
        case Int64:
            return rangeForZShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    int64_t min() const { return m_min; }
    int64_t max() const { return m_max; }

    void dump(PrintStream& out) const
    {
        out.print("[", m_min, ",", m_max, "]");
    }

    // Conservatively tests whether adding any value in this range to any value
    // in 'other' could overflow T, by checking all four endpoint combinations.
    template<typename T>
    bool couldOverflowAdd(const IntRange& other)
    {
        return sumOverflows<T>(m_min, other.m_min)
            || sumOverflows<T>(m_min, other.m_max)
            || sumOverflows<T>(m_max, other.m_min)
            || sumOverflows<T>(m_max, other.m_max);
    }

    // Typed dispatcher for couldOverflowAdd; unknown types conservatively
    // report "could overflow".
    bool couldOverflowAdd(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return couldOverflowAdd<int32_t>(other);
        case Int64:
            return couldOverflowAdd<int64_t>(other);
        default:
            return true;
        }
    }

    // Like couldOverflowAdd, but for subtraction.
    template<typename T>
    bool couldOverflowSub(const IntRange& other)
    {
        return differenceOverflows<T>(m_min, other.m_min)
            || differenceOverflows<T>(m_min, other.m_max)
            || differenceOverflows<T>(m_max, other.m_min)
            || differenceOverflows<T>(m_max, other.m_max);
    }

    // Typed dispatcher for couldOverflowSub; unknown types conservatively
    // report "could overflow".
    bool couldOverflowSub(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return couldOverflowSub<int32_t>(other);
        case Int64:
            return couldOverflowSub<int64_t>(other);
        default:
            return true;
        }
    }

    // Like couldOverflowAdd, but for multiplication.
    template<typename T>
    bool couldOverflowMul(const IntRange& other)
    {
        return productOverflows<T>(m_min, other.m_min)
            || productOverflows<T>(m_min, other.m_max)
            || productOverflows<T>(m_max, other.m_min)
            || productOverflows<T>(m_max, other.m_max);
    }

    // Typed dispatcher for couldOverflowMul; unknown types conservatively
    // report "could overflow".
    bool couldOverflowMul(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return couldOverflowMul<int32_t>(other);
        case Int64:
            return couldOverflowMul<int64_t>(other);
        default:
            return true;
        }
    }

    // Range after a left shift. Overflow of an endpoint is detected by
    // shifting the result back and comparing; a clobbered endpoint widens to
    // the corresponding type limit.
    template<typename T>
    IntRange shl(int32_t shiftAmount)
    {
        T newMin = static_cast<T>(m_min) << static_cast<T>(shiftAmount);
        T newMax = static_cast<T>(m_max) << static_cast<T>(shiftAmount);

        if ((newMin >> shiftAmount) != static_cast<T>(m_min))
            newMin = std::numeric_limits<T>::min();
        if ((newMax >> shiftAmount) != static_cast<T>(m_max))
            newMax = std::numeric_limits<T>::max();

        return IntRange(newMin, newMax);
    }

    // Typed dispatcher for shl.
    IntRange shl(int32_t shiftAmount, Type type)
    {
        switch (type) {
        case Int32:
            return shl<int32_t>(shiftAmount);
        case Int64:
            return shl<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range after a signed (arithmetic) right shift. The shift is monotonic,
    // so shifting both endpoints is exact.
    template<typename T>
    IntRange sShr(int32_t shiftAmount)
    {
        T newMin = static_cast<T>(m_min) >> static_cast<T>(shiftAmount);
        T newMax = static_cast<T>(m_max) >> static_cast<T>(shiftAmount);

        return IntRange(newMin, newMax);
    }

    // Typed dispatcher for sShr.
    IntRange sShr(int32_t shiftAmount, Type type)
    {
        switch (type) {
        case Int32:
            return sShr<int32_t>(shiftAmount);
        case Int64:
            return sShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range after an unsigned (logical) right shift.
    template<typename T>
    IntRange zShr(int32_t shiftAmount)
    {
        // This is an awkward corner case for all of the other logic.
        if (!shiftAmount)
            return *this;

        // If the input range may be negative, then all we can say about the output range is that it
        // will be masked. That's because -1 right shifted just produces that mask.
        if (m_min < 0)
            return rangeForZShr<T>(shiftAmount);

        // If the input range is non-negative, then this just brings the range closer to zero.
        typedef typename std::make_unsigned<T>::type UnsignedT;
        UnsignedT newMin = static_cast<UnsignedT>(m_min) >> static_cast<UnsignedT>(shiftAmount);
        UnsignedT newMax = static_cast<UnsignedT>(m_max) >> static_cast<UnsignedT>(shiftAmount);

        return IntRange(newMin, newMax);
    }

    // Typed dispatcher for zShr.
    IntRange zShr(int32_t shiftAmount, Type type)
    {
        switch (type) {
        case Int32:
            return zShr<int32_t>(shiftAmount);
        case Int64:
            return zShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of the sum; falls back to top if any endpoint sum could overflow.
    template<typename T>
    IntRange add(const IntRange& other)
    {
        if (couldOverflowAdd<T>(other))
            return top<T>();
        return IntRange(m_min + other.m_min, m_max + other.m_max);
    }

    // Typed dispatcher for add.
    IntRange add(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return add<int32_t>(other);
        case Int64:
            return add<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of the difference; note the min pairs with the other range's max
    // and vice versa. Falls back to top on possible overflow.
    template<typename T>
    IntRange sub(const IntRange& other)
    {
        if (couldOverflowSub<T>(other))
            return top<T>();
        return IntRange(m_min - other.m_max, m_max - other.m_min);
    }

    // Typed dispatcher for sub.
    IntRange sub(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return sub<int32_t>(other);
        case Int64:
            return sub<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of the product. Since signs may flip the ordering, take the
    // min/max over all four endpoint products. Falls back to top on possible
    // overflow.
    template<typename T>
    IntRange mul(const IntRange& other)
    {
        if (couldOverflowMul<T>(other))
            return top<T>();
        return IntRange(
            std::min(
                std::min(m_min * other.m_min, m_min * other.m_max),
                std::min(m_max * other.m_min, m_max * other.m_max)),
            std::max(
                std::max(m_min * other.m_min, m_min * other.m_max),
                std::max(m_max * other.m_min, m_max * other.m_max)));
    }

    // Typed dispatcher for mul.
    IntRange mul(const IntRange& other, Type type)
    {
        switch (type) {
        case Int32:
            return mul<int32_t>(other);
        case Int64:
            return mul<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

private:
    int64_t m_min { 0 };
    int64_t m_max { 0 };
};
398
399class ReduceStrength {
400public:
    // Binds the phase to a procedure and caches block 0 (the procedure's
    // first block) as the root.
    ReduceStrength(Procedure& proc)
        : m_proc(proc)
        , m_insertionSet(proc)
        , m_blockInsertionSet(proc)
        , m_root(proc.at(0))
    {
    }
408
    // Drives the phase: repeatedly simplifies the CFG, runs DCE, simplifies
    // SSA, and strength-reduces every value, until a fixpoint is reached (at
    // optLevel >= 2) or after a single pass (at lower optLevels). Returns true
    // if anything changed.
    bool run()
    {
        bool result = false;
        bool first = true;
        unsigned index = 0; // Iteration counter, used only for verbose logging.
        do {
            m_changed = false;
            m_changedCFG = false;
            ++index;

            // When verbose, dump the procedure as it stood after the previous
            // iteration (hence "index - 1").
            if (first)
                first = false;
            else if (B3ReduceStrengthInternal::verbose) {
                dataLog("B3 after iteration #", index - 1, " of reduceStrength:\n");
                dataLog(m_proc);
            }

            simplifyCFG();

            // CFG edits invalidate reachability and any cached CFG analyses.
            if (m_changedCFG) {
                m_proc.resetReachability();
                m_proc.invalidateCFG();
                m_changed = true;
            }

            // We definitely want to do DCE before we do CSE so that we don't hoist things. For
            // example:
            //
            // @dead = Mul(@a, @b)
            // ... lots of control flow and stuff
            // @thing = Mul(@a, @b)
            //
            // If we do CSE before DCE, we will remove @thing and keep @dead. Effectively, we will
            // "hoist" @thing. On the other hand, if we run DCE before CSE, we will kill @dead and
            // keep @thing. That's better, since we usually want things to stay wherever the client
            // put them. We're not actually smart enough to move things around at random.
            m_changed |= eliminateDeadCodeImpl(m_proc);
            // DCE may have deleted values this cache refers to, so reset it.
            m_valueForConstant.clear();

            simplifySSA();

            // Redundancy elimination (replaceIfRedundant below) needs value
            // owners and dominators; we only pay for them at optLevel >= 2.
            if (m_proc.optLevel() >= 2) {
                m_proc.resetValueOwners();
                m_dominators = &m_proc.dominators(); // Recompute if necessary.
                m_pureCSE.clear();
            }

            for (BasicBlock* block : m_proc.blocksInPreOrder()) {
                m_block = block;

                // Note: block->size() is re-read each iteration because
                // reductions may change the block.
                for (m_index = 0; m_index < block->size(); ++m_index) {
                    if (B3ReduceStrengthInternal::verbose) {
                        dataLog(
                            "Looking at ", *block, " #", m_index, ": ",
                            deepDump(m_proc, block->at(m_index)), "\n");
                    }
                    m_value = m_block->at(m_index);
                    // Resolve any child references left behind by earlier
                    // replacements before reducing this value.
                    m_value->performSubstitution();
                    reduceValueStrength();
                    if (m_proc.optLevel() >= 2)
                        replaceIfRedundant();
                }
                // Flush values queued for insertion into this block.
                m_insertionSet.execute(m_block);
            }

            m_changedCFG |= m_blockInsertionSet.execute();
            handleChangedCFGIfNecessary();

            result |= m_changed;
        } while (m_changed && m_proc.optLevel() >= 2);

        // At low optLevels we ran only one pass above; still perform a final
        // CFG cleanup so we don't leave trivially simplifiable control flow.
        if (m_proc.optLevel() < 2) {
            m_changedCFG = false;
            simplifyCFG();
            handleChangedCFGIfNecessary();
        }

        return result;
    }
488
489private:
490 void reduceValueStrength()
491 {
492 switch (m_value->opcode()) {
493 case Opaque:
494 // Turn this: Opaque(Opaque(value))
495 // Into this: Opaque(value)
496 if (m_value->child(0)->opcode() == Opaque) {
497 replaceWithIdentity(m_value->child(0));
498 break;
499 }
500 break;
501
502 case Add:
503 handleCommutativity();
504
505 if (m_value->child(0)->opcode() == Add && m_value->isInteger()) {
506 // Turn this: Add(Add(value, constant1), constant2)
507 // Into this: Add(value, constant1 + constant2)
508 Value* newSum = m_value->child(1)->addConstant(m_proc, m_value->child(0)->child(1));
509 if (newSum) {
510 m_insertionSet.insertValue(m_index, newSum);
511 m_value->child(0) = m_value->child(0)->child(0);
512 m_value->child(1) = newSum;
513 m_changed = true;
514 break;
515 }
516
517 // Turn this: Add(Add(value, constant), otherValue)
518 // Into this: Add(Add(value, otherValue), constant)
519 if (!m_value->child(1)->hasInt() && m_value->child(0)->child(1)->hasInt()) {
520 Value* value = m_value->child(0)->child(0);
521 Value* constant = m_value->child(0)->child(1);
522 Value* otherValue = m_value->child(1);
523 // This could create duplicate code if Add(value, constant) is used elsewhere.
524 // However, we already model adding a constant as if it was free in other places
525 // so let's just roll with it. The alternative would mean having to do good use
526 // counts, which reduceStrength() currently doesn't have.
527 m_value->child(0) =
528 m_insertionSet.insert<Value>(
529 m_index, Add, m_value->origin(), value, otherValue);
530 m_value->child(1) = constant;
531 m_changed = true;
532 break;
533 }
534 }
535
536 // Turn this: Add(otherValue, Add(value, constant))
537 // Into this: Add(Add(value, otherValue), constant)
538 if (m_value->isInteger()
539 && !m_value->child(0)->hasInt()
540 && m_value->child(1)->opcode() == Add
541 && m_value->child(1)->child(1)->hasInt()) {
542 Value* value = m_value->child(1)->child(0);
543 Value* constant = m_value->child(1)->child(1);
544 Value* otherValue = m_value->child(0);
545 // This creates a duplicate add. That's dangerous but probably fine, see above.
546 m_value->child(0) =
547 m_insertionSet.insert<Value>(
548 m_index, Add, m_value->origin(), value, otherValue);
549 m_value->child(1) = constant;
550 m_changed = true;
551 break;
552 }
553
554 // Turn this: Add(constant1, constant2)
555 // Into this: constant1 + constant2
556 if (Value* constantAdd = m_value->child(0)->addConstant(m_proc, m_value->child(1))) {
557 replaceWithNewValue(constantAdd);
558 break;
559 }
560
561 // Turn this: Integer Add(value, value)
562 // Into this: Shl(value, 1)
563 // This is a useful canonicalization. It's not meant to be a strength reduction.
564 if (m_value->isInteger() && m_value->child(0) == m_value->child(1)) {
565 replaceWithNewValue(
566 m_proc.add<Value>(
567 Shl, m_value->origin(), m_value->child(0),
568 m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), 1)));
569 break;
570 }
571
572 // Turn this: Add(value, zero)
573 // Into an Identity.
574 //
575 // Addition is subtle with doubles. Zero is not the neutral value, negative zero is:
576 // 0 + 0 = 0
577 // 0 + -0 = 0
578 // -0 + 0 = 0
579 // -0 + -0 = -0
580 if (m_value->child(1)->isInt(0) || m_value->child(1)->isNegativeZero()) {
581 replaceWithIdentity(m_value->child(0));
582 break;
583 }
584
585 if (m_value->isInteger()) {
586 // Turn this: Integer Add(value, Neg(otherValue))
587 // Into this: Sub(value, otherValue)
588 if (m_value->child(1)->opcode() == Neg) {
589 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
590 break;
591 }
592
593 // Turn this: Integer Add(Neg(value), otherValue)
594 // Into this: Sub(otherValue, value)
595 if (m_value->child(0)->opcode() == Neg) {
596 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(1), m_value->child(0)->child(0));
597 break;
598 }
599
600 // Turn this: Integer Add(Sub(0, value), -1)
601 // Into this: BitXor(value, -1)
602 if (m_value->child(0)->opcode() == Sub
603 && m_value->child(1)->isInt(-1)
604 && m_value->child(0)->child(0)->isInt(0)) {
605 replaceWithNew<Value>(BitXor, m_value->origin(), m_value->child(0)->child(1), m_value->child(1));
606 break;
607 }
608
609 if (handleMulDistributivity())
610 break;
611 }
612
613 break;
614
615 case Sub:
616 // Turn this: Sub(constant1, constant2)
617 // Into this: constant1 - constant2
618 if (Value* constantSub = m_value->child(0)->subConstant(m_proc, m_value->child(1))) {
619 replaceWithNewValue(constantSub);
620 break;
621 }
622
623 if (m_value->isInteger()) {
624 // Turn this: Sub(value, constant)
625 // Into this: Add(value, -constant)
626 if (Value* negatedConstant = m_value->child(1)->negConstant(m_proc)) {
627 m_insertionSet.insertValue(m_index, negatedConstant);
628 replaceWithNew<Value>(
629 Add, m_value->origin(), m_value->child(0), negatedConstant);
630 break;
631 }
632
633 // Turn this: Sub(0, value)
634 // Into this: Neg(value)
635 if (m_value->child(0)->isInt(0)) {
636 replaceWithNew<Value>(Neg, m_value->origin(), m_value->child(1));
637 break;
638 }
639
640 // Turn this: Sub(value, value)
641 // Into this: 0
642 if (m_value->child(0) == m_value->child(1)) {
643 replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
644 break;
645 }
646
647 // Turn this: Sub(value, Neg(otherValue))
648 // Into this: Add(value, otherValue)
649 if (m_value->child(1)->opcode() == Neg) {
650 replaceWithNew<Value>(Add, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
651 break;
652 }
653
654 if (handleMulDistributivity())
655 break;
656 }
657
658 break;
659
660 case Neg:
661 // Turn this: Neg(constant)
662 // Into this: -constant
663 if (Value* constant = m_value->child(0)->negConstant(m_proc)) {
664 replaceWithNewValue(constant);
665 break;
666 }
667
668 // Turn this: Neg(Neg(value))
669 // Into this: value
670 if (m_value->child(0)->opcode() == Neg) {
671 replaceWithIdentity(m_value->child(0)->child(0));
672 break;
673 }
674
675 if (m_value->isInteger()) {
676 // Turn this: Integer Neg(Sub(value, otherValue))
677 // Into this: Sub(otherValue, value)
678 if (m_value->child(0)->opcode() == Sub) {
679 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(0)->child(1), m_value->child(0)->child(0));
680 break;
681 }
682
683 // Turn this: Integer Neg(Mul(value, c))
684 // Into this: Mul(value, -c), as long as -c does not overflow
685 if (m_value->child(0)->opcode() == Mul && m_value->child(0)->child(1)->hasInt()) {
686 int64_t factor = m_value->child(0)->child(1)->asInt();
687 if (m_value->type() == Int32 && factor != std::numeric_limits<int32_t>::min()) {
688 Value* newFactor = m_insertionSet.insert<Const32Value>(m_index, m_value->child(0)->child(1)->origin(), -factor);
689 replaceWithNew<Value>(Mul, m_value->origin(), m_value->child(0)->child(0), newFactor);
690 } else if (m_value->type() == Int64 && factor != std::numeric_limits<int64_t>::min()) {
691 Value* newFactor = m_insertionSet.insert<Const64Value>(m_index, m_value->child(0)->child(1)->origin(), -factor);
692 replaceWithNew<Value>(Mul, m_value->origin(), m_value->child(0)->child(0), newFactor);
693 }
694 }
695 }
696
697
698 break;
699
700 case Mul:
701 handleCommutativity();
702
703 // Turn this: Mul(constant1, constant2)
704 // Into this: constant1 * constant2
705 if (Value* value = m_value->child(0)->mulConstant(m_proc, m_value->child(1))) {
706 replaceWithNewValue(value);
707 break;
708 }
709
710 if (m_value->child(1)->hasInt()) {
711 int64_t factor = m_value->child(1)->asInt();
712
713 // Turn this: Mul(value, 0)
714 // Into this: 0
715 // Note that we don't do this for doubles because that's wrong. For example, -1 * 0
716 // and 1 * 0 yield different results.
717 if (!factor) {
718 replaceWithIdentity(m_value->child(1));
719 break;
720 }
721
722 // Turn this: Mul(value, 1)
723 // Into this: value
724 if (factor == 1) {
725 replaceWithIdentity(m_value->child(0));
726 break;
727 }
728
729 // Turn this: Mul(value, -1)
730 // Into this: Neg(value)
731 if (factor == -1) {
732 replaceWithNew<Value>(Neg, m_value->origin(), m_value->child(0));
733 break;
734 }
735
736 // Turn this: Mul(value, constant)
737 // Into this: Shl(value, log2(constant))
738 if (hasOneBitSet(factor)) {
739 unsigned shiftAmount = WTF::fastLog2(static_cast<uint64_t>(factor));
740 replaceWithNewValue(
741 m_proc.add<Value>(
742 Shl, m_value->origin(), m_value->child(0),
743 m_insertionSet.insert<Const32Value>(
744 m_index, m_value->origin(), shiftAmount)));
745 break;
746 }
747 } else if (m_value->child(1)->hasDouble()) {
748 double factor = m_value->child(1)->asDouble();
749
750 // Turn this: Mul(value, 1)
751 // Into this: value
752 if (factor == 1) {
753 replaceWithIdentity(m_value->child(0));
754 break;
755 }
756 }
757
758 if (m_value->isInteger()) {
759 // Turn this: Integer Mul(value, Neg(otherValue))
760 // Into this: Neg(Mul(value, otherValue))
761 if (m_value->child(1)->opcode() == Neg) {
762 Value* newMul = m_insertionSet.insert<Value>(m_index, Mul, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
763 replaceWithNew<Value>(Neg, m_value->origin(), newMul);
764 break;
765 }
766 // Turn this: Integer Mul(Neg(value), otherValue)
                // Into this: Neg(Mul(value, otherValue))
768 if (m_value->child(0)->opcode() == Neg) {
769 Value* newMul = m_insertionSet.insert<Value>(m_index, Mul, m_value->origin(), m_value->child(0)->child(0), m_value->child(1));
770 replaceWithNew<Value>(Neg, m_value->origin(), newMul);
771 break;
772 }
773 }
774
775 break;
776
777 case Div:
778 // Turn this: Div(constant1, constant2)
779 // Into this: constant1 / constant2
780 // Note that this uses Div<Chill> semantics. That's fine, because the rules for Div
781 // are strictly weaker: it has corner cases where it's allowed to do anything it
782 // likes.
783 if (replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1))))
784 break;
785
786 if (m_value->child(1)->hasInt()) {
787 switch (m_value->child(1)->asInt()) {
788 case -1:
789 // Turn this: Div(value, -1)
790 // Into this: Neg(value)
791 replaceWithNewValue(
792 m_proc.add<Value>(Neg, m_value->origin(), m_value->child(0)));
793 break;
794
795 case 0:
796 // Turn this: Div(value, 0)
797 // Into this: 0
798 // We can do this because it's precisely correct for ChillDiv and for Div we
799 // are allowed to do whatever we want.
800 replaceWithIdentity(m_value->child(1));
801 break;
802
803 case 1:
804 // Turn this: Div(value, 1)
805 // Into this: value
806 replaceWithIdentity(m_value->child(0));
807 break;
808
809 default:
810 // Perform super comprehensive strength reduction of division. Currently we
811 // only do this for 32-bit divisions, since we need a high multiply
812 // operation. We emulate it using 64-bit multiply. We can't emulate 64-bit
813 // high multiply with a 128-bit multiply because we don't have a 128-bit
814 // multiply. We could do it with a patchpoint if we cared badly enough.
815
816 if (m_value->type() != Int32)
817 break;
818
819 if (m_proc.optLevel() < 2)
820 break;
821
822 int32_t divisor = m_value->child(1)->asInt32();
823 DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
824
825 // Perform the "high" multiplication. We do it just to get the high bits.
826 // This is sort of like multiplying by the reciprocal, just more gnarly. It's
827 // from Hacker's Delight and I don't claim to understand it.
828 Value* magicQuotient = m_insertionSet.insert<Value>(
829 m_index, Trunc, m_value->origin(),
830 m_insertionSet.insert<Value>(
831 m_index, ZShr, m_value->origin(),
832 m_insertionSet.insert<Value>(
833 m_index, Mul, m_value->origin(),
834 m_insertionSet.insert<Value>(
835 m_index, SExt32, m_value->origin(), m_value->child(0)),
836 m_insertionSet.insert<Const64Value>(
837 m_index, m_value->origin(), magic.magicMultiplier)),
838 m_insertionSet.insert<Const32Value>(
839 m_index, m_value->origin(), 32)));
840
841 if (divisor > 0 && magic.magicMultiplier < 0) {
842 magicQuotient = m_insertionSet.insert<Value>(
843 m_index, Add, m_value->origin(), magicQuotient, m_value->child(0));
844 }
845 if (divisor < 0 && magic.magicMultiplier > 0) {
846 magicQuotient = m_insertionSet.insert<Value>(
847 m_index, Sub, m_value->origin(), magicQuotient, m_value->child(0));
848 }
849 if (magic.shift > 0) {
850 magicQuotient = m_insertionSet.insert<Value>(
851 m_index, SShr, m_value->origin(), magicQuotient,
852 m_insertionSet.insert<Const32Value>(
853 m_index, m_value->origin(), magic.shift));
854 }
855 replaceWithIdentity(
856 m_insertionSet.insert<Value>(
857 m_index, Add, m_value->origin(), magicQuotient,
858 m_insertionSet.insert<Value>(
859 m_index, ZShr, m_value->origin(), magicQuotient,
860 m_insertionSet.insert<Const32Value>(
861 m_index, m_value->origin(), 31))));
862 break;
863 }
864 break;
865 }
866 break;
867
868 case UDiv:
869 // Turn this: UDiv(constant1, constant2)
870 // Into this: constant1 / constant2
871 if (replaceWithNewValue(m_value->child(0)->uDivConstant(m_proc, m_value->child(1))))
872 break;
873
874 if (m_value->child(1)->hasInt()) {
875 switch (m_value->child(1)->asInt()) {
876 case 0:
877 // Turn this: UDiv(value, 0)
878 // Into this: 0
879 // We can do whatever we want here so we might as well do the chill thing,
880 // in case we add chill versions of UDiv in the future.
881 replaceWithIdentity(m_value->child(1));
882 break;
883
884 case 1:
885 // Turn this: UDiv(value, 1)
886 // Into this: value
887 replaceWithIdentity(m_value->child(0));
888 break;
889 default:
890 // FIXME: We should do comprehensive strength reduction for unsigned numbers. Likely,
891 // we will just want copy what llvm does. https://bugs.webkit.org/show_bug.cgi?id=164809
892 break;
893 }
894 }
895 break;
896
897 case Mod:
898 // Turn this: Mod(constant1, constant2)
            // Into this: constant1 % constant2
900 // Note that this uses Mod<Chill> semantics.
901 if (replaceWithNewValue(m_value->child(0)->modConstant(m_proc, m_value->child(1))))
902 break;
903
904 // Modulo by constant is more efficient if we turn it into Div, and then let Div get
905 // optimized.
906 if (m_value->child(1)->hasInt()) {
907 switch (m_value->child(1)->asInt()) {
908 case 0:
909 // Turn this: Mod(value, 0)
910 // Into this: 0
911 // This is correct according to ChillMod semantics.
912 replaceWithIdentity(m_value->child(1));
913 break;
914
915 default:
916 if (m_proc.optLevel() < 2)
917 break;
918
919 // Turn this: Mod(N, D)
920 // Into this: Sub(N, Mul(Div(N, D), D))
921 //
922 // This is a speed-up because we use our existing Div optimizations.
923 //
924 // Here's an easier way to look at it:
925 // N % D = N - N / D * D
926 //
927 // Note that this does not work for D = 0 and ChillMod. The expected result is 0.
928 // That's why we have a special-case above.
929 // X % 0 = X - X / 0 * 0 = X (should be 0)
930 //
931 // This does work for the D = -1 special case.
932 // -2^31 % -1 = -2^31 - -2^31 / -1 * -1
933 // = -2^31 - -2^31 * -1
934 // = -2^31 - -2^31
935 // = 0
936
937 Kind divKind = Div;
938 divKind.setIsChill(m_value->isChill());
939
940 replaceWithIdentity(
941 m_insertionSet.insert<Value>(
942 m_index, Sub, m_value->origin(),
943 m_value->child(0),
944 m_insertionSet.insert<Value>(
945 m_index, Mul, m_value->origin(),
946 m_insertionSet.insert<Value>(
947 m_index, divKind, m_value->origin(),
948 m_value->child(0), m_value->child(1)),
949 m_value->child(1))));
950 break;
951 }
952 break;
953 }
954
955 break;
956
957 case UMod:
958 // Turn this: UMod(constant1, constant2)
            // Into this: constant1 % constant2
960 replaceWithNewValue(m_value->child(0)->uModConstant(m_proc, m_value->child(1)));
961 // FIXME: We should do what we do for Mod since the same principle applies here.
962 // https://bugs.webkit.org/show_bug.cgi?id=164809
963 break;
964
965 case BitAnd:
966 handleCommutativity();
967
            // BitAnd reductions. The checks below are ordered: each one either
            // rewrites m_value in place (setting m_changed) or replaces it and
            // breaks out of the case, so earlier patterns take priority.

            // Turn this: BitAnd(constant1, constant2)
            // Into this: constant1 & constant2
            if (Value* constantBitAnd = m_value->child(0)->bitAndConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constantBitAnd);
                break;
            }

            // Turn this: BitAnd(BitAnd(value, constant1), constant2)
            // Into this: BitAnd(value, constant1 & constant2).
            if (m_value->child(0)->opcode() == BitAnd) {
                Value* newConstant = m_value->child(1)->bitAndConstant(m_proc, m_value->child(0)->child(1));
                if (newConstant) {
                    // Reassociation: collapse the two masks into one constant.
                    m_insertionSet.insertValue(m_index, newConstant);
                    m_value->child(0) = m_value->child(0)->child(0);
                    m_value->child(1) = newConstant;
                    m_changed = true;
                }
            }

            // Turn this: BitAnd(valueX, valueX)
            // Into this: valueX.
            if (m_value->child(0) == m_value->child(1)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: BitAnd(value, zero-constant)
            // Into this: zero-constant.
            if (m_value->child(1)->isInt(0)) {
                replaceWithIdentity(m_value->child(1));
                break;
            }

            // Turn this: BitAnd(value, all-ones)
            // Into this: value.
            if ((m_value->type() == Int64 && m_value->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                || (m_value->type() == Int32 && m_value->child(1)->isInt(std::numeric_limits<uint32_t>::max()))) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: BitAnd(64-bit value, 32 ones)
            // Into this: ZExt32(Trunc(64-bit value))
            // Keeping the low 32 bits is exactly what Trunc-then-ZExt32 does.
            if (m_value->child(1)->isInt64(0xffffffffllu)) {
                Value* newValue = m_insertionSet.insert<Value>(
                    m_index, ZExt32, m_value->origin(),
                    m_insertionSet.insert<Value>(m_index, Trunc, m_value->origin(), m_value->child(0)));
                replaceWithIdentity(newValue);
                break;
            }

            // Turn this: BitAnd(SExt8(value), mask) where (mask & 0xffffff00) == 0
            // Into this: BitAnd(value, mask)
            // The mask discards every bit the sign-extension could have set.
            if (m_value->child(0)->opcode() == SExt8 && m_value->child(1)->hasInt32()
                && !(m_value->child(1)->asInt32() & 0xffffff00)) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
                break;
            }

            // Turn this: BitAnd(SExt16(value), mask) where (mask & 0xffff0000) == 0
            // Into this: BitAnd(value, mask)
            if (m_value->child(0)->opcode() == SExt16 && m_value->child(1)->hasInt32()
                && !(m_value->child(1)->asInt32() & 0xffff0000)) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
                break;
            }

            // Turn this: BitAnd(SExt32(value), mask) where (mask & 0xffffffff00000000) == 0
            // Into this: BitAnd(ZExt32(value), mask)
            // NOTE(review): child(1) of an Int64 BitAnd should be a 64-bit
            // constant, for which hasInt32() would be false, so this reduction
            // looks unreachable as written. Also the ZExt32 below is created
            // with two children (child(0)->child(1) of a unary SExt32) --
            // confirm both points against Value's constructor and the
            // hasInt32() semantics.
            if (m_value->child(0)->opcode() == SExt32 && m_value->child(1)->hasInt32()
                && !(m_value->child(1)->asInt32() & 0xffffffff00000000llu)) {
                m_value->child(0) = m_insertionSet.insert<Value>(
                    m_index, ZExt32, m_value->origin(),
                    m_value->child(0)->child(0), m_value->child(0)->child(1));
                m_changed = true;
                break;
            }
1048 // Turn this: BitAnd(Op(value, constant1), constant2)
1049 // where !(constant1 & constant2)
1050 // and Op is BitOr or BitXor
1051 // into this: BitAnd(value, constant2)
1052 if (m_value->child(1)->hasInt()) {
1053 int64_t constant2 = m_value->child(1)->asInt();
1054 switch (m_value->child(0)->opcode()) {
1055 case BitOr:
1056 case BitXor:
1057 if (m_value->child(0)->child(1)->hasInt()
1058 && !(m_value->child(0)->child(1)->asInt() & constant2)) {
1059 m_value->child(0) = m_value->child(0)->child(0);
1060 m_changed = true;
1061 break;
1062 }
1063 break;
1064 default:
1065 break;
1066 }
1067 break;
1068 }
1069
            // Turn this: BitAnd(BitXor(x1, allOnes), BitXor(x2, allOnes))
            // Into this: BitXor(BitOr(x1, x2), allOnes)
            // By applying De Morgan laws: ~x1 & ~x2 == ~(x1 | x2).
            if (m_value->child(0)->opcode() == BitXor
                && m_value->child(1)->opcode() == BitXor
                && ((m_value->type() == Int64
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint64_t>::max())
                        && m_value->child(1)->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                    || (m_value->type() == Int32
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint32_t>::max())
                        && m_value->child(1)->child(1)->isInt(std::numeric_limits<uint32_t>::max())))) {
                Value* bitOr = m_insertionSet.insert<Value>(m_index, BitOr, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->child(0));
                replaceWithNew<Value>(BitXor, m_value->origin(), bitOr, m_value->child(1)->child(1));
                break;
            }

            // Turn this: BitAnd(BitXor(x, allOnes), c)
            // Into this: BitXor(BitOr(x, ~c), allOnes)
            // This is a variation on the previous optimization, treating c as if it were BitXor(~c, allOnes)
            // It does not reduce the number of operations, but provides some normalization (we try to get BitXor by allOnes at the outermost point), and some chance to float Xors to a place where they might get eliminated.
            if (m_value->child(0)->opcode() == BitXor
                && m_value->child(1)->hasInt()
                && ((m_value->type() == Int64
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                    || (m_value->type() == Int32
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint32_t>::max())))) {
                // bitXorConstant with the allOnes constant computes ~c.
                Value* bitOr = m_insertionSet.insert<Value>(m_index, BitOr, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1)));
                replaceWithNew<Value>(BitXor, m_value->origin(), bitOr, m_value->child(0)->child(1));
                break;
            }

            break;
1102
        case BitOr:
            handleCommutativity();

            // Turn this: BitOr(constant1, constant2)
            // Into this: constant1 | constant2
            if (Value* constantBitOr = m_value->child(0)->bitOrConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constantBitOr);
                break;
            }

            // Turn this: BitOr(BitOr(value, constant1), constant2)
            // Into this: BitOr(value, constant1 | constant2).
            if (m_value->child(0)->opcode() == BitOr) {
                Value* newConstant = m_value->child(1)->bitOrConstant(m_proc, m_value->child(0)->child(1));
                if (newConstant) {
                    // Reassociation: collapse the two constants into one.
                    m_insertionSet.insertValue(m_index, newConstant);
                    m_value->child(0) = m_value->child(0)->child(0);
                    m_value->child(1) = newConstant;
                    m_changed = true;
                }
            }

            // Turn this: BitOr(valueX, valueX)
            // Into this: valueX.
            if (m_value->child(0) == m_value->child(1)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: BitOr(value, zero-constant)
            // Into this: value.
            if (m_value->child(1)->isInt(0)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: BitOr(value, all-ones)
            // Into this: all-ones.
            if ((m_value->type() == Int64 && m_value->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                || (m_value->type() == Int32 && m_value->child(1)->isInt(std::numeric_limits<uint32_t>::max()))) {
                replaceWithIdentity(m_value->child(1));
                break;
            }

            // Turn this: BitOr(BitXor(x1, allOnes), BitXor(x2, allOnes))
            // Into this: BitXor(BitAnd(x1, x2), allOnes)
            // By applying De Morgan laws: ~x1 | ~x2 == ~(x1 & x2).
            if (m_value->child(0)->opcode() == BitXor
                && m_value->child(1)->opcode() == BitXor
                && ((m_value->type() == Int64
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint64_t>::max())
                        && m_value->child(1)->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                    || (m_value->type() == Int32
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint32_t>::max())
                        && m_value->child(1)->child(1)->isInt(std::numeric_limits<uint32_t>::max())))) {
                Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->child(0));
                replaceWithNew<Value>(BitXor, m_value->origin(), bitAnd, m_value->child(1)->child(1));
                break;
            }

            // Turn this: BitOr(BitXor(x, allOnes), c)
            // Into this: BitXor(BitAnd(x, ~c), allOnes)
            // This is a variation on the previous optimization, treating c as if it were BitXor(~c, allOnes)
            // It does not reduce the number of operations, but provides some normalization (we try to get BitXor by allOnes at the outermost point), and some chance to float Xors to a place where they might get eliminated.
            if (m_value->child(0)->opcode() == BitXor
                && m_value->child(1)->hasInt()
                && ((m_value->type() == Int64
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint64_t>::max()))
                    || (m_value->type() == Int32
                        && m_value->child(0)->child(1)->isInt(std::numeric_limits<uint32_t>::max())))) {
                // bitXorConstant with the allOnes constant computes ~c.
                Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1)));
                replaceWithNew<Value>(BitXor, m_value->origin(), bitAnd, m_value->child(0)->child(1));
                break;
            }

            if (handleBitAndDistributivity())
                break;

            break;
1182
        case BitXor:
            handleCommutativity();

            // Turn this: BitXor(constant1, constant2)
            // Into this: constant1 ^ constant2
            if (Value* constantBitXor = m_value->child(0)->bitXorConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constantBitXor);
                break;
            }

            // Turn this: BitXor(BitXor(value, constant1), constant2)
            // Into this: BitXor(value, constant1 ^ constant2).
            if (m_value->child(0)->opcode() == BitXor) {
                Value* newConstant = m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1));
                if (newConstant) {
                    // Reassociation: collapse the two constants into one.
                    m_insertionSet.insertValue(m_index, newConstant);
                    m_value->child(0) = m_value->child(0)->child(0);
                    m_value->child(1) = newConstant;
                    m_changed = true;
                }
            }

            // Turn this: BitXor(compare, 1)
            // Into this: invertedCompare
            if (m_value->child(1)->isInt32(1)) {
                if (Value* invertedCompare = m_value->child(0)->invertedCompare(m_proc)) {
                    replaceWithNewValue(invertedCompare);
                    break;
                }
            }

            // Turn this: BitXor(valueX, valueX)
            // Into this: zero-constant.
            if (m_value->child(0) == m_value->child(1)) {
                replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
                break;
            }

            // Turn this: BitXor(value, zero-constant)
            // Into this: value.
            if (m_value->child(1)->isInt(0)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            if (handleBitAndDistributivity())
                break;

            break;

        case Shl:
            // Turn this: Shl(constant1, constant2)
            // Into this: constant1 << constant2
            if (Value* constant = m_value->child(0)->shlConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constant);
                break;
            }

            // Turn this: Shl(<S|Z>Shr(@x, @const), @const)
            // Into this: BitAnd(@x, -(1<<@const))
            // Shifting right then left by the same amount just clears the low
            // @const bits, which the mask -(1<<@const) expresses directly.
            if ((m_value->child(0)->opcode() == SShr || m_value->child(0)->opcode() == ZShr)
                && m_value->child(0)->child(1)->hasInt()
                && m_value->child(1)->hasInt()
                && m_value->child(0)->child(1)->asInt() == m_value->child(1)->asInt()) {
                // Reduce the amount modulo the operand width, as the hardware
                // shift would.
                int shiftAmount = m_value->child(1)->asInt() & (m_value->type() == Int32 ? 31 : 63);
                Value* newConst = m_proc.addIntConstant(m_value, - static_cast<int64_t>(1ull << shiftAmount));
                m_insertionSet.insertValue(m_index, newConst);
                replaceWithNew<Value>(BitAnd, m_value->origin(), m_value->child(0)->child(0), newConst);
                break;
            }

            handleShiftAmount();
            break;
1256
        case SShr:
            // Turn this: SShr(constant1, constant2)
            // Into this: constant1 >> constant2
            if (Value* constant = m_value->child(0)->sShrConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constant);
                break;
            }

            // An SShr undoing a Shl by the same constant amount is a
            // sign-extension of the low bits; recognize the common widths.
            if (m_value->child(1)->hasInt32()
                && m_value->child(0)->opcode() == Shl
                && m_value->child(0)->child(1)->hasInt32()
                && m_value->child(1)->asInt32() == m_value->child(0)->child(1)->asInt32()) {
                switch (m_value->child(1)->asInt32()) {
                case 16:
                    if (m_value->type() == Int32) {
                        // Turn this: SShr(Shl(value, 16), 16)
                        // Into this: SExt16(value)
                        replaceWithNewValue(
                            m_proc.add<Value>(
                                SExt16, m_value->origin(), m_value->child(0)->child(0)));
                    }
                    break;

                case 24:
                    if (m_value->type() == Int32) {
                        // Turn this: SShr(Shl(value, 24), 24)
                        // Into this: SExt8(value)
                        replaceWithNewValue(
                            m_proc.add<Value>(
                                SExt8, m_value->origin(), m_value->child(0)->child(0)));
                    }
                    break;

                case 32:
                    if (m_value->type() == Int64) {
                        // Turn this: SShr(Shl(value, 32), 32)
                        // Into this: SExt32(Trunc(value))
                        replaceWithNewValue(
                            m_proc.add<Value>(
                                SExt32, m_value->origin(),
                                m_insertionSet.insert<Value>(
                                    m_index, Trunc, m_value->origin(),
                                    m_value->child(0)->child(0))));
                    }
                    break;

                // FIXME: Add cases for 48 and 56, but that would translate to SExt32(SExt8) or
                // SExt32(SExt16), which we don't currently lower efficiently.

                default:
                    break;
                }

                // If one of the reductions above fired, this value is no longer
                // an SShr; skip the generic shift-amount handling.
                if (m_value->opcode() != SShr)
                    break;
            }

            handleShiftAmount();
            break;

        case ZShr:
            // Turn this: ZShr(constant1, constant2)
            // Into this: (unsigned)constant1 >> constant2
            if (Value* constant = m_value->child(0)->zShrConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constant);
                break;
            }

            handleShiftAmount();
            break;

        case RotR:
            // Turn this: RotR(constant1, constant2)
            // Into this: (constant1 >> constant2) | (constant1 << sizeof(constant1) * 8 - constant2)
            if (Value* constant = m_value->child(0)->rotRConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constant);
                break;
            }

            handleShiftAmount();
            break;

        case RotL:
            // Turn this: RotL(constant1, constant2)
            // Into this: (constant1 << constant2) | (constant1 >> sizeof(constant1) * 8 - constant2)
            if (Value* constant = m_value->child(0)->rotLConstant(m_proc, m_value->child(1))) {
                replaceWithNewValue(constant);
                break;
            }

            handleShiftAmount();
            break;
1349
        case Abs:
            // Turn this: Abs(constant)
            // Into this: fabs<value->type()>(constant)
            if (Value* constant = m_value->child(0)->absConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }

            // Turn this: Abs(Abs(value))
            // Into this: Abs(value)
            // Abs is idempotent.
            if (m_value->child(0)->opcode() == Abs) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: Abs(Neg(value))
            // Into this: Abs(value)
            // Abs ignores the sign, so the negation is redundant.
            if (m_value->child(0)->opcode() == Neg) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
                break;
            }

            // Turn this: Abs(BitwiseCast(value))
            // Into this: BitwiseCast(And(value, mask-top-bit))
            // Clearing the sign bit of the integer representation is fabs.
            if (m_value->child(0)->opcode() == BitwiseCast) {
                Value* mask;
                if (m_value->type() == Double)
                    mask = m_insertionSet.insert<Const64Value>(m_index, m_value->origin(), ~(1ll << 63));
                else
                    mask = m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), ~(1l << 31));

                Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(),
                    m_value->child(0)->child(0),
                    mask);
                Value* cast = m_insertionSet.insert<Value>(m_index, BitwiseCast, m_value->origin(), bitAnd);
                replaceWithIdentity(cast);
                break;
            }
            break;

        case Ceil:
            // Turn this: Ceil(constant)
            // Into this: ceil<value->type()>(constant)
            if (Value* constant = m_value->child(0)->ceilConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }

            // Turn this: Ceil(roundedValue)
            // Into this: roundedValue
            // Rounding an already-rounded value is a no-op.
            if (m_value->child(0)->isRounded()) {
                replaceWithIdentity(m_value->child(0));
                break;
            }
            break;

        case Floor:
            // Turn this: Floor(constant)
            // Into this: floor<value->type()>(constant)
            if (Value* constant = m_value->child(0)->floorConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }

            // Turn this: Floor(roundedValue)
            // Into this: roundedValue
            if (m_value->child(0)->isRounded()) {
                replaceWithIdentity(m_value->child(0));
                break;
            }
            break;

        case Sqrt:
            // Turn this: Sqrt(constant)
            // Into this: sqrt<value->type()>(constant)
            if (Value* constant = m_value->child(0)->sqrtConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }
            break;

        case BitwiseCast:
            // Turn this: BitwiseCast(constant)
            // Into this: bitwise_cast<value->type()>(constant)
            if (Value* constant = m_value->child(0)->bitwiseCastConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }

            // Turn this: BitwiseCast(BitwiseCast(value))
            // Into this: value
            // Two casts round-trip back to the original representation.
            if (m_value->child(0)->opcode() == BitwiseCast) {
                replaceWithIdentity(m_value->child(0)->child(0));
                break;
            }
            break;
1447
        case SExt8:
            // Turn this: SExt8(constant)
            // Into this: static_cast<int8_t>(constant)
            if (m_value->child(0)->hasInt32()) {
                int32_t result = static_cast<int8_t>(m_value->child(0)->asInt32());
                replaceWithNewValue(m_proc.addIntConstant(m_value, result));
                break;
            }

            // Turn this: SExt8(SExt8(value))
            // or this: SExt8(SExt16(value))
            // Into this: SExt8(value)
            // The outer SExt8 only looks at the low 8 bits, which the inner
            // extension leaves unchanged. Note: no break here on purpose, so
            // the BitAnd pattern below can still apply to the new child.
            if (m_value->child(0)->opcode() == SExt8 || m_value->child(0)->opcode() == SExt16) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
            }

            if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()) {
                Value* input = m_value->child(0)->child(0);
                int32_t mask = m_value->child(0)->child(1)->asInt32();

                // Turn this: SExt8(BitAnd(input, mask)) where (mask & 0xff) == 0xff
                // Into this: SExt8(input)
                // The mask keeps all 8 bits the extension reads.
                if ((mask & 0xff) == 0xff) {
                    m_value->child(0) = input;
                    m_changed = true;
                    break;
                }

                // Turn this: SExt8(BitAnd(input, mask)) where (mask & 0x80) == 0
                // Into this: BitAnd(input, const & 0x7f)
                // The sign bit is known to be zero, so the extension is a mask.
                if (!(mask & 0x80)) {
                    replaceWithNewValue(
                        m_proc.add<Value>(
                            BitAnd, m_value->origin(), input,
                            m_insertionSet.insert<Const32Value>(
                                m_index, m_value->origin(), mask & 0x7f)));
                    break;
                }
            }

            if (!m_proc.hasQuirks()) {
                // Turn this: SExt8(AtomicXchg___)
                // Into this: AtomicXchg___
                if (isAtomicXchg(m_value->child(0)->opcode())
                    && m_value->child(0)->as<AtomicValue>()->accessWidth() == Width8) {
                    replaceWithIdentity(m_value->child(0));
                    break;
                }
            }
            break;

        case SExt16:
            // Turn this: SExt16(constant)
            // Into this: static_cast<int16_t>(constant)
            if (m_value->child(0)->hasInt32()) {
                int32_t result = static_cast<int16_t>(m_value->child(0)->asInt32());
                replaceWithNewValue(m_proc.addIntConstant(m_value, result));
                break;
            }

            // Turn this: SExt16(SExt16(value))
            // Into this: SExt16(value)
            // No break on purpose: the BitAnd pattern below may still apply.
            if (m_value->child(0)->opcode() == SExt16) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
            }

            // Turn this: SExt16(SExt8(value))
            // Into this: SExt8(value)
            // SExt8 already produced a fully sign-extended 32-bit value.
            if (m_value->child(0)->opcode() == SExt8) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()) {
                Value* input = m_value->child(0)->child(0);
                int32_t mask = m_value->child(0)->child(1)->asInt32();

                // Turn this: SExt16(BitAnd(input, mask)) where (mask & 0xffff) == 0xffff
                // Into this: SExt16(input)
                if ((mask & 0xffff) == 0xffff) {
                    m_value->child(0) = input;
                    m_changed = true;
                    break;
                }

                // Turn this: SExt16(BitAnd(input, mask)) where (mask & 0x8000) == 0
                // Into this: BitAnd(input, const & 0x7fff)
                // The sign bit is known to be zero, so the extension is a mask.
                if (!(mask & 0x8000)) {
                    replaceWithNewValue(
                        m_proc.add<Value>(
                            BitAnd, m_value->origin(), input,
                            m_insertionSet.insert<Const32Value>(
                                m_index, m_value->origin(), mask & 0x7fff)));
                    break;
                }
            }

            if (!m_proc.hasQuirks()) {
                // Turn this: SExt16(AtomicXchg___)
                // Into this: AtomicXchg___
                if (isAtomicXchg(m_value->child(0)->opcode())
                    && m_value->child(0)->as<AtomicValue>()->accessWidth() == Width16) {
                    replaceWithIdentity(m_value->child(0));
                    break;
                }
            }
            break;
1557
        case SExt32:
            // Turn this: SExt32(constant)
            // Into this: static_cast<int64_t>(constant)
            if (m_value->child(0)->hasInt32()) {
                replaceWithNewValue(m_proc.addIntConstant(m_value, m_value->child(0)->asInt32()));
                break;
            }

            // Turn this: SExt32(BitAnd(input, mask)) where (mask & 0x80000000) == 0
            // Into this: ZExt32(BitAnd(input, mask))
            // The mask clears the sign bit, so sign- and zero-extension agree.
            if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()
                && !(m_value->child(0)->child(1)->asInt32() & 0x80000000)) {
                replaceWithNewValue(
                    m_proc.add<Value>(
                        ZExt32, m_value->origin(), m_value->child(0)));
                break;
            }
            break;

        case ZExt32:
            // Turn this: ZExt32(constant)
            // Into this: static_cast<uint64_t>(static_cast<uint32_t>(constant))
            if (m_value->child(0)->hasInt32()) {
                replaceWithNewValue(
                    m_proc.addIntConstant(
                        m_value,
                        static_cast<uint64_t>(static_cast<uint32_t>(m_value->child(0)->asInt32()))));
                break;
            }
            break;

        case Trunc:
            // Turn this: Trunc(constant)
            // Into this: static_cast<int32_t>(constant)
            // NOTE(review): the condition admits double constants but the body
            // calls asInt64(); confirm asInt64() is valid for a ConstDouble
            // child and that an integer constant is the intended result there.
            if (m_value->child(0)->hasInt64() || m_value->child(0)->hasDouble()) {
                replaceWithNewValue(
                    m_proc.addIntConstant(m_value, static_cast<int32_t>(m_value->child(0)->asInt64())));
                break;
            }

            // Turn this: Trunc(SExt32(value)) or Trunc(ZExt32(value))
            // Into this: value
            // Truncation exactly undoes either 32->64 extension.
            if (m_value->child(0)->opcode() == SExt32 || m_value->child(0)->opcode() == ZExt32) {
                replaceWithIdentity(m_value->child(0)->child(0));
                break;
            }

            // Turn this: Trunc(Op(value, constant))
            //     where !(constant & 0xffffffff)
            //     and Op is Add, Sub, BitOr, or BitXor
            // into this: Trunc(value)
            // A constant with zero low bits cannot affect the truncated result
            // for these bitwise/low-bits-local operations.
            switch (m_value->child(0)->opcode()) {
            case Add:
            case Sub:
            case BitOr:
            case BitXor:
                if (m_value->child(0)->child(1)->hasInt64()
                    && !(m_value->child(0)->child(1)->asInt64() & 0xffffffffll)) {
                    m_value->child(0) = m_value->child(0)->child(0);
                    m_changed = true;
                    break;
                }
                break;
            default:
                break;
            }
            break;
1625
        case IToD:
            // Turn this: IToD(constant)
            // Into this: ConstDouble(constant)
            if (Value* constant = m_value->child(0)->iToDConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }
            break;

        case IToF:
            // Turn this: IToF(constant)
            // Into this: ConstFloat(constant)
            if (Value* constant = m_value->child(0)->iToFConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }
            break;

        case FloatToDouble:
            // Turn this: FloatToDouble(constant)
            // Into this: ConstDouble(constant)
            if (Value* constant = m_value->child(0)->floatToDoubleConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }
            break;

        case DoubleToFloat:
            // Turn this: DoubleToFloat(FloatToDouble(value))
            // Into this: value
            // Float -> double -> float round-trips exactly.
            if (m_value->child(0)->opcode() == FloatToDouble) {
                replaceWithIdentity(m_value->child(0)->child(0));
                break;
            }

            // Turn this: DoubleToFloat(constant)
            // Into this: ConstFloat(constant)
            if (Value* constant = m_value->child(0)->doubleToFloatConstant(m_proc)) {
                replaceWithNewValue(constant);
                break;
            }
            break;

        case Select:
            // Turn this: Select(constant, a, b)
            // Into this: constant ? a : b
            if (m_value->child(0)->hasInt32()) {
                replaceWithIdentity(
                    m_value->child(0)->asInt32() ? m_value->child(1) : m_value->child(2));
                break;
            }

            // Turn this: Select(Equal(x, 0), a, b)
            // Into this: Select(x, b, a)
            // Drop the comparison by swapping the arms instead.
            if (m_value->child(0)->opcode() == Equal && m_value->child(0)->child(1)->isInt(0)) {
                m_value->child(0) = m_value->child(0)->child(0);
                std::swap(m_value->child(1), m_value->child(2));
                m_changed = true;
                break;
            }

            // Turn this: Select(BitXor(bool, 1), a, b)
            // Into this: Select(bool, b, a)
            // The xor-by-1 negates the boolean; swap the arms instead.
            if (m_value->child(0)->opcode() == BitXor
                && m_value->child(0)->child(1)->isInt32(1)
                && m_value->child(0)->child(0)->returnsBool()) {
                m_value->child(0) = m_value->child(0)->child(0);
                std::swap(m_value->child(1), m_value->child(2));
                m_changed = true;
                break;
            }

            // Turn this: Select(BitAnd(bool, xyz1), a, b)
            // Into this: Select(bool, a, b)
            // Masking a boolean with an odd constant keeps bit 0 intact.
            if (m_value->child(0)->opcode() == BitAnd
                && m_value->child(0)->child(1)->hasInt()
                && m_value->child(0)->child(1)->asInt() & 1
                && m_value->child(0)->child(0)->returnsBool()) {
                m_value->child(0) = m_value->child(0)->child(0);
                m_changed = true;
                break;
            }

            // Turn this: Select(stuff, x, x)
            // Into this: x
            if (m_value->child(1) == m_value->child(2)) {
                replaceWithIdentity(m_value->child(1));
                break;
            }
            break;
1716
        case Load8Z:
        case Load8S:
        case Load16Z:
        case Load16S:
        case Load:
        case Store8:
        case Store16:
        case Store: {
            // For loads the address is the only child; for stores it is the
            // last child (after the stored value).
            Value* address = m_value->lastChild();
            MemoryValue* memory = m_value->as<MemoryValue>();

            // Turn this: Load(Add(address, offset1), offset = offset2)
            // Into this: Load(address, offset = offset1 + offset2)
            //
            // Also turns this: Store(value, Add(address, offset1), offset = offset2)
            // Into this: Store(value, address, offset = offset1 + offset2)
            if (address->opcode() == Add && address->child(1)->hasIntPtr()) {
                intptr_t offset = address->child(1)->asIntPtr();
                if (!sumOverflows<intptr_t>(offset, memory->offset())) {
                    offset += memory->offset();
                    Value::OffsetType smallOffset = static_cast<Value::OffsetType>(offset);
                    // Only fold if the sum round-trips through the narrower
                    // OffsetType without losing bits.
                    if (smallOffset == offset) {
                        address = address->child(0);
                        memory->lastChild() = address;
                        memory->setOffset(smallOffset);
                        m_changed = true;
                    }
                }
            }

            // Turn this: Load(constant1, offset = constant2)
            // Into this: Load(constant1 + constant2)
            //
            // This is a fun canonicalization. It purely regresses naively generated code. We rely
            // on constant materialization to be smart enough to materialize this constant the smart
            // way. We want this canonicalization because we want to know if two memory accesses see
            // the same address.
            if (memory->offset()) {
                if (Value* newAddress = address->addConstant(m_proc, memory->offset())) {
                    m_insertionSet.insertValue(m_index, newAddress);
                    address = newAddress;
                    memory->lastChild() = newAddress;
                    memory->setOffset(0);
                    m_changed = true;
                }
            }

            break;
        }

        case CCall: {
            // Turn this: Call(fmod, constant1, constant2)
            // Into this: fcall-constant(constant1, constant2)
            // child(0) is the callee; compare it against the tagged fmod
            // pointer to recognize the call.
            auto* fmodDouble = tagCFunctionPtr<double (*)(double, double)>(fmod, B3CCallPtrTag);
            if (m_value->type() == Double
                && m_value->numChildren() == 3
                && m_value->child(0)->isIntPtr(reinterpret_cast<intptr_t>(fmodDouble))
                && m_value->child(1)->type() == Double
                && m_value->child(2)->type() == Double) {
                replaceWithNewValue(m_value->child(1)->modConstant(m_proc, m_value->child(2)));
            }
            break;
        }
        case Equal:
            handleCommutativity();

            // Turn this: Equal(bool, 0)
            // Into this: BitXor(bool, 1)
            if (m_value->child(0)->returnsBool() && m_value->child(1)->isInt32(0)) {
                replaceWithNew<Value>(
                    BitXor, m_value->origin(), m_value->child(0),
                    m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), 1));
                break;
            }

            // Turn this Equal(bool, 1)
            // Into this: bool
            if (m_value->child(0)->returnsBool() && m_value->child(1)->isInt32(1)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Turn this: Equal(const1, const2)
            // Into this: const1 == const2
            // addBoolConstant returns null for a MixedTriState result, in which
            // case this is a no-op (cf. the guarded use in the comparison cases
            // below).
            replaceWithNewValue(
                m_proc.addBoolConstant(
                    m_value->origin(),
                    m_value->child(0)->equalConstant(m_value->child(1))));
            break;

        case NotEqual:
            handleCommutativity();

            if (m_value->child(0)->returnsBool()) {
                // Turn this: NotEqual(bool, 0)
                // Into this: bool
                if (m_value->child(1)->isInt32(0)) {
                    replaceWithIdentity(m_value->child(0));
                    break;
                }

                // Turn this: NotEqual(bool, 1)
                // Into this: Equal(bool, 0)
                if (m_value->child(1)->isInt32(1)) {
                    replaceWithNew<Value>(
                        Equal, m_value->origin(), m_value->child(0),
                        m_insertionSet.insertIntConstant(m_index, m_value->origin(), Int32, 0));
                    break;
                }
            }

            // Turn this: NotEqual(const1, const2)
            // Into this: const1 != const2
            replaceWithNewValue(
                m_proc.addBoolConstant(
                    m_value->origin(),
                    m_value->child(0)->notEqualConstant(m_value->child(1))));
            break;

        case LessThan:
        case GreaterThan:
        case LessEqual:
        case GreaterEqual:
        case Above:
        case Below:
        case AboveEqual:
        case BelowEqual: {
            // Canonicalize the comparison first, then try to fold it. The
            // folding helpers are called on operands[1] with the opposite
            // sense, e.g. a < b is evaluated as b > a.
            CanonicalizedComparison comparison = canonicalizeComparison(m_value);
            TriState result = MixedTriState;
            switch (comparison.opcode) {
            case LessThan:
                result = comparison.operands[1]->greaterThanConstant(comparison.operands[0]);
                break;
            case GreaterThan:
                result = comparison.operands[1]->lessThanConstant(comparison.operands[0]);
                break;
            case LessEqual:
                result = comparison.operands[1]->greaterEqualConstant(comparison.operands[0]);
                break;
            case GreaterEqual:
                result = comparison.operands[1]->lessEqualConstant(comparison.operands[0]);
                break;
            case Above:
                result = comparison.operands[1]->belowConstant(comparison.operands[0]);
                break;
            case Below:
                result = comparison.operands[1]->aboveConstant(comparison.operands[0]);
                break;
            case AboveEqual:
                result = comparison.operands[1]->belowEqualConstant(comparison.operands[0]);
                break;
            case BelowEqual:
                result = comparison.operands[1]->aboveEqualConstant(comparison.operands[0]);
                break;
            default:
                RELEASE_ASSERT_NOT_REACHED();
                break;
            }

            // If the comparison folded to a known boolean, use the constant.
            if (auto* constant = m_proc.addBoolConstant(m_value->origin(), result)) {
                replaceWithNewValue(constant);
                break;
            }
            // Otherwise, if canonicalization changed the opcode or operand
            // order, rewrite the value in canonical form.
            if (comparison.opcode != m_value->opcode()) {
                replaceWithNew<Value>(comparison.opcode, m_value->origin(), comparison.operands[0], comparison.operands[1]);
                break;
            }
            break;
        }
1886
        case EqualOrUnordered:
            handleCommutativity();

            // Turn this: Equal(const1, const2)
            // Into this: isunordered(const1, const2) || const1 == const2.
            // Turn this: Equal(value, const_NaN)
            // Into this: 1.
            replaceWithNewValue(
                m_proc.addBoolConstant(
                    m_value->origin(),
                    m_value->child(1)->equalOrUnorderedConstant(m_value->child(0))));
            break;

        case CheckAdd: {
            // Constant-fold the checked addition when both operands are known.
            if (replaceWithNewValue(m_value->child(0)->checkAddConstant(m_proc, m_value->child(1))))
                break;

            handleCommutativity();

            // CheckAdd(value, 0) can never overflow.
            if (m_value->child(1)->isInt(0)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // If range analysis proves the addition cannot overflow, demote the
            // checked operation to a plain Add.
            IntRange leftRange = rangeFor(m_value->child(0));
            IntRange rightRange = rangeFor(m_value->child(1));
            if (!leftRange.couldOverflowAdd(rightRange, m_value->type())) {
                replaceWithNewValue(
                    m_proc.add<Value>(Add, m_value->origin(), m_value->child(0), m_value->child(1)));
                break;
            }
            break;
        }

        case CheckSub: {
            // Constant-fold the checked subtraction when both operands are known.
            if (replaceWithNewValue(m_value->child(0)->checkSubConstant(m_proc, m_value->child(1))))
                break;

            // CheckSub(value, 0) can never overflow.
            if (m_value->child(1)->isInt(0)) {
                replaceWithIdentity(m_value->child(0));
                break;
            }

            // Canonicalize CheckSub(x, c) as CheckAdd(x, -c) when the negated
            // constant is representable (checkNegConstant may fail).
            if (Value* negatedConstant = m_value->child(1)->checkNegConstant(m_proc)) {
                m_insertionSet.insertValue(m_index, negatedConstant);
                m_value->as<CheckValue>()->convertToAdd();
                m_value->child(1) = negatedConstant;
                m_changed = true;
                break;
            }

            // If range analysis proves the subtraction cannot overflow, demote
            // the checked operation to a plain Sub.
            IntRange leftRange = rangeFor(m_value->child(0));
            IntRange rightRange = rangeFor(m_value->child(1));
            if (!leftRange.couldOverflowSub(rightRange, m_value->type())) {
                replaceWithNewValue(
                    m_proc.add<Value>(Sub, m_value->origin(), m_value->child(0), m_value->child(1)));
                break;
            }
            break;
        }

        case CheckMul: {
            // Constant-fold the checked multiplication when both operands are known.
            if (replaceWithNewValue(m_value->child(0)->checkMulConstant(m_proc, m_value->child(1))))
                break;

            handleCommutativity();

            // Special-case small constant multipliers: x*0, x*1, and x*2
            // (rewritten as CheckAdd(x, x)) never need a multiply.
            if (m_value->child(1)->hasInt()) {
                bool modified = true;
                switch (m_value->child(1)->asInt()) {
                case 0:
                    replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
                    break;
                case 1:
                    replaceWithIdentity(m_value->child(0));
                    break;
                case 2:
                    m_value->as<CheckValue>()->convertToAdd();
                    m_value->child(1) = m_value->child(0);
                    m_changed = true;
                    break;
                default:
                    modified = false;
                    break;
                }
                if (modified)
                    break;
            }

            // If range analysis proves the multiplication cannot overflow,
            // demote the checked operation to a plain Mul.
            IntRange leftRange = rangeFor(m_value->child(0));
            IntRange rightRange = rangeFor(m_value->child(1));
            if (!leftRange.couldOverflowMul(rightRange, m_value->type())) {
                replaceWithNewValue(
                    m_proc.add<Value>(Mul, m_value->origin(), m_value->child(0), m_value->child(1)));
                break;
            }
            break;
        }
1985
1986 case Check: {
1987 CheckValue* checkValue = m_value->as<CheckValue>();
1988
1989 if (checkValue->child(0)->isLikeZero()) {
1990 checkValue->replaceWithNop();
1991 m_changed = true;
1992 break;
1993 }
1994
1995 if (checkValue->child(0)->isLikeNonZero()) {
1996 PatchpointValue* patchpoint =
1997 m_insertionSet.insert<PatchpointValue>(m_index, Void, checkValue->origin());
1998
1999 patchpoint->effects = Effects();
2000 patchpoint->effects.reads = HeapRange::top();
2001 patchpoint->effects.exitsSideways = true;
2002
2003 for (unsigned i = 1; i < checkValue->numChildren(); ++i)
2004 patchpoint->append(checkValue->constrainedChild(i));
2005
2006 patchpoint->setGenerator(checkValue->generator());
2007
2008 // Replace the rest of the block with an Oops.
2009 for (unsigned i = m_index + 1; i < m_block->size() - 1; ++i)
2010 m_block->at(i)->replaceWithBottom(m_insertionSet, m_index);
2011 m_block->last()->replaceWithOops(m_block);
2012 m_block->last()->setOrigin(checkValue->origin());
2013
2014 // Replace ourselves last.
2015 checkValue->replaceWithNop();
2016 m_changedCFG = true;
2017 break;
2018 }
2019
2020 if (checkValue->child(0)->opcode() == NotEqual
2021 && checkValue->child(0)->child(1)->isInt(0)) {
2022 checkValue->child(0) = checkValue->child(0)->child(0);
2023 m_changed = true;
2024 }
2025
2026 if (m_proc.optLevel() < 2)
2027 break;
2028
2029 // If we are checking some bounded-size SSA expression that leads to a Select that
2030 // has a constant as one of its results, then turn the Select into a Branch and split
2031 // the code between the Check and the Branch. For example, this:
2032 //
2033 // @a = Select(@p, @x, 42)
2034 // @b = Add(@a, 35)
2035 // Check(@b)
2036 //
2037 // becomes this:
2038 //
2039 // Branch(@p, #truecase, #falsecase)
2040 //
2041 // BB#truecase:
2042 // @b_truecase = Add(@x, 35)
2043 // Check(@b_truecase)
2044 // Upsilon(@x, ^a)
2045 // Upsilon(@b_truecase, ^b)
2046 // Jump(#continuation)
2047 //
2048 // BB#falsecase:
2049 // @b_falsecase = Add(42, 35)
2050 // Check(@b_falsecase)
2051 // Upsilon(42, ^a)
2052 // Upsilon(@b_falsecase, ^b)
2053 // Jump(#continuation)
2054 //
2055 // BB#continuation:
2056 // @a = Phi()
2057 // @b = Phi()
2058 //
2059 // The goal of this optimization is to kill a lot of code in one of those basic
2060 // blocks. This is pretty much guaranteed since one of those blocks will replace all
2061 // uses of the Select with a constant, and that constant will be transitively used
2062 // from the check.
2063 static const unsigned selectSpecializationBound = 3;
2064 Value* select = findRecentNodeMatching(
2065 m_value->child(0), selectSpecializationBound,
2066 [&] (Value* value) -> bool {
2067 return value->opcode() == Select
2068 && (value->child(1)->isConstant() && value->child(2)->isConstant());
2069 });
2070
2071 if (select) {
2072 specializeSelect(select);
2073 break;
2074 }
2075 break;
2076 }
2077
2078 case Branch: {
2079 // Turn this: Branch(NotEqual(x, 0))
2080 // Into this: Branch(x)
2081 if (m_value->child(0)->opcode() == NotEqual && m_value->child(0)->child(1)->isInt(0)) {
2082 m_value->child(0) = m_value->child(0)->child(0);
2083 m_changed = true;
2084 }
2085
2086 // Turn this: Branch(Equal(x, 0), then, else)
2087 // Into this: Branch(x, else, then)
2088 if (m_value->child(0)->opcode() == Equal && m_value->child(0)->child(1)->isInt(0)) {
2089 m_value->child(0) = m_value->child(0)->child(0);
2090 std::swap(m_block->taken(), m_block->notTaken());
2091 m_changed = true;
2092 }
2093
2094 // Turn this: Branch(BitXor(bool, 1), then, else)
2095 // Into this: Branch(bool, else, then)
2096 if (m_value->child(0)->opcode() == BitXor
2097 && m_value->child(0)->child(1)->isInt32(1)
2098 && m_value->child(0)->child(0)->returnsBool()) {
2099 m_value->child(0) = m_value->child(0)->child(0);
2100 std::swap(m_block->taken(), m_block->notTaken());
2101 m_changed = true;
2102 }
2103
2104 // Turn this: Branch(BitAnd(bool, xyb1), then, else)
2105 // Into this: Branch(bool, then, else)
2106 if (m_value->child(0)->opcode() == BitAnd
2107 && m_value->child(0)->child(1)->hasInt()
2108 && m_value->child(0)->child(1)->asInt() & 1
2109 && m_value->child(0)->child(0)->returnsBool()) {
2110 m_value->child(0) = m_value->child(0)->child(0);
2111 m_changed = true;
2112 }
2113
2114 TriState triState = m_value->child(0)->asTriState();
2115
2116 // Turn this: Branch(0, then, else)
2117 // Into this: Jump(else)
2118 if (triState == FalseTriState) {
2119 m_block->taken().block()->removePredecessor(m_block);
2120 m_value->replaceWithJump(m_block, m_block->notTaken());
2121 m_changedCFG = true;
2122 break;
2123 }
2124
2125 // Turn this: Branch(not 0, then, else)
2126 // Into this: Jump(then)
2127 if (triState == TrueTriState) {
2128 m_block->notTaken().block()->removePredecessor(m_block);
2129 m_value->replaceWithJump(m_block, m_block->taken());
2130 m_changedCFG = true;
2131 break;
2132 }
2133
2134 if (m_proc.optLevel() >= 2) {
2135 // If a check for the same property dominates us, we can kill the branch. This sort
2136 // of makes sense here because it's cheap, but hacks like this show that we're going
2137 // to need SCCP.
2138 Value* check = m_pureCSE.findMatch(
2139 ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
2140 if (check) {
2141 // The Check would have side-exited if child(0) was non-zero. So, it must be
2142 // zero here.
2143 m_block->taken().block()->removePredecessor(m_block);
2144 m_value->replaceWithJump(m_block, m_block->notTaken());
2145 m_changedCFG = true;
2146 }
2147 }
2148 break;
2149 }
2150
2151 case Const32:
2152 case Const64:
2153 case ConstFloat:
2154 case ConstDouble: {
2155 ValueKey key = m_value->key();
2156 if (Value* constInRoot = m_valueForConstant.get(key)) {
2157 if (constInRoot != m_value) {
2158 m_value->replaceWithIdentity(constInRoot);
2159 m_changed = true;
2160 }
2161 } else if (m_block == m_root)
2162 m_valueForConstant.add(key, m_value);
2163 else {
2164 Value* constInRoot = m_proc.clone(m_value);
2165 ASSERT(m_root && m_root->size() >= 1);
2166 m_root->appendNonTerminal(constInRoot);
2167 m_valueForConstant.add(key, constInRoot);
2168 m_value->replaceWithIdentity(constInRoot);
2169 m_changed = true;
2170 }
2171 break;
2172 }
2173
2174 default:
2175 break;
2176 }
2177 }
2178
2179 // Find a node that:
2180 // - functor(node) returns true.
2181 // - it's reachable from the given node via children.
2182 // - it's in the last "bound" slots in the current basic block.
2183 // This algorithm is optimized under the assumption that the bound is small.
2184 template<typename Functor>
2185 Value* findRecentNodeMatching(Value* start, unsigned bound, const Functor& functor)
2186 {
2187 unsigned startIndex = bound < m_index ? m_index - bound : 0;
2188 Value* result = nullptr;
2189 start->walk(
2190 [&] (Value* value) -> Value::WalkStatus {
2191 bool found = false;
2192 for (unsigned i = startIndex; i <= m_index; ++i) {
2193 if (m_block->at(i) == value)
2194 found = true;
2195 }
2196 if (!found)
2197 return Value::IgnoreChildren;
2198
2199 if (functor(value)) {
2200 result = value;
2201 return Value::Stop;
2202 }
2203
2204 return Value::Continue;
2205 });
2206 return result;
2207 }
2208
    // This specializes a sequence of code up to a Select. This doesn't work when we're at a
    // terminal. It would be cool to fix that eventually. The main problem is that instead of
    // splitting the block, we should just insert the then/else blocks. We'll have to create
    // double the Phis and double the Upsilons. It'll probably be the sort of optimization that
    // we want to do only after we've done loop optimizations, since this will *definitely*
    // obscure things. In fact, even this simpler form of select specialization will possibly
    // obscure other optimizations. It would be great to have two modes of strength reduction,
    // one that does obscuring optimizations and runs late, and another that does not do
    // obscuring optimizations and runs early.
    // FIXME: Make select specialization handle branches.
    // FIXME: Have a form of strength reduction that does no obscuring optimizations and runs
    // early.
    //
    // On entry, m_value is the Check and `source` is a Select (with constant cases) found in
    // the last few slots of m_block by the Check handler. This turns the Select into a Branch
    // and duplicates everything between the Select and the Check into the two branch arms.
    void specializeSelect(Value* source)
    {
        if (B3ReduceStrengthInternal::verbose)
            dataLog("Specializing select: ", deepDump(m_proc, source), "\n");

        // This mutates startIndex to account for the fact that m_block got the front of it
        // chopped off.
        BasicBlock* predecessor = m_blockInsertionSet.splitForward(m_block, m_index, &m_insertionSet);
        if (m_block == m_root) {
            // The root block just changed; the cached constants in m_valueForConstant refer to
            // positions in the old root, so drop them.
            m_root = predecessor;
            m_valueForConstant.clear();
        }

        // Splitting will commit the insertion set, which changes the exact position of the
        // source. That's why we do the search after splitting.
        unsigned startIndex = UINT_MAX;
        for (unsigned i = predecessor->size(); i--;) {
            if (predecessor->at(i) == source) {
                startIndex = i;
                break;
            }
        }

        RELEASE_ASSERT(startIndex != UINT_MAX);

        // By BasicBlock convention, caseIndex == 0 => then, caseIndex == 1 => else.
        static const unsigned numCases = 2;
        BasicBlock* cases[numCases];
        for (unsigned i = 0; i < numCases; ++i)
            cases[i] = m_blockInsertionSet.insertBefore(m_block);

        // Per-case map from an original value to its clone in that case block.
        HashMap<Value*, Value*> mappings[2];

        // Save things we want to know about the source.
        Value* predicate = source->child(0);

        // In the "then" case the Select is known to produce child(1), in the "else" case
        // child(2) — seed the mappings accordingly so clones see the constant directly.
        for (unsigned i = 0; i < numCases; ++i)
            mappings[i].add(source, source->child(1 + i));

        // Clone `value` into both case blocks, rewiring children through the mappings, and
        // turn the original into a Phi fed by per-case Upsilons.
        auto cloneValue = [&] (Value* value) {
            ASSERT(value != source);

            for (unsigned i = 0; i < numCases; ++i) {
                Value* clone = m_proc.clone(value);
                for (Value*& child : clone->children()) {
                    if (Value* newChild = mappings[i].get(child))
                        child = newChild;
                }
                if (value->type() != Void)
                    mappings[i].add(value, clone);

                cases[i]->append(clone);
                if (value->type() != Void)
                    cases[i]->appendNew<UpsilonValue>(m_proc, value->origin(), clone, value);
            }

            value->replaceWithPhi();
        };

        // The jump that the splitter inserted is of no use to us.
        predecessor->removeLast(m_proc);

        // Handle the source first, it's special: each case feeds its known constant into the
        // Phi that replaces the Select.
        for (unsigned i = 0; i < numCases; ++i) {
            cases[i]->appendNew<UpsilonValue>(
                m_proc, source->origin(), source->child(1 + i), source);
        }
        source->replaceWithPhi();
        m_insertionSet.insertValue(m_index, source);

        // Now handle all values between the source and the check.
        for (unsigned i = startIndex + 1; i < predecessor->size(); ++i) {
            Value* value = predecessor->at(i);
            value->owner = nullptr;

            cloneValue(value);

            // Void values produce no result, so they don't need a Phi in the continuation.
            if (value->type() != Void)
                m_insertionSet.insertValue(m_index, value);
            else
                m_proc.deleteValue(value);
        }

        // Finally, deal with the check.
        cloneValue(m_value);

        // Remove the values from the predecessor.
        predecessor->values().resize(startIndex);

        predecessor->appendNew<Value>(m_proc, Branch, source->origin(), predicate);
        predecessor->setSuccessors(FrequentedBlock(cases[0]), FrequentedBlock(cases[1]));

        for (unsigned i = 0; i < numCases; ++i) {
            cases[i]->appendNew<Value>(m_proc, Jump, m_value->origin());
            cases[i]->setSuccessors(FrequentedBlock(m_block));
        }

        m_changed = true;

        predecessor->updatePredecessorsAfter();
    }
2322
2323 static bool shouldSwapBinaryOperands(Value* value)
2324 {
2325 // Note that we have commutative operations that take more than two children. Those operations may
2326 // commute their first two children while leaving the rest unaffected.
2327 ASSERT(value->numChildren() >= 2);
2328
2329 // Leave it alone if the right child is a constant.
2330 if (value->child(1)->isConstant()
2331 || value->child(0)->opcode() == AtomicStrongCAS)
2332 return false;
2333
2334 if (value->child(0)->isConstant())
2335 return true;
2336
2337 if (value->child(1)->opcode() == AtomicStrongCAS)
2338 return true;
2339
2340 // Sort the operands. This is an important canonicalization. We use the index instead of
2341 // the address to make this at least slightly deterministic.
2342 if (value->child(0)->index() > value->child(1)->index())
2343 return true;
2344
2345 return false;
2346 }
2347
2348 // Turn this: Add(constant, value)
2349 // Into this: Add(value, constant)
2350 //
2351 // Also:
2352 // Turn this: Add(value1, value2)
2353 // Into this: Add(value2, value1)
2354 // If we decide that value2 coming first is the canonical ordering.
2355 void handleCommutativity()
2356 {
2357 if (shouldSwapBinaryOperands(m_value)) {
2358 std::swap(m_value->child(0), m_value->child(1));
2359 m_changed = true;
2360 }
2361 }
2362
2363 // For Op==Add or Sub, turn any of these:
2364 // Op(Mul(x1, x2), Mul(x1, x3))
2365 // Op(Mul(x2, x1), Mul(x1, x3))
2366 // Op(Mul(x1, x2), Mul(x3, x1))
2367 // Op(Mul(x2, x1), Mul(x3, x1))
2368 // Into this: Mul(x1, Op(x2, x3))
2369 bool handleMulDistributivity()
2370 {
2371 ASSERT(m_value->opcode() == Add || m_value->opcode() == Sub);
2372 Value* x1 = nullptr;
2373 Value* x2 = nullptr;
2374 Value* x3 = nullptr;
2375 if (m_value->child(0)->opcode() == Mul && m_value->child(1)->opcode() == Mul) {
2376 if (m_value->child(0)->child(0) == m_value->child(1)->child(0)) {
2377 // Op(Mul(x1, x2), Mul(x1, x3))
2378 x1 = m_value->child(0)->child(0);
2379 x2 = m_value->child(0)->child(1);
2380 x3 = m_value->child(1)->child(1);
2381 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(0)) {
2382 // Op(Mul(x2, x1), Mul(x1, x3))
2383 x1 = m_value->child(0)->child(1);
2384 x2 = m_value->child(0)->child(0);
2385 x3 = m_value->child(1)->child(1);
2386 } else if (m_value->child(0)->child(0) == m_value->child(1)->child(1)) {
2387 // Op(Mul(x1, x2), Mul(x3, x1))
2388 x1 = m_value->child(0)->child(0);
2389 x2 = m_value->child(0)->child(1);
2390 x3 = m_value->child(1)->child(0);
2391 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(1)) {
2392 // Op(Mul(x2, x1), Mul(x3, x1))
2393 x1 = m_value->child(0)->child(1);
2394 x2 = m_value->child(0)->child(0);
2395 x3 = m_value->child(1)->child(0);
2396 }
2397 }
2398 if (x1 != nullptr) {
2399 ASSERT(x2 != nullptr && x3 != nullptr);
2400 Value* newOp = m_insertionSet.insert<Value>(m_index, m_value->opcode(), m_value->origin(), x2, x3);
2401 replaceWithNew<Value>(Mul, m_value->origin(), x1, newOp);
2402 return true;
2403 }
2404 return false;
2405 }
2406
    // For Op==BitOr or BitXor, turn any of these:
    //    Op(BitAnd(x1, x2), BitAnd(x1, x3))
    //    Op(BitAnd(x2, x1), BitAnd(x1, x3))
    //    Op(BitAnd(x1, x2), BitAnd(x3, x1))
    //    Op(BitAnd(x2, x1), BitAnd(x3, x1))
    // Into this: BitAnd(Op(x2, x3), x1)
    // And any of these:
    //    Op(BitAnd(x1, x2), x1)
    //    Op(BitAnd(x2, x1), x1)
    //    Op(x1, BitAnd(x1, x2))
    //    Op(x1, BitAnd(x2, x1))
    // Into this: BitAnd(Op(x2, x1), x1)
    // This second set is equivalent to doing x1 => BitAnd(x1, x1), and then applying the first set.
    // It does not reduce the number of operations executed, but provides some useful normalization: we prefer to have BitAnd at the outermost, then BitXor, and finally BitOr at the innermost
    bool handleBitAndDistributivity()
    {
        ASSERT(m_value->opcode() == BitOr || m_value->opcode() == BitXor);
        // x1 is the common factor; x2/x3 are the remaining operands of Op. The first matching
        // pattern wins.
        Value* x1 = nullptr;
        Value* x2 = nullptr;
        Value* x3 = nullptr;
        if (m_value->child(0)->opcode() == BitAnd && m_value->child(1)->opcode() == BitAnd) {
            if (m_value->child(0)->child(0) == m_value->child(1)->child(0)) {
                // Op(BitAnd(x1, x2), BitAnd(x1, x3))
                x1 = m_value->child(0)->child(0);
                x2 = m_value->child(0)->child(1);
                x3 = m_value->child(1)->child(1);
            } else if (m_value->child(0)->child(1) == m_value->child(1)->child(0)) {
                // Op(BitAnd(x2, x1), BitAnd(x1, x3))
                x1 = m_value->child(0)->child(1);
                x2 = m_value->child(0)->child(0);
                x3 = m_value->child(1)->child(1);
            } else if (m_value->child(0)->child(0) == m_value->child(1)->child(1)) {
                // Op(BitAnd(x1, x2), BitAnd(x3, x1))
                x1 = m_value->child(0)->child(0);
                x2 = m_value->child(0)->child(1);
                x3 = m_value->child(1)->child(0);
            } else if (m_value->child(0)->child(1) == m_value->child(1)->child(1)) {
                // Op(BitAnd(x2, x1), BitAnd(x3, x1))
                x1 = m_value->child(0)->child(1);
                x2 = m_value->child(0)->child(0);
                x3 = m_value->child(1)->child(0);
            }
        } else if (m_value->child(0)->opcode() == BitAnd) {
            // Second pattern set: one side is the bare x1 itself, so x3 == x1.
            if (m_value->child(0)->child(0) == m_value->child(1)) {
                // Op(BitAnd(x1, x2), x1)
                x1 = x3 = m_value->child(1);
                x2 = m_value->child(0)->child(1);
            } else if (m_value->child(0)->child(1) == m_value->child(1)) {
                // Op(BitAnd(x2, x1), x1)
                x1 = x3 = m_value->child(1);
                x2 = m_value->child(0)->child(0);
            }
        } else if (m_value->child(1)->opcode() == BitAnd) {
            if (m_value->child(1)->child(0) == m_value->child(0)) {
                // Op(x1, BitAnd(x1, x2))
                x1 = x3 = m_value->child(0);
                x2 = m_value->child(1)->child(1);
            } else if (m_value->child(1)->child(1) == m_value->child(0)) {
                // Op(x1, BitAnd(x2, x1))
                x1 = x3 = m_value->child(0);
                x2 = m_value->child(1)->child(0);
            }
        }
        if (x1 != nullptr) {
            ASSERT(x2 != nullptr && x3 != nullptr);
            // Build Op(x2, x3), then hoist the BitAnd with x1 to the outside.
            Value* bitOp = m_insertionSet.insert<Value>(m_index, m_value->opcode(), m_value->origin(), x2, x3);
            replaceWithNew<Value>(BitAnd, m_value->origin(), x1, bitOp);
            return true;
        }
        return false;
    }
2470
    // Result of canonicalizeComparison(): the (possibly flipped) comparison opcode and
    // its two operands in canonical order.
    struct CanonicalizedComparison {
        Opcode opcode;
        Value* operands[2];
    };
2475 static CanonicalizedComparison canonicalizeComparison(Value* value)
2476 {
2477 auto flip = [] (Opcode opcode) {
2478 switch (opcode) {
2479 case LessThan:
2480 return GreaterThan;
2481 case GreaterThan:
2482 return LessThan;
2483 case LessEqual:
2484 return GreaterEqual;
2485 case GreaterEqual:
2486 return LessEqual;
2487 case Above:
2488 return Below;
2489 case Below:
2490 return Above;
2491 case AboveEqual:
2492 return BelowEqual;
2493 case BelowEqual:
2494 return AboveEqual;
2495 default:
2496 return opcode;
2497 }
2498 };
2499 if (shouldSwapBinaryOperands(value))
2500 return { flip(value->opcode()), { value->child(1), value->child(0) } };
2501 return { value->opcode(), { value->child(0), value->child(1) } };
2502 }
2503
    // FIXME: This should really be a forward analysis. Instead, we use a bounded-search backwards
    // analysis.
    //
    // Computes a conservative [min, max] range for the given value by walking backwards
    // through its children. timeToLive bounds the recursion depth; when it hits zero we
    // give up and return the full range of the value's type.
    IntRange rangeFor(Value* value, unsigned timeToLive = 5)
    {
        if (!timeToLive)
            return IntRange::top(value->type());

        switch (value->opcode()) {
        case Const32:
        case Const64: {
            // A constant's range is the singleton [c, c].
            int64_t intValue = value->asInt();
            return IntRange(intValue, intValue);
        }

        case BitAnd:
            // Masking with a constant bounds the result by the mask's bit pattern.
            if (value->child(1)->hasInt())
                return IntRange::rangeForMask(value->child(1)->asInt(), value->type());
            break;

        case SShr:
            // Constant shift amounts let us transform the operand's range directly.
            if (value->child(1)->hasInt32()) {
                return rangeFor(value->child(0), timeToLive - 1).sShr(
                    value->child(1)->asInt32(), value->type());
            }
            break;

        case ZShr:
            if (value->child(1)->hasInt32()) {
                return rangeFor(value->child(0), timeToLive - 1).zShr(
                    value->child(1)->asInt32(), value->type());
            }
            break;

        case Shl:
            if (value->child(1)->hasInt32()) {
                return rangeFor(value->child(0), timeToLive - 1).shl(
                    value->child(1)->asInt32(), value->type());
            }
            break;

        case Add:
            // Arithmetic combines the ranges of both operands; each recursive call
            // spends one unit of timeToLive.
            return rangeFor(value->child(0), timeToLive - 1).add(
                rangeFor(value->child(1), timeToLive - 1), value->type());

        case Sub:
            return rangeFor(value->child(0), timeToLive - 1).sub(
                rangeFor(value->child(1), timeToLive - 1), value->type());

        case Mul:
            return rangeFor(value->child(0), timeToLive - 1).mul(
                rangeFor(value->child(1), timeToLive - 1), value->type());

        default:
            break;
        }

        // Anything we don't understand gets the type's full range.
        return IntRange::top(value->type());
    }
2562
    // Convenience wrapper: constructs a new ValueType in the procedure from the given
    // constructor arguments and replaces m_value with it via replaceWithNewValue().
    template<typename ValueType, typename... Arguments>
    void replaceWithNew(Arguments... arguments)
    {
        replaceWithNewValue(m_proc.add<ValueType>(arguments...));
    }
2568
2569 bool replaceWithNewValue(Value* newValue)
2570 {
2571 if (!newValue)
2572 return false;
2573 m_insertionSet.insertValue(m_index, newValue);
2574 m_value->replaceWithIdentity(newValue);
2575 m_changed = true;
2576 return true;
2577 }
2578
    // Turns m_value into an Identity of an already-existing value and records the change.
    void replaceWithIdentity(Value* newValue)
    {
        m_value->replaceWithIdentity(newValue);
        m_changed = true;
    }
2584
2585 void handleShiftAmount()
2586 {
2587 // Shift anything by zero is identity.
2588 if (m_value->child(1)->isInt32(0)) {
2589 replaceWithIdentity(m_value->child(0));
2590 return;
2591 }
2592
2593 // The shift already masks its shift amount. If the shift amount is being masked by a
2594 // redundant amount, then remove the mask. For example,
2595 // Turn this: Shl(@x, BitAnd(@y, 63))
2596 // Into this: Shl(@x, @y)
2597 unsigned mask = sizeofType(m_value->type()) * 8 - 1;
2598 if (m_value->child(1)->opcode() == BitAnd
2599 && m_value->child(1)->child(1)->hasInt32()
2600 && (m_value->child(1)->child(1)->asInt32() & mask) == mask) {
2601 m_value->child(1) = m_value->child(1)->child(0);
2602 m_changed = true;
2603 }
2604 }
2605
    // Lets pure CSE replace m_value with a dominating equivalent value, if one exists.
    void replaceIfRedundant()
    {
        m_changed |= m_pureCSE.process(m_value, *m_dominators);
    }
2610
    // Applies three local CFG simplifications to every block (visited in post order):
    // jump threading, terminal-to-jump conversion, and single-predecessor block merging.
    // Sets m_changedCFG when anything changes; the caller is responsible for recomputing
    // reachability afterwards.
    void simplifyCFG()
    {
        if (B3ReduceStrengthInternal::verbose) {
            dataLog("Before simplifyCFG:\n");
            dataLog(m_proc);
        }

        // We have three easy simplification rules:
        //
        // 1) If a successor is a block that just jumps to another block, then jump directly to
        //    that block.
        //
        // 2) If all successors are the same and the operation has no effects, then use a jump
        //    instead.
        //
        // 3) If you jump to a block that is not you and has one predecessor, then merge.
        //
        // Note that because of the first rule, this phase may introduce critical edges. That's fine.
        // If you need broken critical edges, then you have to break them yourself.

        // Note that this relies on predecessors being at least conservatively correct. It's fine for
        // predecessors to mention a block that isn't actually a predecessor. It's *not* fine for a
        // predecessor to be omitted. We assert as much in the loop. In practice, we precisely preserve
        // predecessors during strength reduction since that minimizes the total number of fixpoint
        // iterations needed to kill a lot of code.

        for (BasicBlock* block : m_proc.blocksInPostOrder()) {
            if (B3ReduceStrengthInternal::verbose)
                dataLog("Considering block ", *block, ":\n");

            checkPredecessorValidity();

            // We don't care about blocks that don't have successors.
            if (!block->numSuccessors())
                continue;

            // Rule 1: check if any of the successors of this block can be forwarded over.
            for (BasicBlock*& successor : block->successorBlocks()) {
                if (successor != block
                    && successor->size() == 1
                    && successor->last()->opcode() == Jump) {
                    BasicBlock* newSuccessor = successor->successorBlock(0);
                    if (newSuccessor != successor) {
                        if (B3ReduceStrengthInternal::verbose) {
                            dataLog(
                                "Replacing ", pointerDump(block), "->", pointerDump(successor),
                                " with ", pointerDump(block), "->", pointerDump(newSuccessor),
                                "\n");
                        }
                        // Note that we do not do replacePredecessor() because the block we're
                        // skipping will still have newSuccessor as its successor.
                        newSuccessor->addPredecessor(block);
                        successor = newSuccessor;
                        m_changedCFG = true;
                    }
                }
            }

            // Rule 2: check if the block's terminal can be replaced with a jump.
            if (block->numSuccessors() > 1) {
                // The terminal must not have weird effects.
                Effects effects = block->last()->effects();
                effects.terminal = false;
                if (!effects.mustExecute()) {
                    // All of the successors must be the same.
                    bool allSame = true;
                    BasicBlock* firstSuccessor = block->successorBlock(0);
                    for (unsigned i = 1; i < block->numSuccessors(); ++i) {
                        if (block->successorBlock(i) != firstSuccessor) {
                            allSame = false;
                            break;
                        }
                    }
                    if (allSame) {
                        if (B3ReduceStrengthInternal::verbose) {
                            dataLog(
                                "Changing ", pointerDump(block), "'s terminal to a Jump.\n");
                        }
                        block->last()->replaceWithJump(block, FrequentedBlock(firstSuccessor));
                        m_changedCFG = true;
                    }
                }
            }

            // Rule 3: finally handle jumps to a block with one predecessor.
            if (block->numSuccessors() == 1) {
                BasicBlock* successor = block->successorBlock(0);
                if (successor != block && successor->numPredecessors() == 1) {
                    RELEASE_ASSERT(successor->predecessor(0) == block);

                    // We can merge the two blocks, because the predecessor only jumps to the successor
                    // and the successor is only reachable from the predecessor.

                    // Remove the terminal.
                    Value* value = block->values().takeLast();
                    Origin jumpOrigin = value->origin();
                    RELEASE_ASSERT(value->effects().terminal);
                    m_proc.deleteValue(value);

                    // Append the full contents of the successor to the predecessor.
                    block->values().appendVector(successor->values());
                    block->successors() = successor->successors();

                    // Make sure that the successor has nothing left in it. Make sure that the block
                    // has a terminal so that nobody chokes when they look at it.
                    successor->values().shrink(0);
                    successor->appendNew<Value>(m_proc, Oops, jumpOrigin);
                    successor->clearSuccessors();

                    // Ensure that predecessors of block's new successors know what's up.
                    for (BasicBlock* newSuccessor : block->successorBlocks())
                        newSuccessor->replacePredecessor(successor, block);

                    if (B3ReduceStrengthInternal::verbose) {
                        dataLog(
                            "Merged ", pointerDump(block), "->", pointerDump(successor), "\n");
                    }

                    m_changedCFG = true;
                }
            }
        }

        if (m_changedCFG && B3ReduceStrengthInternal::verbose) {
            dataLog("B3 after simplifyCFG:\n");
            dataLog(m_proc);
        }
    }
2739
2740 void handleChangedCFGIfNecessary()
2741 {
2742 if (m_changedCFG) {
2743 m_proc.resetReachability();
2744 m_proc.invalidateCFG();
2745 m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
2746 m_changed = true;
2747 }
2748 }
2749
    // Debug-only sanity check: every successor edge must be mirrored by a predecessor
    // entry. Skipped entirely unless per-phase IR validation is enabled.
    void checkPredecessorValidity()
    {
        if (!shouldValidateIRAtEachPhase())
            return;

        for (BasicBlock* block : m_proc) {
            for (BasicBlock* successor : block->successorBlocks())
                RELEASE_ASSERT(successor->containsPredecessor(block));
        }
    }
2760
    void simplifySSA()
    {
        // This runs Aycock and Horspool's algorithm on our Phi functions [1]. For most CFG patterns,
        // this can take a suboptimal arrangement of Phi functions and make it optimal, as if you had
        // run Cytron, Ferrante, Rosen, Wegman, and Zadeck. It's only suboptimal for irreducible
        // CFGs. In practice, that doesn't matter, since we expect clients of B3 to run their own SSA
        // conversion before lowering to B3, and in the case of the DFG, that conversion uses Cytron
        // et al. In that context, this algorithm is intended to simplify Phi functions that were
        // made redundant by prior CFG simplification. But according to Aycock and Horspool's paper,
        // this algorithm is good enough that a B3 client could just give us maximal Phi's (i.e. Phi
        // for each variable at each basic block) and we will make them optimal.
        // [1] http://pages.cpsc.ucalgary.ca/~aycock/papers/ssa.ps

        // Aycock and Horspool prescribe two rules that are to be run to fixpoint:
        //
        // 1) If all of the Phi's children are the same (i.e. it's one child referenced from one or
        //    more Upsilons), then replace all uses of the Phi with the one child.
        //
        // 2) If all of the Phi's children are either the Phi itself or exactly one other child, then
        //    replace all uses of the Phi with the one other child.
        //
        // Rule (2) subsumes rule (1), so we can just run (2). We only run one fixpoint iteration
        // here. This premise is that in common cases, this will only find optimization opportunities
        // as a result of CFG simplification and usually CFG simplification will only do one round
        // of block merging per ReduceStrength fixpoint iteration, so it's OK for this to only do one
        // round of Phi merging - since Phis are the value analogue of blocks.

        PhiChildren phiChildren(m_proc);

        for (Value* phi : phiChildren.phis()) {
            // Scan this Phi's incoming values looking for at most one distinct child
            // besides the Phi itself (self-references are allowed and ignored).
            Value* otherChild = nullptr;
            bool ok = true;
            for (Value* child : phiChildren[phi].values()) {
                if (child == phi)
                    continue;
                if (child == otherChild)
                    continue;
                if (!otherChild) {
                    otherChild = child;
                    continue;
                }
                // Two distinct non-self children: rule (2) does not apply.
                ok = false;
                break;
            }
            if (!ok)
                continue;
            if (!otherChild) {
                // Wow, this would be super weird. It probably won't happen, except that things could
                // get weird as a consequence of stepwise simplifications in the strength reduction
                // fixpoint.
                continue;
            }

            // Turn the Phi into an Identity and turn the Upsilons into Nops.
            m_changed = true;
            for (Value* upsilon : phiChildren[phi])
                upsilon->replaceWithNop();
            phi->replaceWithIdentity(otherChild);
        }
    }
2821
    // The procedure being optimized.
    Procedure& m_proc;
    // Pending value insertions into the current block, committed once per block.
    InsertionSet m_insertionSet;
    // Pending block insertions (used by select specialization).
    BlockInsertionSet m_blockInsertionSet;
    // Cache of constants hoisted into the root block, keyed by their ValueKey.
    HashMap<ValueKey, Value*> m_valueForConstant;
    // The block that constants get hoisted into.
    BasicBlock* m_root { nullptr };
    // Cursor state: the block, index, and value currently being processed.
    BasicBlock* m_block { nullptr };
    unsigned m_index { 0 };
    Value* m_value { nullptr };
    // Lazily computed; reset to null whenever the CFG changes.
    Dominators* m_dominators { nullptr };
    PureCSE m_pureCSE;
    // Whether anything changed this iteration / whether the CFG itself changed.
    bool m_changed { false };
    bool m_changedCFG { false };
2834};
2835
2836} // anonymous namespace
2837
2838bool reduceStrength(Procedure& proc)
2839{
2840 PhaseScope phaseScope(proc, "reduceStrength");
2841 ReduceStrength reduceStrength(proc);
2842 return reduceStrength.run();
2843}
2844
2845} } // namespace JSC::B3
2846
2847#endif // ENABLE(B3_JIT)
2848
2849