1/*
2 * Copyright (C) 2015-2019 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "B3ReduceStrength.h"
28
29#if ENABLE(B3_JIT)
30
31#include "B3AtomicValue.h"
32#include "B3BasicBlockInlines.h"
33#include "B3BlockInsertionSet.h"
34#include "B3ComputeDivisionMagic.h"
35#include "B3Dominators.h"
36#include "B3EliminateDeadCode.h"
37#include "B3InsertionSetInlines.h"
38#include "B3MemoryValueInlines.h"
39#include "B3PhaseScope.h"
40#include "B3PhiChildren.h"
41#include "B3ProcedureInlines.h"
42#include "B3PureCSE.h"
43#include "B3SlotBaseValue.h"
44#include "B3StackSlot.h"
45#include "B3UpsilonValue.h"
46#include "B3ValueKeyInlines.h"
47#include "B3ValueInlines.h"
48#include "B3Variable.h"
49#include "B3VariableValue.h"
50#include <wtf/GraphNodeWorklist.h>
51#include <wtf/HashMap.h>
52#include <wtf/IndexSet.h>
53
54namespace JSC { namespace B3 {
55
56namespace {
57
58// The goal of this phase is to:
59//
60// - Replace operations with less expensive variants. This includes constant folding and classic
61// strength reductions like turning Mul(x, 1 << k) into Shl(x, k).
62//
63// - Reassociate constant operations. For example, Load(Add(x, c)) is turned into Load(x, offset = c)
64// and Add(Add(x, c), d) is turned into Add(x, c + d).
65//
66// - Canonicalize operations. There are some cases where it's not at all obvious which kind of
67// operation is less expensive, but it's useful for subsequent phases - particularly LowerToAir -
68// to have only one way of representing things.
69//
70// This phase runs to fixpoint. Therefore, the canonicalizations must be designed to be monotonic.
71// For example, if we had a canonicalization that said that Add(x, -c) should be Sub(x, c) and
72// another canonicalization that said that Sub(x, d) should be Add(x, -d), then this phase would end
73// up running forever. We don't want that.
74//
75// Therefore, we need to prioritize certain canonical forms over others. Naively, we want strength
76// reduction to reduce the number of values, and so a form involving fewer total values is more
77// canonical. But we might break this, for example when reducing strength of Mul(x, 9). This could be
78// better written as Add(Shl(x, 3), x), which also happens to be representable using a single
79// instruction on x86.
80//
81// Here are some of the rules we have:
82//
83// Canonical form of logical not: BitXor(value, 1). We may have to avoid using this form if we don't
84// know for sure that 'value' is 0-or-1 (i.e. returnsBool). In that case we fall back on
85// Equal(value, 0).
86//
87// Canonical form of commutative operations: if the operation involves a constant, the constant must
88// come second. Add(x, constant) is canonical, while Add(constant, x) is not. If there are no
89// constants then the canonical form involves the lower-indexed value first. Given Add(x, y), it's
90// canonical if x->index() <= y->index().
91
92namespace B3ReduceStrengthInternal {
93static constexpr bool verbose = false;
94}
95
96// FIXME: This IntRange stuff should be refactored into a general constant propagator. It's weird
97// that it's just sitting here in this file.
// A conservative [min, max] interval for the possible values of an integer B3 value,
// always widened to int64_t even when describing an Int32 value. The typed (Type-taking)
// overloads dispatch to the templated versions for Int32/Int64. Every operation either
// returns a sound enclosing interval or gives up and returns top() for the type.
class IntRange {
public:
    // Default range is the single point [0, 0] (see the member initializers below).
    IntRange()
    {
    }

    IntRange(int64_t min, int64_t max)
        : m_min(min)
        , m_max(max)
    {
    }

    // The widest possible range for type T: [T::min, T::max].
    template<typename T>
    static IntRange top()
    {
        return IntRange(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    }

    static IntRange top(Type type)
    {
        switch (type.kind()) {
        case Int32:
            return top<int32_t>();
        case Int64:
            return top<int64_t>();
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of (x & mask). An all-ones mask (-1) constrains nothing, so return top;
    // that's what the !(mask + 1) test detects. Otherwise the result is in [0, mask].
    // NOTE(review): [0, mask] is only a well-formed interval for non-negative masks
    // (e.g. the 2^k - 1 masks produced by rangeForZShr) — presumably callers never pass
    // a negative mask other than -1; verify against callers.
    template<typename T>
    static IntRange rangeForMask(T mask)
    {
        if (!(mask + 1))
            return top<T>();
        return IntRange(0, mask);
    }

    static IntRange rangeForMask(int64_t mask, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return rangeForMask<int32_t>(static_cast<int32_t>(mask));
        case Int64:
            return rangeForMask<int64_t>(mask);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of an unsigned right shift by shiftAmount: all results fit under the mask
    // of (width - shiftAmount) low bits, i.e. (unsigned all-ones) >> shiftAmount.
    template<typename T>
    static IntRange rangeForZShr(int32_t shiftAmount)
    {
        typename std::make_unsigned<T>::type mask = 0;
        mask--;
        mask >>= shiftAmount;
        return rangeForMask<T>(static_cast<T>(mask));
    }

    static IntRange rangeForZShr(int32_t shiftAmount, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return rangeForZShr<int32_t>(shiftAmount);
        case Int64:
            return rangeForZShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    int64_t min() const { return m_min; }
    int64_t max() const { return m_max; }

    void dump(PrintStream& out) const
    {
        out.print("[", m_min, ",", m_max, "]");
    }

    // Overflow predicates: addition/subtraction/multiplication over an interval can only
    // overflow if it overflows at one of the four corner combinations of the endpoints,
    // so each check tests all four pairs (using the WTF sumOverflows/differenceOverflows/
    // productOverflows helpers). The Type-taking overloads conservatively answer "true"
    // for unknown types.
    template<typename T>
    bool couldOverflowAdd(const IntRange& other)
    {
        return sumOverflows<T>(m_min, other.m_min)
            || sumOverflows<T>(m_min, other.m_max)
            || sumOverflows<T>(m_max, other.m_min)
            || sumOverflows<T>(m_max, other.m_max);
    }

    bool couldOverflowAdd(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return couldOverflowAdd<int32_t>(other);
        case Int64:
            return couldOverflowAdd<int64_t>(other);
        default:
            return true;
        }
    }

    template<typename T>
    bool couldOverflowSub(const IntRange& other)
    {
        return differenceOverflows<T>(m_min, other.m_min)
            || differenceOverflows<T>(m_min, other.m_max)
            || differenceOverflows<T>(m_max, other.m_min)
            || differenceOverflows<T>(m_max, other.m_max);
    }

    bool couldOverflowSub(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return couldOverflowSub<int32_t>(other);
        case Int64:
            return couldOverflowSub<int64_t>(other);
        default:
            return true;
        }
    }

    template<typename T>
    bool couldOverflowMul(const IntRange& other)
    {
        return productOverflows<T>(m_min, other.m_min)
            || productOverflows<T>(m_min, other.m_max)
            || productOverflows<T>(m_max, other.m_min)
            || productOverflows<T>(m_max, other.m_max);
    }

    bool couldOverflowMul(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return couldOverflowMul<int32_t>(other);
        case Int64:
            return couldOverflowMul<int64_t>(other);
        default:
            return true;
        }
    }

    // Range of (x << shiftAmount). Shift each endpoint; if shifting back does not
    // round-trip, the shift lost bits, so saturate that endpoint to the type's limit.
    template<typename T>
    IntRange shl(int32_t shiftAmount)
    {
        T newMin = static_cast<T>(m_min) << static_cast<T>(shiftAmount);
        T newMax = static_cast<T>(m_max) << static_cast<T>(shiftAmount);

        if ((newMin >> shiftAmount) != static_cast<T>(m_min))
            newMin = std::numeric_limits<T>::min();
        if ((newMax >> shiftAmount) != static_cast<T>(m_max))
            newMax = std::numeric_limits<T>::max();

        return IntRange(newMin, newMax);
    }

    IntRange shl(int32_t shiftAmount, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return shl<int32_t>(shiftAmount);
        case Int64:
            return shl<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of a signed (arithmetic) right shift: it's monotonic, so shifting the
    // endpoints yields the exact result interval.
    template<typename T>
    IntRange sShr(int32_t shiftAmount)
    {
        T newMin = static_cast<T>(m_min) >> static_cast<T>(shiftAmount);
        T newMax = static_cast<T>(m_max) >> static_cast<T>(shiftAmount);

        return IntRange(newMin, newMax);
    }

    IntRange sShr(int32_t shiftAmount, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return sShr<int32_t>(shiftAmount);
        case Int64:
            return sShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of an unsigned (logical) right shift.
    template<typename T>
    IntRange zShr(int32_t shiftAmount)
    {
        // This is an awkward corner case for all of the other logic.
        if (!shiftAmount)
            return *this;

        // If the input range may be negative, then all we can say about the output range is that it
        // will be masked. That's because -1 right shifted just produces that mask.
        if (m_min < 0)
            return rangeForZShr<T>(shiftAmount);

        // If the input range is non-negative, then this just brings the range closer to zero.
        typedef typename std::make_unsigned<T>::type UnsignedT;
        UnsignedT newMin = static_cast<UnsignedT>(m_min) >> static_cast<UnsignedT>(shiftAmount);
        UnsignedT newMax = static_cast<UnsignedT>(m_max) >> static_cast<UnsignedT>(shiftAmount);

        return IntRange(newMin, newMax);
    }

    IntRange zShr(int32_t shiftAmount, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return zShr<int32_t>(shiftAmount);
        case Int64:
            return zShr<int64_t>(shiftAmount);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Interval addition: if any corner sum can overflow T, give up and return top;
    // otherwise [min + other.min, max + other.max].
    template<typename T>
    IntRange add(const IntRange& other)
    {
        if (couldOverflowAdd<T>(other))
            return top<T>();
        return IntRange(m_min + other.m_min, m_max + other.m_max);
    }

    IntRange add(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return add<int32_t>(other);
        case Int64:
            return add<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Interval subtraction: note the endpoints cross — the smallest difference is
    // min - other.max and the largest is max - other.min.
    template<typename T>
    IntRange sub(const IntRange& other)
    {
        if (couldOverflowSub<T>(other))
            return top<T>();
        return IntRange(m_min - other.m_max, m_max - other.m_min);
    }

    IntRange sub(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return sub<int32_t>(other);
        case Int64:
            return sub<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Interval multiplication: signs can flip the ordering, so take the min and max
    // over all four corner products.
    template<typename T>
    IntRange mul(const IntRange& other)
    {
        if (couldOverflowMul<T>(other))
            return top<T>();
        return IntRange(
            std::min(
                std::min(m_min * other.m_min, m_min * other.m_max),
                std::min(m_max * other.m_min, m_max * other.m_max)),
            std::max(
                std::max(m_min * other.m_min, m_min * other.m_max),
                std::max(m_max * other.m_min, m_max * other.m_max)));
    }

    IntRange mul(const IntRange& other, Type type)
    {
        switch (type.kind()) {
        case Int32:
            return mul<int32_t>(other);
        case Int64:
            return mul<int64_t>(other);
        default:
            RELEASE_ASSERT_NOT_REACHED();
            return IntRange();
        }
    }

    // Range of ZExt32: reinterpret the endpoints of an int32-valued range as uint32.
    // NOTE(review): if the range spans the sign boundary (m_min < 0 <= m_max), the
    // reinterpreted endpoints come out with min > max — presumably callers only rely on
    // this where the range stays on one side of zero, or treat such a result
    // conservatively; verify against callers.
    IntRange zExt32()
    {
        ASSERT(m_min >= INT32_MIN);
        ASSERT(m_max <= INT32_MAX);
        int32_t min = m_min;
        int32_t max = m_max;
        return IntRange(static_cast<uint64_t>(static_cast<uint32_t>(min)), static_cast<uint64_t>(static_cast<uint32_t>(max)));
    }

private:
    int64_t m_min { 0 };
    int64_t m_max { 0 };
};
407
408class ReduceStrength {
409public:
    // Binds the phase to a procedure. m_root is the entry block (block 0); the
    // insertion sets batch value/block insertions performed during a pass.
    ReduceStrength(Procedure& proc)
        : m_proc(proc)
        , m_insertionSet(proc)
        , m_blockInsertionSet(proc)
        , m_root(proc.at(0))
    {
    }
417
    // Drives the phase to fixpoint: each iteration simplifies the CFG, runs DCE, then
    // SSA simplification, then walks every block in pre-order reducing each value (and,
    // at optLevel >= 2, CSE-ing it). Iteration repeats while something changed; at
    // optLevel < 2 we only make a single pass. Returns true if anything changed at all.
    bool run()
    {
        bool result = false; // Sticky "did anything ever change" flag, returned to caller.
        bool first = true;
        unsigned index = 0; // Iteration counter, used only for verbose logging.
        do {
            m_changed = false;
            m_changedCFG = false;
            ++index;

            // Log the procedure as it looked after the previous iteration (skip before
            // the first one, since nothing has run yet).
            if (first)
                first = false;
            else if (B3ReduceStrengthInternal::verbose) {
                dataLog("B3 after iteration #", index - 1, " of reduceStrength:\n");
                dataLog(m_proc);
            }

            simplifyCFG();

            // CFG edits invalidate reachability and any cached CFG analyses.
            if (m_changedCFG) {
                m_proc.resetReachability();
                m_proc.invalidateCFG();
                m_changed = true;
            }

            // We definitely want to do DCE before we do CSE so that we don't hoist things. For
            // example:
            //
            //     @dead = Mul(@a, @b)
            //     ... lots of control flow and stuff
            //     @thing = Mul(@a, @b)
            //
            // If we do CSE before DCE, we will remove @thing and keep @dead. Effectively, we will
            // "hoist" @thing. On the other hand, if we run DCE before CSE, we will kill @dead and
            // keep @thing. That's better, since we usually want things to stay wherever the client
            // put them. We're not actually smart enough to move things around at random.
            m_changed |= eliminateDeadCodeImpl(m_proc);
            // DCE may have deleted values that m_valueForConstant points at; drop the cache.
            m_valueForConstant.clear();

            simplifySSA();

            // CSE machinery is only maintained at optLevel >= 2.
            if (m_proc.optLevel() >= 2) {
                m_proc.resetValueOwners();
                m_dominators = &m_proc.dominators(); // Recompute if necessary.
                m_pureCSE.clear();
            }

            for (BasicBlock* block : m_proc.blocksInPreOrder()) {
                m_block = block;

                // Note: block->size() is re-read each iteration because reductions can
                // append to the block via the insertion set.
                for (m_index = 0; m_index < block->size(); ++m_index) {
                    if (B3ReduceStrengthInternal::verbose) {
                        dataLog(
                            "Looking at ", *block, " #", m_index, ": ",
                            deepDump(m_proc, block->at(m_index)), "\n");
                    }
                    m_value = m_block->at(m_index);
                    // Fold away Identity children before pattern-matching on this value.
                    m_value->performSubstitution();
                    reduceValueStrength();
                    if (m_proc.optLevel() >= 2)
                        replaceIfRedundant();
                }
                // Materialize all values inserted while scanning this block.
                m_insertionSet.execute(m_block);
            }

            m_changedCFG |= m_blockInsertionSet.execute();
            handleChangedCFGIfNecessary();

            result |= m_changed;
        } while (m_changed && m_proc.optLevel() >= 2);

        // At low optimization levels we didn't loop, but still do one final CFG cleanup.
        if (m_proc.optLevel() < 2) {
            m_changedCFG = false;
            simplifyCFG();
            handleChangedCFGIfNecessary();
        }

        return result;
    }
497
498private:
499 void reduceValueStrength()
500 {
501 switch (m_value->opcode()) {
502 case Opaque:
503 // Turn this: Opaque(Opaque(value))
504 // Into this: Opaque(value)
505 if (m_value->child(0)->opcode() == Opaque) {
506 replaceWithIdentity(m_value->child(0));
507 break;
508 }
509 break;
510
511 case Add:
512 handleCommutativity();
513
514 if (m_value->child(0)->opcode() == Add && m_value->isInteger()) {
515 // Turn this: Add(Add(value, constant1), constant2)
516 // Into this: Add(value, constant1 + constant2)
517 Value* newSum = m_value->child(1)->addConstant(m_proc, m_value->child(0)->child(1));
518 if (newSum) {
519 m_insertionSet.insertValue(m_index, newSum);
520 m_value->child(0) = m_value->child(0)->child(0);
521 m_value->child(1) = newSum;
522 m_changed = true;
523 break;
524 }
525
526 // Turn this: Add(Add(value, constant), otherValue)
527 // Into this: Add(Add(value, otherValue), constant)
528 if (!m_value->child(1)->hasInt() && m_value->child(0)->child(1)->hasInt()) {
529 Value* value = m_value->child(0)->child(0);
530 Value* constant = m_value->child(0)->child(1);
531 Value* otherValue = m_value->child(1);
532 // This could create duplicate code if Add(value, constant) is used elsewhere.
533 // However, we already model adding a constant as if it was free in other places
534 // so let's just roll with it. The alternative would mean having to do good use
535 // counts, which reduceStrength() currently doesn't have.
536 m_value->child(0) =
537 m_insertionSet.insert<Value>(
538 m_index, Add, m_value->origin(), value, otherValue);
539 m_value->child(1) = constant;
540 m_changed = true;
541 break;
542 }
543 }
544
545 // Turn this: Add(otherValue, Add(value, constant))
546 // Into this: Add(Add(value, otherValue), constant)
547 if (m_value->isInteger()
548 && !m_value->child(0)->hasInt()
549 && m_value->child(1)->opcode() == Add
550 && m_value->child(1)->child(1)->hasInt()) {
551 Value* value = m_value->child(1)->child(0);
552 Value* constant = m_value->child(1)->child(1);
553 Value* otherValue = m_value->child(0);
554 // This creates a duplicate add. That's dangerous but probably fine, see above.
555 m_value->child(0) =
556 m_insertionSet.insert<Value>(
557 m_index, Add, m_value->origin(), value, otherValue);
558 m_value->child(1) = constant;
559 m_changed = true;
560 break;
561 }
562
563 // Turn this: Add(constant1, constant2)
564 // Into this: constant1 + constant2
565 if (Value* constantAdd = m_value->child(0)->addConstant(m_proc, m_value->child(1))) {
566 replaceWithNewValue(constantAdd);
567 break;
568 }
569
570 // Turn this: Integer Add(value, value)
571 // Into this: Shl(value, 1)
572 // This is a useful canonicalization. It's not meant to be a strength reduction.
573 if (m_value->isInteger() && m_value->child(0) == m_value->child(1)) {
574 replaceWithNewValue(
575 m_proc.add<Value>(
576 Shl, m_value->origin(), m_value->child(0),
577 m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), 1)));
578 break;
579 }
580
581 // Turn this: Add(value, zero)
582 // Into an Identity.
583 //
584 // Addition is subtle with doubles. Zero is not the neutral value, negative zero is:
585 // 0 + 0 = 0
586 // 0 + -0 = 0
587 // -0 + 0 = 0
588 // -0 + -0 = -0
589 if (m_value->child(1)->isInt(0) || m_value->child(1)->isNegativeZero()) {
590 replaceWithIdentity(m_value->child(0));
591 break;
592 }
593
594 if (m_value->isInteger()) {
595 // Turn this: Integer Add(value, Neg(otherValue))
596 // Into this: Sub(value, otherValue)
597 if (m_value->child(1)->opcode() == Neg) {
598 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
599 break;
600 }
601
602 // Turn this: Integer Add(Neg(value), otherValue)
603 // Into this: Sub(otherValue, value)
604 if (m_value->child(0)->opcode() == Neg) {
605 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(1), m_value->child(0)->child(0));
606 break;
607 }
608
609 // Turn this: Integer Add(Sub(0, value), -1)
610 // Into this: BitXor(value, -1)
611 if (m_value->child(0)->opcode() == Sub
612 && m_value->child(1)->isInt(-1)
613 && m_value->child(0)->child(0)->isInt(0)) {
614 replaceWithNew<Value>(BitXor, m_value->origin(), m_value->child(0)->child(1), m_value->child(1));
615 break;
616 }
617
618 if (handleMulDistributivity())
619 break;
620 }
621
622 break;
623
624 case Sub:
625 // Turn this: Sub(constant1, constant2)
626 // Into this: constant1 - constant2
627 if (Value* constantSub = m_value->child(0)->subConstant(m_proc, m_value->child(1))) {
628 replaceWithNewValue(constantSub);
629 break;
630 }
631
632 if (m_value->isInteger()) {
633 // Turn this: Sub(value, constant)
634 // Into this: Add(value, -constant)
635 if (Value* negatedConstant = m_value->child(1)->negConstant(m_proc)) {
636 m_insertionSet.insertValue(m_index, negatedConstant);
637 replaceWithNew<Value>(
638 Add, m_value->origin(), m_value->child(0), negatedConstant);
639 break;
640 }
641
642 // Turn this: Sub(0, value)
643 // Into this: Neg(value)
644 if (m_value->child(0)->isInt(0)) {
645 replaceWithNew<Value>(Neg, m_value->origin(), m_value->child(1));
646 break;
647 }
648
649 // Turn this: Sub(value, value)
650 // Into this: 0
651 if (m_value->child(0) == m_value->child(1)) {
652 replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
653 break;
654 }
655
656 // Turn this: Sub(value, Neg(otherValue))
657 // Into this: Add(value, otherValue)
658 if (m_value->child(1)->opcode() == Neg) {
659 replaceWithNew<Value>(Add, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
660 break;
661 }
662
663 // Turn this: Sub(Neg(value), value2)
664 // Into this: Neg(Add(value, value2))
665 if (m_value->child(0)->opcode() == Neg) {
666 replaceWithNew<Value>(Neg, m_value->origin(),
667 m_insertionSet.insert<Value>(m_index, Add, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)));
668 break;
669 }
670
671 // Turn this: Sub(Sub(a, b), c)
672 // Into this: Sub(a, Add(b, c))
673 if (m_value->child(0)->opcode() == Sub) {
674 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(0)->child(0),
675 m_insertionSet.insert<Value>(m_index, Add, m_value->origin(), m_value->child(0)->child(1), m_value->child(1)));
676 break;
677 }
678
679 // Turn this: Sub(a, Sub(b, c))
680 // Into this: Add(Sub(a, b), c)
681 if (m_value->child(1)->opcode() == Sub) {
682 replaceWithNew<Value>(Add, m_value->origin(),
683 m_insertionSet.insert<Value>(m_index, Sub, m_value->origin(), m_value->child(0), m_value->child(1)->child(0)),
684 m_value->child(1)->child(1));
685 break;
686 }
687
688 // Turn this: Sub(Add(a, b), c)
689 // Into this: Add(a, Sub(b, c))
690 if (m_value->child(0)->opcode() == Add) {
691 replaceWithNew<Value>(Add, m_value->origin(), m_value->child(0)->child(0),
692 m_insertionSet.insert<Value>(m_index, Sub, m_value->origin(), m_value->child(0)->child(1), m_value->child(1)));
693 break;
694 }
695
696 if (handleMulDistributivity())
697 break;
698 }
699
700 break;
701
702 case Neg:
703 // Turn this: Neg(constant)
704 // Into this: -constant
705 if (Value* constant = m_value->child(0)->negConstant(m_proc)) {
706 replaceWithNewValue(constant);
707 break;
708 }
709
710 // Turn this: Neg(Neg(value))
711 // Into this: value
712 if (m_value->child(0)->opcode() == Neg) {
713 replaceWithIdentity(m_value->child(0)->child(0));
714 break;
715 }
716
717 if (m_value->isInteger()) {
718 // Turn this: Integer Neg(Sub(value, otherValue))
719 // Into this: Sub(otherValue, value)
720 if (m_value->child(0)->opcode() == Sub) {
721 replaceWithNew<Value>(Sub, m_value->origin(), m_value->child(0)->child(1), m_value->child(0)->child(0));
722 break;
723 }
724
725 // Turn this: Integer Neg(Mul(value, c))
726 // Into this: Mul(value, -c), as long as -c does not overflow
727 if (m_value->child(0)->opcode() == Mul && m_value->child(0)->child(1)->hasInt()) {
728 int64_t factor = m_value->child(0)->child(1)->asInt();
729 if (m_value->type() == Int32 && factor != std::numeric_limits<int32_t>::min()) {
730 Value* newFactor = m_insertionSet.insert<Const32Value>(m_index, m_value->child(0)->child(1)->origin(), -factor);
731 replaceWithNew<Value>(Mul, m_value->origin(), m_value->child(0)->child(0), newFactor);
732 } else if (m_value->type() == Int64 && factor != std::numeric_limits<int64_t>::min()) {
733 Value* newFactor = m_insertionSet.insert<Const64Value>(m_index, m_value->child(0)->child(1)->origin(), -factor);
734 replaceWithNew<Value>(Mul, m_value->origin(), m_value->child(0)->child(0), newFactor);
735 }
736 }
737 }
738
739
740 break;
741
742 case Mul:
743 handleCommutativity();
744
745 // Turn this: Mul(constant1, constant2)
746 // Into this: constant1 * constant2
747 if (Value* value = m_value->child(0)->mulConstant(m_proc, m_value->child(1))) {
748 replaceWithNewValue(value);
749 break;
750 }
751
752 if (m_value->child(1)->hasInt()) {
753 int64_t factor = m_value->child(1)->asInt();
754
755 // Turn this: Mul(value, 0)
756 // Into this: 0
757 // Note that we don't do this for doubles because that's wrong. For example, -1 * 0
758 // and 1 * 0 yield different results.
759 if (!factor) {
760 replaceWithIdentity(m_value->child(1));
761 break;
762 }
763
764 // Turn this: Mul(value, 1)
765 // Into this: value
766 if (factor == 1) {
767 replaceWithIdentity(m_value->child(0));
768 break;
769 }
770
771 // Turn this: Mul(value, -1)
772 // Into this: Neg(value)
773 if (factor == -1) {
774 replaceWithNew<Value>(Neg, m_value->origin(), m_value->child(0));
775 break;
776 }
777
778 // Turn this: Mul(value, constant)
779 // Into this: Shl(value, log2(constant))
780 if (hasOneBitSet(factor)) {
781 unsigned shiftAmount = WTF::fastLog2(static_cast<uint64_t>(factor));
782 replaceWithNewValue(
783 m_proc.add<Value>(
784 Shl, m_value->origin(), m_value->child(0),
785 m_insertionSet.insert<Const32Value>(
786 m_index, m_value->origin(), shiftAmount)));
787 break;
788 }
789 } else if (m_value->child(1)->hasDouble()) {
790 double factor = m_value->child(1)->asDouble();
791
792 // Turn this: Mul(value, 1)
793 // Into this: value
794 if (factor == 1) {
795 replaceWithIdentity(m_value->child(0));
796 break;
797 }
798 }
799
800 if (m_value->isInteger()) {
801 // Turn this: Integer Mul(value, Neg(otherValue))
802 // Into this: Neg(Mul(value, otherValue))
803 if (m_value->child(1)->opcode() == Neg) {
804 Value* newMul = m_insertionSet.insert<Value>(m_index, Mul, m_value->origin(), m_value->child(0), m_value->child(1)->child(0));
805 replaceWithNew<Value>(Neg, m_value->origin(), newMul);
806 break;
807 }
808 // Turn this: Integer Mul(Neg(value), otherValue)
809 // Into this: Neg(Mul(value, value2))
810 if (m_value->child(0)->opcode() == Neg) {
811 Value* newMul = m_insertionSet.insert<Value>(m_index, Mul, m_value->origin(), m_value->child(0)->child(0), m_value->child(1));
812 replaceWithNew<Value>(Neg, m_value->origin(), newMul);
813 break;
814 }
815 }
816
817 break;
818
819 case Div:
820 // Turn this: Div(constant1, constant2)
821 // Into this: constant1 / constant2
822 // Note that this uses Div<Chill> semantics. That's fine, because the rules for Div
823 // are strictly weaker: it has corner cases where it's allowed to do anything it
824 // likes.
825 if (replaceWithNewValue(m_value->child(0)->divConstant(m_proc, m_value->child(1))))
826 break;
827
828 if (m_value->child(1)->hasInt()) {
829 switch (m_value->child(1)->asInt()) {
830 case -1:
831 // Turn this: Div(value, -1)
832 // Into this: Neg(value)
833 replaceWithNewValue(
834 m_proc.add<Value>(Neg, m_value->origin(), m_value->child(0)));
835 break;
836
837 case 0:
838 // Turn this: Div(value, 0)
839 // Into this: 0
840 // We can do this because it's precisely correct for ChillDiv and for Div we
841 // are allowed to do whatever we want.
842 replaceWithIdentity(m_value->child(1));
843 break;
844
845 case 1:
846 // Turn this: Div(value, 1)
847 // Into this: value
848 replaceWithIdentity(m_value->child(0));
849 break;
850
851 default:
852 // Perform super comprehensive strength reduction of division. Currently we
853 // only do this for 32-bit divisions, since we need a high multiply
854 // operation. We emulate it using 64-bit multiply. We can't emulate 64-bit
855 // high multiply with a 128-bit multiply because we don't have a 128-bit
856 // multiply. We could do it with a patchpoint if we cared badly enough.
857
858 if (m_value->type() != Int32)
859 break;
860
861 if (m_proc.optLevel() < 2)
862 break;
863
864 int32_t divisor = m_value->child(1)->asInt32();
865 DivisionMagic<int32_t> magic = computeDivisionMagic(divisor);
866
867 // Perform the "high" multiplication. We do it just to get the high bits.
868 // This is sort of like multiplying by the reciprocal, just more gnarly. It's
869 // from Hacker's Delight and I don't claim to understand it.
870 Value* magicQuotient = m_insertionSet.insert<Value>(
871 m_index, Trunc, m_value->origin(),
872 m_insertionSet.insert<Value>(
873 m_index, ZShr, m_value->origin(),
874 m_insertionSet.insert<Value>(
875 m_index, Mul, m_value->origin(),
876 m_insertionSet.insert<Value>(
877 m_index, SExt32, m_value->origin(), m_value->child(0)),
878 m_insertionSet.insert<Const64Value>(
879 m_index, m_value->origin(), magic.magicMultiplier)),
880 m_insertionSet.insert<Const32Value>(
881 m_index, m_value->origin(), 32)));
882
883 if (divisor > 0 && magic.magicMultiplier < 0) {
884 magicQuotient = m_insertionSet.insert<Value>(
885 m_index, Add, m_value->origin(), magicQuotient, m_value->child(0));
886 }
887 if (divisor < 0 && magic.magicMultiplier > 0) {
888 magicQuotient = m_insertionSet.insert<Value>(
889 m_index, Sub, m_value->origin(), magicQuotient, m_value->child(0));
890 }
891 if (magic.shift > 0) {
892 magicQuotient = m_insertionSet.insert<Value>(
893 m_index, SShr, m_value->origin(), magicQuotient,
894 m_insertionSet.insert<Const32Value>(
895 m_index, m_value->origin(), magic.shift));
896 }
897 replaceWithIdentity(
898 m_insertionSet.insert<Value>(
899 m_index, Add, m_value->origin(), magicQuotient,
900 m_insertionSet.insert<Value>(
901 m_index, ZShr, m_value->origin(), magicQuotient,
902 m_insertionSet.insert<Const32Value>(
903 m_index, m_value->origin(), 31))));
904 break;
905 }
906 break;
907 }
908 break;
909
910 case UDiv:
911 // Turn this: UDiv(constant1, constant2)
912 // Into this: constant1 / constant2
913 if (replaceWithNewValue(m_value->child(0)->uDivConstant(m_proc, m_value->child(1))))
914 break;
915
916 if (m_value->child(1)->hasInt()) {
917 switch (m_value->child(1)->asInt()) {
918 case 0:
919 // Turn this: UDiv(value, 0)
920 // Into this: 0
921 // We can do whatever we want here so we might as well do the chill thing,
922 // in case we add chill versions of UDiv in the future.
923 replaceWithIdentity(m_value->child(1));
924 break;
925
926 case 1:
927 // Turn this: UDiv(value, 1)
928 // Into this: value
929 replaceWithIdentity(m_value->child(0));
930 break;
931 default:
932 // FIXME: We should do comprehensive strength reduction for unsigned numbers. Likely,
933 // we will just want copy what llvm does. https://bugs.webkit.org/show_bug.cgi?id=164809
934 break;
935 }
936 }
937 break;
938
939 case Mod:
940 // Turn this: Mod(constant1, constant2)
            // Into this: constant1 % constant2
942 // Note that this uses Mod<Chill> semantics.
943 if (replaceWithNewValue(m_value->child(0)->modConstant(m_proc, m_value->child(1))))
944 break;
945
946 // Modulo by constant is more efficient if we turn it into Div, and then let Div get
947 // optimized.
948 if (m_value->child(1)->hasInt()) {
949 switch (m_value->child(1)->asInt()) {
950 case 0:
951 // Turn this: Mod(value, 0)
952 // Into this: 0
953 // This is correct according to ChillMod semantics.
954 replaceWithIdentity(m_value->child(1));
955 break;
956
957 default:
958 if (m_proc.optLevel() < 2)
959 break;
960
961 // Turn this: Mod(N, D)
962 // Into this: Sub(N, Mul(Div(N, D), D))
963 //
964 // This is a speed-up because we use our existing Div optimizations.
965 //
966 // Here's an easier way to look at it:
967 // N % D = N - N / D * D
968 //
969 // Note that this does not work for D = 0 and ChillMod. The expected result is 0.
970 // That's why we have a special-case above.
971 // X % 0 = X - X / 0 * 0 = X (should be 0)
972 //
973 // This does work for the D = -1 special case.
974 // -2^31 % -1 = -2^31 - -2^31 / -1 * -1
975 // = -2^31 - -2^31 * -1
976 // = -2^31 - -2^31
977 // = 0
978
979 Kind divKind = Div;
980 divKind.setIsChill(m_value->isChill());
981
982 replaceWithIdentity(
983 m_insertionSet.insert<Value>(
984 m_index, Sub, m_value->origin(),
985 m_value->child(0),
986 m_insertionSet.insert<Value>(
987 m_index, Mul, m_value->origin(),
988 m_insertionSet.insert<Value>(
989 m_index, divKind, m_value->origin(),
990 m_value->child(0), m_value->child(1)),
991 m_value->child(1))));
992 break;
993 }
994 break;
995 }
996
997 break;
998
999 case UMod:
1000 // Turn this: UMod(constant1, constant2)
        // Into this: constant1 % constant2
1002 replaceWithNewValue(m_value->child(0)->uModConstant(m_proc, m_value->child(1)));
1003 // FIXME: We should do what we do for Mod since the same principle applies here.
1004 // https://bugs.webkit.org/show_bug.cgi?id=164809
1005 break;
1006
1007 case BitAnd:
1008 handleCommutativity();
1009
1010 // Turn this: BitAnd(constant1, constant2)
1011 // Into this: constant1 & constant2
1012 if (Value* constantBitAnd = m_value->child(0)->bitAndConstant(m_proc, m_value->child(1))) {
1013 replaceWithNewValue(constantBitAnd);
1014 break;
1015 }
1016
1017 // Turn this: BitAnd(BitAnd(value, constant1), constant2)
1018 // Into this: BitAnd(value, constant1 & constant2).
1019 if (m_value->child(0)->opcode() == BitAnd) {
1020 Value* newConstant = m_value->child(1)->bitAndConstant(m_proc, m_value->child(0)->child(1));
1021 if (newConstant) {
1022 m_insertionSet.insertValue(m_index, newConstant);
1023 m_value->child(0) = m_value->child(0)->child(0);
1024 m_value->child(1) = newConstant;
1025 m_changed = true;
1026 }
1027 }
1028
1029 // Turn this: BitAnd(valueX, valueX)
1030 // Into this: valueX.
1031 if (m_value->child(0) == m_value->child(1)) {
1032 replaceWithIdentity(m_value->child(0));
1033 break;
1034 }
1035
1036 // Turn this: BitAnd(value, zero-constant)
1037 // Into this: zero-constant.
1038 if (m_value->child(1)->isInt(0)) {
1039 replaceWithIdentity(m_value->child(1));
1040 break;
1041 }
1042
1043 // Turn this: BitAnd(value, all-ones)
1044 // Into this: value.
1045 if ((m_value->type() == Int64 && m_value->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1046 || (m_value->type() == Int32 && m_value->child(1)->isInt32(std::numeric_limits<uint32_t>::max()))) {
1047 replaceWithIdentity(m_value->child(0));
1048 break;
1049 }
1050
1051 // Turn this: BitAnd(64-bit value, 32 ones)
1052 // Into this: ZExt32(Trunc(64-bit value))
1053 if (m_value->child(1)->isInt64(0xffffffffllu)) {
1054 Value* newValue = m_insertionSet.insert<Value>(
1055 m_index, ZExt32, m_value->origin(),
1056 m_insertionSet.insert<Value>(m_index, Trunc, m_value->origin(), m_value->child(0)));
1057 replaceWithIdentity(newValue);
1058 break;
1059 }
1060
1061 // Turn this: BitAnd(SExt8(value), mask) where (mask & 0xffffff00) == 0
1062 // Into this: BitAnd(value, mask)
1063 if (m_value->child(0)->opcode() == SExt8 && m_value->child(1)->hasInt32()
1064 && !(m_value->child(1)->asInt32() & 0xffffff00)) {
1065 m_value->child(0) = m_value->child(0)->child(0);
1066 m_changed = true;
1067 break;
1068 }
1069
1070 // Turn this: BitAnd(SExt16(value), mask) where (mask & 0xffff0000) == 0
1071 // Into this: BitAnd(value, mask)
1072 if (m_value->child(0)->opcode() == SExt16 && m_value->child(1)->hasInt32()
1073 && !(m_value->child(1)->asInt32() & 0xffff0000)) {
1074 m_value->child(0) = m_value->child(0)->child(0);
1075 m_changed = true;
1076 break;
1077 }
1078
1079 // Turn this: BitAnd(SExt32(value), mask) where (mask & 0xffffffff00000000) == 0
1080 // Into this: BitAnd(ZExt32(value), mask)
1081 if (m_value->child(0)->opcode() == SExt32 && m_value->child(1)->hasInt32()
1082 && !(m_value->child(1)->asInt32() & 0xffffffff00000000llu)) {
1083 m_value->child(0) = m_insertionSet.insert<Value>(
1084 m_index, ZExt32, m_value->origin(),
1085 m_value->child(0)->child(0), m_value->child(0)->child(1));
1086 m_changed = true;
1087 break;
1088 }
1089
1090 // Turn this: BitAnd(Op(value, constant1), constant2)
1091 // where !(constant1 & constant2)
1092 // and Op is BitOr or BitXor
1093 // into this: BitAnd(value, constant2)
1094 if (m_value->child(1)->hasInt()) {
1095 bool replaced = false;
1096 int64_t constant2 = m_value->child(1)->asInt();
1097 switch (m_value->child(0)->opcode()) {
1098 case BitOr:
1099 case BitXor:
1100 if (m_value->child(0)->child(1)->hasInt()
1101 && !(m_value->child(0)->child(1)->asInt() & constant2)) {
1102 m_value->child(0) = m_value->child(0)->child(0);
1103 m_changed = true;
1104 replaced = true;
1105 break;
1106 }
1107 break;
1108 default:
1109 break;
1110 }
1111 if (replaced)
1112 break;
1113 }
1114
1115 // Turn this: BitAnd(BitXor(x1, allOnes), BitXor(x2, allOnes))
1116 // Into this: BitXor(BitOr(x1, x2), allOnes)
1117 // By applying De Morgan laws
1118 if (m_value->child(0)->opcode() == BitXor
1119 && m_value->child(1)->opcode() == BitXor
1120 && ((m_value->type() == Int64
1121 && m_value->child(0)->child(1)->isInt64(std::numeric_limits<uint64_t>::max())
1122 && m_value->child(1)->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1123 || (m_value->type() == Int32
1124 && m_value->child(0)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())
1125 && m_value->child(1)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())))) {
1126 Value* bitOr = m_insertionSet.insert<Value>(m_index, BitOr, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->child(0));
1127 replaceWithNew<Value>(BitXor, m_value->origin(), bitOr, m_value->child(1)->child(1));
1128 break;
1129 }
1130
1131 // Turn this: BitAnd(BitXor(x, allOnes), c)
1132 // Into this: BitXor(BitOr(x, ~c), allOnes)
1133 // This is a variation on the previous optimization, treating c as if it were BitXor(~c, allOnes)
1134 // It does not reduce the number of operations, but provides some normalization (we try to get BitXor by allOnes at the outermost point), and some chance to float Xors to a place where they might get eliminated.
1135 if (m_value->child(0)->opcode() == BitXor
1136 && m_value->child(1)->hasInt()
1137 && ((m_value->type() == Int64
1138 && m_value->child(0)->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1139 || (m_value->type() == Int32
1140 && m_value->child(0)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())))) {
1141 Value* newConstant = m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1));
1142 ASSERT(newConstant);
1143 m_insertionSet.insertValue(m_index, newConstant);
1144 Value* bitOr = m_insertionSet.insert<Value>(m_index, BitOr, m_value->origin(), m_value->child(0)->child(0), newConstant);
1145 replaceWithNew<Value>(BitXor, m_value->origin(), bitOr, m_value->child(0)->child(1));
1146 break;
1147 }
1148
1149 break;
1150
1151 case BitOr:
1152 handleCommutativity();
1153
1154 // Turn this: BitOr(constant1, constant2)
1155 // Into this: constant1 | constant2
1156 if (Value* constantBitOr = m_value->child(0)->bitOrConstant(m_proc, m_value->child(1))) {
1157 replaceWithNewValue(constantBitOr);
1158 break;
1159 }
1160
1161 // Turn this: BitOr(BitOr(value, constant1), constant2)
1162 // Into this: BitOr(value, constant1 | constant2).
1163 if (m_value->child(0)->opcode() == BitOr) {
1164 Value* newConstant = m_value->child(1)->bitOrConstant(m_proc, m_value->child(0)->child(1));
1165 if (newConstant) {
1166 m_insertionSet.insertValue(m_index, newConstant);
1167 m_value->child(0) = m_value->child(0)->child(0);
1168 m_value->child(1) = newConstant;
1169 m_changed = true;
1170 }
1171 }
1172
1173 // Turn this: BitOr(valueX, valueX)
1174 // Into this: valueX.
1175 if (m_value->child(0) == m_value->child(1)) {
1176 replaceWithIdentity(m_value->child(0));
1177 break;
1178 }
1179
1180 // Turn this: BitOr(value, zero-constant)
1181 // Into this: value.
1182 if (m_value->child(1)->isInt(0)) {
1183 replaceWithIdentity(m_value->child(0));
1184 break;
1185 }
1186
1187 // Turn this: BitOr(value, all-ones)
1188 // Into this: all-ones.
1189 if ((m_value->type() == Int64 && m_value->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1190 || (m_value->type() == Int32 && m_value->child(1)->isInt32(std::numeric_limits<uint32_t>::max()))) {
1191 replaceWithIdentity(m_value->child(1));
1192 break;
1193 }
1194
1195 // Turn this: BitOr(BitXor(x1, allOnes), BitXor(x2, allOnes))
1196 // Into this: BitXor(BitAnd(x1, x2), allOnes)
1197 // By applying De Morgan laws
1198 if (m_value->child(0)->opcode() == BitXor
1199 && m_value->child(1)->opcode() == BitXor
1200 && ((m_value->type() == Int64
1201 && m_value->child(0)->child(1)->isInt64(std::numeric_limits<uint64_t>::max())
1202 && m_value->child(1)->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1203 || (m_value->type() == Int32
1204 && m_value->child(0)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())
1205 && m_value->child(1)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())))) {
1206 Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(), m_value->child(0)->child(0), m_value->child(1)->child(0));
1207 replaceWithNew<Value>(BitXor, m_value->origin(), bitAnd, m_value->child(1)->child(1));
1208 break;
1209 }
1210
1211 // Turn this: BitOr(BitXor(x, allOnes), c)
1212 // Into this: BitXor(BitAnd(x, ~c), allOnes)
1213 // This is a variation on the previous optimization, treating c as if it were BitXor(~c, allOnes)
1214 // It does not reduce the number of operations, but provides some normalization (we try to get BitXor by allOnes at the outermost point), and some chance to float Xors to a place where they might get eliminated.
1215 if (m_value->child(0)->opcode() == BitXor
1216 && m_value->child(1)->hasInt()
1217 && ((m_value->type() == Int64
1218 && m_value->child(0)->child(1)->isInt64(std::numeric_limits<uint64_t>::max()))
1219 || (m_value->type() == Int32
1220 && m_value->child(0)->child(1)->isInt32(std::numeric_limits<uint32_t>::max())))) {
1221 Value* newConstant = m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1));
1222 ASSERT(newConstant);
1223 m_insertionSet.insertValue(m_index, newConstant);
1224 Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(), m_value->child(0)->child(0), newConstant);
1225 replaceWithNew<Value>(BitXor, m_value->origin(), bitAnd, m_value->child(0)->child(1));
1226 break;
1227 }
1228
1229 if (handleBitAndDistributivity())
1230 break;
1231
1232 break;
1233
1234 case BitXor:
1235 handleCommutativity();
1236
1237 // Turn this: BitXor(constant1, constant2)
1238 // Into this: constant1 ^ constant2
1239 if (Value* constantBitXor = m_value->child(0)->bitXorConstant(m_proc, m_value->child(1))) {
1240 replaceWithNewValue(constantBitXor);
1241 break;
1242 }
1243
1244 // Turn this: BitXor(BitXor(value, constant1), constant2)
1245 // Into this: BitXor(value, constant1 ^ constant2).
1246 if (m_value->child(0)->opcode() == BitXor) {
1247 Value* newConstant = m_value->child(1)->bitXorConstant(m_proc, m_value->child(0)->child(1));
1248 if (newConstant) {
1249 m_insertionSet.insertValue(m_index, newConstant);
1250 m_value->child(0) = m_value->child(0)->child(0);
1251 m_value->child(1) = newConstant;
1252 m_changed = true;
1253 }
1254 }
1255
1256 // Turn this: BitXor(compare, 1)
1257 // Into this: invertedCompare
1258 if (m_value->child(1)->isInt32(1)) {
1259 if (Value* invertedCompare = m_value->child(0)->invertedCompare(m_proc)) {
1260 replaceWithNewValue(invertedCompare);
1261 break;
1262 }
1263 }
1264
1265 // Turn this: BitXor(valueX, valueX)
1266 // Into this: zero-constant.
1267 if (m_value->child(0) == m_value->child(1)) {
1268 replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
1269 break;
1270 }
1271
1272 // Turn this: BitXor(value, zero-constant)
1273 // Into this: value.
1274 if (m_value->child(1)->isInt(0)) {
1275 replaceWithIdentity(m_value->child(0));
1276 break;
1277 }
1278
1279 if (handleBitAndDistributivity())
1280 break;
1281
1282 break;
1283
1284 case Shl:
1285 // Turn this: Shl(constant1, constant2)
1286 // Into this: constant1 << constant2
1287 if (Value* constant = m_value->child(0)->shlConstant(m_proc, m_value->child(1))) {
1288 replaceWithNewValue(constant);
1289 break;
1290 }
1291
1292 // Turn this: Shl(<S|Z>Shr(@x, @const), @const)
1293 // Into this: BitAnd(@x, -(1<<@const))
1294 if ((m_value->child(0)->opcode() == SShr || m_value->child(0)->opcode() == ZShr)
1295 && m_value->child(0)->child(1)->hasInt()
1296 && m_value->child(1)->hasInt()
1297 && m_value->child(0)->child(1)->asInt() == m_value->child(1)->asInt()) {
1298 int shiftAmount = m_value->child(1)->asInt() & (m_value->type() == Int32 ? 31 : 63);
1299 Value* newConst = m_proc.addIntConstant(m_value, - static_cast<int64_t>(1ull << shiftAmount));
1300 m_insertionSet.insertValue(m_index, newConst);
1301 replaceWithNew<Value>(BitAnd, m_value->origin(), m_value->child(0)->child(0), newConst);
1302 break;
1303 }
1304
1305 handleShiftAmount();
1306 break;
1307
1308 case SShr:
1309 // Turn this: SShr(constant1, constant2)
1310 // Into this: constant1 >> constant2
1311 if (Value* constant = m_value->child(0)->sShrConstant(m_proc, m_value->child(1))) {
1312 replaceWithNewValue(constant);
1313 break;
1314 }
1315
1316 if (m_value->child(1)->hasInt32()
1317 && m_value->child(0)->opcode() == Shl
1318 && m_value->child(0)->child(1)->hasInt32()
1319 && m_value->child(1)->asInt32() == m_value->child(0)->child(1)->asInt32()) {
1320 switch (m_value->child(1)->asInt32()) {
1321 case 16:
1322 if (m_value->type() == Int32) {
1323 // Turn this: SShr(Shl(value, 16), 16)
1324 // Into this: SExt16(value)
1325 replaceWithNewValue(
1326 m_proc.add<Value>(
1327 SExt16, m_value->origin(), m_value->child(0)->child(0)));
1328 }
1329 break;
1330
1331 case 24:
1332 if (m_value->type() == Int32) {
1333 // Turn this: SShr(Shl(value, 24), 24)
1334 // Into this: SExt8(value)
1335 replaceWithNewValue(
1336 m_proc.add<Value>(
1337 SExt8, m_value->origin(), m_value->child(0)->child(0)));
1338 }
1339 break;
1340
1341 case 32:
1342 if (m_value->type() == Int64) {
1343 // Turn this: SShr(Shl(value, 32), 32)
1344 // Into this: SExt32(Trunc(value))
1345 replaceWithNewValue(
1346 m_proc.add<Value>(
1347 SExt32, m_value->origin(),
1348 m_insertionSet.insert<Value>(
1349 m_index, Trunc, m_value->origin(),
1350 m_value->child(0)->child(0))));
1351 }
1352 break;
1353
1354 // FIXME: Add cases for 48 and 56, but that would translate to SExt32(SExt8) or
1355 // SExt32(SExt16), which we don't currently lower efficiently.
1356
1357 default:
1358 break;
1359 }
1360
1361 if (m_value->opcode() != SShr)
1362 break;
1363 }
1364
1365 handleShiftAmount();
1366 break;
1367
1368 case ZShr:
1369 // Turn this: ZShr(constant1, constant2)
1370 // Into this: (unsigned)constant1 >> constant2
1371 if (Value* constant = m_value->child(0)->zShrConstant(m_proc, m_value->child(1))) {
1372 replaceWithNewValue(constant);
1373 break;
1374 }
1375
1376 handleShiftAmount();
1377 break;
1378
1379 case RotR:
1380 // Turn this: RotR(constant1, constant2)
1381 // Into this: (constant1 >> constant2) | (constant1 << sizeof(constant1) * 8 - constant2)
1382 if (Value* constant = m_value->child(0)->rotRConstant(m_proc, m_value->child(1))) {
1383 replaceWithNewValue(constant);
1384 break;
1385 }
1386
1387 handleShiftAmount();
1388 break;
1389
1390 case RotL:
1391 // Turn this: RotL(constant1, constant2)
1392 // Into this: (constant1 << constant2) | (constant1 >> sizeof(constant1) * 8 - constant2)
1393 if (Value* constant = m_value->child(0)->rotLConstant(m_proc, m_value->child(1))) {
1394 replaceWithNewValue(constant);
1395 break;
1396 }
1397
1398 handleShiftAmount();
1399 break;
1400
1401 case Abs:
1402 // Turn this: Abs(constant)
1403 // Into this: fabs<value->type()>(constant)
1404 if (Value* constant = m_value->child(0)->absConstant(m_proc)) {
1405 replaceWithNewValue(constant);
1406 break;
1407 }
1408
1409 // Turn this: Abs(Abs(value))
1410 // Into this: Abs(value)
1411 if (m_value->child(0)->opcode() == Abs) {
1412 replaceWithIdentity(m_value->child(0));
1413 break;
1414 }
1415
1416 // Turn this: Abs(Neg(value))
1417 // Into this: Abs(value)
1418 if (m_value->child(0)->opcode() == Neg) {
1419 m_value->child(0) = m_value->child(0)->child(0);
1420 m_changed = true;
1421 break;
1422 }
1423
1424 // Turn this: Abs(BitwiseCast(value))
1425 // Into this: BitwiseCast(And(value, mask-top-bit))
1426 if (m_value->child(0)->opcode() == BitwiseCast) {
1427 Value* mask;
1428 if (m_value->type() == Double)
1429 mask = m_insertionSet.insert<Const64Value>(m_index, m_value->origin(), ~(1ll << 63));
1430 else
1431 mask = m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), ~(1l << 31));
1432
1433 Value* bitAnd = m_insertionSet.insert<Value>(m_index, BitAnd, m_value->origin(),
1434 m_value->child(0)->child(0),
1435 mask);
1436 Value* cast = m_insertionSet.insert<Value>(m_index, BitwiseCast, m_value->origin(), bitAnd);
1437 replaceWithIdentity(cast);
1438 break;
1439 }
1440 break;
1441
1442 case Ceil:
1443 // Turn this: Ceil(constant)
1444 // Into this: ceil<value->type()>(constant)
1445 if (Value* constant = m_value->child(0)->ceilConstant(m_proc)) {
1446 replaceWithNewValue(constant);
1447 break;
1448 }
1449
1450 // Turn this: Ceil(roundedValue)
1451 // Into this: roundedValue
1452 if (m_value->child(0)->isRounded()) {
1453 replaceWithIdentity(m_value->child(0));
1454 break;
1455 }
1456 break;
1457
1458 case Floor:
1459 // Turn this: Floor(constant)
1460 // Into this: floor<value->type()>(constant)
1461 if (Value* constant = m_value->child(0)->floorConstant(m_proc)) {
1462 replaceWithNewValue(constant);
1463 break;
1464 }
1465
1466 // Turn this: Floor(roundedValue)
1467 // Into this: roundedValue
1468 if (m_value->child(0)->isRounded()) {
1469 replaceWithIdentity(m_value->child(0));
1470 break;
1471 }
1472 break;
1473
1474 case Sqrt:
1475 // Turn this: Sqrt(constant)
1476 // Into this: sqrt<value->type()>(constant)
1477 if (Value* constant = m_value->child(0)->sqrtConstant(m_proc)) {
1478 replaceWithNewValue(constant);
1479 break;
1480 }
1481 break;
1482
1483 case BitwiseCast:
1484 // Turn this: BitwiseCast(constant)
1485 // Into this: bitwise_cast<value->type()>(constant)
1486 if (Value* constant = m_value->child(0)->bitwiseCastConstant(m_proc)) {
1487 replaceWithNewValue(constant);
1488 break;
1489 }
1490
1491 // Turn this: BitwiseCast(BitwiseCast(value))
1492 // Into this: value
1493 if (m_value->child(0)->opcode() == BitwiseCast) {
1494 replaceWithIdentity(m_value->child(0)->child(0));
1495 break;
1496 }
1497 break;
1498
1499 case SExt8:
1500 // Turn this: SExt8(constant)
1501 // Into this: static_cast<int8_t>(constant)
1502 if (m_value->child(0)->hasInt32()) {
1503 int32_t result = static_cast<int8_t>(m_value->child(0)->asInt32());
1504 replaceWithNewValue(m_proc.addIntConstant(m_value, result));
1505 break;
1506 }
1507
1508 // Turn this: SExt8(SExt8(value))
1509 // or this: SExt8(SExt16(value))
1510 // Into this: SExt8(value)
1511 if (m_value->child(0)->opcode() == SExt8 || m_value->child(0)->opcode() == SExt16) {
1512 m_value->child(0) = m_value->child(0)->child(0);
1513 m_changed = true;
1514 }
1515
1516 if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()) {
1517 Value* input = m_value->child(0)->child(0);
1518 int32_t mask = m_value->child(0)->child(1)->asInt32();
1519
1520 // Turn this: SExt8(BitAnd(input, mask)) where (mask & 0xff) == 0xff
1521 // Into this: SExt8(input)
1522 if ((mask & 0xff) == 0xff) {
1523 m_value->child(0) = input;
1524 m_changed = true;
1525 break;
1526 }
1527
1528 // Turn this: SExt8(BitAnd(input, mask)) where (mask & 0x80) == 0
1529 // Into this: BitAnd(input, const & 0x7f)
1530 if (!(mask & 0x80)) {
1531 replaceWithNewValue(
1532 m_proc.add<Value>(
1533 BitAnd, m_value->origin(), input,
1534 m_insertionSet.insert<Const32Value>(
1535 m_index, m_value->origin(), mask & 0x7f)));
1536 break;
1537 }
1538 }
1539
1540 if (!m_proc.hasQuirks()) {
1541 // Turn this: SExt8(AtomicXchg___)
1542 // Into this: AtomicXchg___
1543 if (isAtomicXchg(m_value->child(0)->opcode())
1544 && m_value->child(0)->as<AtomicValue>()->accessWidth() == Width8) {
1545 replaceWithIdentity(m_value->child(0));
1546 break;
1547 }
1548 }
1549 break;
1550
1551 case SExt16:
1552 // Turn this: SExt16(constant)
1553 // Into this: static_cast<int16_t>(constant)
1554 if (m_value->child(0)->hasInt32()) {
1555 int32_t result = static_cast<int16_t>(m_value->child(0)->asInt32());
1556 replaceWithNewValue(m_proc.addIntConstant(m_value, result));
1557 break;
1558 }
1559
1560 // Turn this: SExt16(SExt16(value))
1561 // Into this: SExt16(value)
1562 if (m_value->child(0)->opcode() == SExt16) {
1563 m_value->child(0) = m_value->child(0)->child(0);
1564 m_changed = true;
1565 }
1566
1567 // Turn this: SExt16(SExt8(value))
1568 // Into this: SExt8(value)
1569 if (m_value->child(0)->opcode() == SExt8) {
1570 replaceWithIdentity(m_value->child(0));
1571 break;
1572 }
1573
1574 if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()) {
1575 Value* input = m_value->child(0)->child(0);
1576 int32_t mask = m_value->child(0)->child(1)->asInt32();
1577
1578 // Turn this: SExt16(BitAnd(input, mask)) where (mask & 0xffff) == 0xffff
1579 // Into this: SExt16(input)
1580 if ((mask & 0xffff) == 0xffff) {
1581 m_value->child(0) = input;
1582 m_changed = true;
1583 break;
1584 }
1585
1586 // Turn this: SExt16(BitAnd(input, mask)) where (mask & 0x8000) == 0
1587 // Into this: BitAnd(input, const & 0x7fff)
1588 if (!(mask & 0x8000)) {
1589 replaceWithNewValue(
1590 m_proc.add<Value>(
1591 BitAnd, m_value->origin(), input,
1592 m_insertionSet.insert<Const32Value>(
1593 m_index, m_value->origin(), mask & 0x7fff)));
1594 break;
1595 }
1596 }
1597
1598 if (!m_proc.hasQuirks()) {
1599 // Turn this: SExt16(AtomicXchg___)
1600 // Into this: AtomicXchg___
1601 if (isAtomicXchg(m_value->child(0)->opcode())
1602 && m_value->child(0)->as<AtomicValue>()->accessWidth() == Width16) {
1603 replaceWithIdentity(m_value->child(0));
1604 break;
1605 }
1606 }
1607 break;
1608
1609 case SExt32:
1610 // Turn this: SExt32(constant)
1611 // Into this: static_cast<int64_t>(constant)
1612 if (m_value->child(0)->hasInt32()) {
1613 replaceWithNewValue(m_proc.addIntConstant(m_value, m_value->child(0)->asInt32()));
1614 break;
1615 }
1616
1617 // Turn this: SExt32(BitAnd(input, mask)) where (mask & 0x80000000) == 0
1618 // Into this: ZExt32(BitAnd(input, mask))
1619 if (m_value->child(0)->opcode() == BitAnd && m_value->child(0)->child(1)->hasInt32()
1620 && !(m_value->child(0)->child(1)->asInt32() & 0x80000000)) {
1621 replaceWithNewValue(
1622 m_proc.add<Value>(
1623 ZExt32, m_value->origin(), m_value->child(0)));
1624 break;
1625 }
1626 break;
1627
1628 case ZExt32:
1629 // Turn this: ZExt32(constant)
1630 // Into this: static_cast<uint64_t>(static_cast<uint32_t>(constant))
1631 if (m_value->child(0)->hasInt32()) {
1632 replaceWithNewValue(
1633 m_proc.addIntConstant(
1634 m_value,
1635 static_cast<uint64_t>(static_cast<uint32_t>(m_value->child(0)->asInt32()))));
1636 break;
1637 }
1638 break;
1639
1640 case Trunc:
1641 // Turn this: Trunc(constant)
1642 // Into this: static_cast<int32_t>(constant)
1643 if (m_value->child(0)->hasInt64() || m_value->child(0)->hasDouble()) {
1644 replaceWithNewValue(
1645 m_proc.addIntConstant(m_value, static_cast<int32_t>(m_value->child(0)->asInt64())));
1646 break;
1647 }
1648
1649 // Turn this: Trunc(SExt32(value)) or Trunc(ZExt32(value))
1650 // Into this: value
1651 if (m_value->child(0)->opcode() == SExt32 || m_value->child(0)->opcode() == ZExt32) {
1652 replaceWithIdentity(m_value->child(0)->child(0));
1653 break;
1654 }
1655
1656 // Turn this: Trunc(Op(value, constant))
1657 // where !(constant & 0xffffffff)
1658 // and Op is Add, Sub, BitOr, or BitXor
1659 // into this: Trunc(value)
1660 switch (m_value->child(0)->opcode()) {
1661 case Add:
1662 case Sub:
1663 case BitOr:
1664 case BitXor:
1665 if (m_value->child(0)->child(1)->hasInt64()
1666 && !(m_value->child(0)->child(1)->asInt64() & 0xffffffffll)) {
1667 m_value->child(0) = m_value->child(0)->child(0);
1668 m_changed = true;
1669 break;
1670 }
1671 break;
1672 default:
1673 break;
1674 }
1675 break;
1676
1677 case IToD:
1678 // Turn this: IToD(constant)
1679 // Into this: ConstDouble(constant)
1680 if (Value* constant = m_value->child(0)->iToDConstant(m_proc)) {
1681 replaceWithNewValue(constant);
1682 break;
1683 }
1684 break;
1685
1686 case IToF:
1687 // Turn this: IToF(constant)
1688 // Into this: ConstFloat(constant)
1689 if (Value* constant = m_value->child(0)->iToFConstant(m_proc)) {
1690 replaceWithNewValue(constant);
1691 break;
1692 }
1693 break;
1694
1695 case FloatToDouble:
1696 // Turn this: FloatToDouble(constant)
1697 // Into this: ConstDouble(constant)
1698 if (Value* constant = m_value->child(0)->floatToDoubleConstant(m_proc)) {
1699 replaceWithNewValue(constant);
1700 break;
1701 }
1702 break;
1703
1704 case DoubleToFloat:
1705 // Turn this: DoubleToFloat(FloatToDouble(value))
1706 // Into this: value
1707 if (m_value->child(0)->opcode() == FloatToDouble) {
1708 replaceWithIdentity(m_value->child(0)->child(0));
1709 break;
1710 }
1711
1712 // Turn this: DoubleToFloat(constant)
1713 // Into this: ConstFloat(constant)
1714 if (Value* constant = m_value->child(0)->doubleToFloatConstant(m_proc)) {
1715 replaceWithNewValue(constant);
1716 break;
1717 }
1718 break;
1719
1720 case Select:
1721 // Turn this: Select(constant, a, b)
1722 // Into this: constant ? a : b
1723 if (m_value->child(0)->hasInt32()) {
1724 replaceWithIdentity(
1725 m_value->child(0)->asInt32() ? m_value->child(1) : m_value->child(2));
1726 break;
1727 }
1728
1729 // Turn this: Select(Equal(x, 0), a, b)
1730 // Into this: Select(x, b, a)
1731 if (m_value->child(0)->opcode() == Equal && m_value->child(0)->child(1)->isInt(0)) {
1732 m_value->child(0) = m_value->child(0)->child(0);
1733 std::swap(m_value->child(1), m_value->child(2));
1734 m_changed = true;
1735 break;
1736 }
1737
1738 // Turn this: Select(BitXor(bool, 1), a, b)
1739 // Into this: Select(bool, b, a)
1740 if (m_value->child(0)->opcode() == BitXor
1741 && m_value->child(0)->child(1)->isInt32(1)
1742 && m_value->child(0)->child(0)->returnsBool()) {
1743 m_value->child(0) = m_value->child(0)->child(0);
1744 std::swap(m_value->child(1), m_value->child(2));
1745 m_changed = true;
1746 break;
1747 }
1748
1749 // Turn this: Select(BitAnd(bool, xyz1), a, b)
1750 // Into this: Select(bool, a, b)
1751 if (m_value->child(0)->opcode() == BitAnd
1752 && m_value->child(0)->child(1)->hasInt()
1753 && m_value->child(0)->child(1)->asInt() & 1
1754 && m_value->child(0)->child(0)->returnsBool()) {
1755 m_value->child(0) = m_value->child(0)->child(0);
1756 m_changed = true;
1757 break;
1758 }
1759
1760 // Turn this: Select(stuff, x, x)
1761 // Into this: x
1762 if (m_value->child(1) == m_value->child(2)) {
1763 replaceWithIdentity(m_value->child(1));
1764 break;
1765 }
1766 break;
1767
1768 case Load8Z:
1769 case Load8S:
1770 case Load16Z:
1771 case Load16S:
1772 case Load:
1773 case Store8:
1774 case Store16:
1775 case Store: {
1776 Value* address = m_value->lastChild();
1777 MemoryValue* memory = m_value->as<MemoryValue>();
1778
1779 // Turn this: Load(Add(address, offset1), offset = offset2)
1780 // Into this: Load(address, offset = offset1 + offset2)
1781 //
1782 // Also turns this: Store(value, Add(address, offset1), offset = offset2)
1783 // Into this: Store(value, address, offset = offset1 + offset2)
1784 if (address->opcode() == Add && address->child(1)->hasIntPtr()) {
1785 intptr_t offset = address->child(1)->asIntPtr();
1786 if (!sumOverflows<intptr_t>(offset, memory->offset())) {
1787 offset += memory->offset();
1788 Value::OffsetType smallOffset = static_cast<Value::OffsetType>(offset);
1789 if (smallOffset == offset) {
1790 address = address->child(0);
1791 memory->lastChild() = address;
1792 memory->setOffset(smallOffset);
1793 m_changed = true;
1794 }
1795 }
1796 }
1797
1798 // Turn this: Load(constant1, offset = constant2)
1799 // Into this: Load(constant1 + constant2)
1800 //
1801 // This is a fun canonicalization. It purely regresses naively generated code. We rely
1802 // on constant materialization to be smart enough to materialize this constant the smart
1803 // way. We want this canonicalization because we want to know if two memory accesses see
1804 // the same address.
1805 if (memory->offset()) {
1806 if (Value* newAddress = address->addConstant(m_proc, memory->offset())) {
1807 m_insertionSet.insertValue(m_index, newAddress);
1808 address = newAddress;
1809 memory->lastChild() = newAddress;
1810 memory->setOffset(0);
1811 m_changed = true;
1812 }
1813 }
1814
1815 break;
1816 }
1817
1818 case CCall: {
1819 // Turn this: Call(fmod, constant1, constant2)
1820 // Into this: fcall-constant(constant1, constant2)
1821 auto* fmodDouble = tagCFunctionPtr<double (*)(double, double)>(fmod, B3CCallPtrTag);
1822 if (m_value->type() == Double
1823 && m_value->numChildren() == 3
1824 && m_value->child(0)->isIntPtr(reinterpret_cast<intptr_t>(fmodDouble))
1825 && m_value->child(1)->type() == Double
1826 && m_value->child(2)->type() == Double) {
1827 replaceWithNewValue(m_value->child(1)->modConstant(m_proc, m_value->child(2)));
1828 }
1829 break;
1830 }
1831 case Equal:
1832 handleCommutativity();
1833
1834 // Turn this: Equal(bool, 0)
1835 // Into this: BitXor(bool, 1)
1836 if (m_value->child(0)->returnsBool() && m_value->child(1)->isInt32(0)) {
1837 replaceWithNew<Value>(
1838 BitXor, m_value->origin(), m_value->child(0),
1839 m_insertionSet.insert<Const32Value>(m_index, m_value->origin(), 1));
1840 break;
1841 }
1842
1843 // Turn this: Equal(bool, 1)
1844 // Into this: bool
1845 if (m_value->child(0)->returnsBool() && m_value->child(1)->isInt32(1)) {
1846 replaceWithIdentity(m_value->child(0));
1847 break;
1848 }
1849
1850 // Turn this: Equal(const1, const2)
1851 // Into this: const1 == const2
1852 replaceWithNewValue(
1853 m_proc.addBoolConstant(
1854 m_value->origin(),
1855 m_value->child(0)->equalConstant(m_value->child(1))));
1856 break;
1857
1858 case NotEqual:
1859 handleCommutativity();
1860
1861 if (m_value->child(0)->returnsBool()) {
1862 // Turn this: NotEqual(bool, 0)
1863 // Into this: bool
1864 if (m_value->child(1)->isInt32(0)) {
1865 replaceWithIdentity(m_value->child(0));
1866 break;
1867 }
1868
1869 // Turn this: NotEqual(bool, 1)
1870 // Into this: Equal(bool, 0)
1871 if (m_value->child(1)->isInt32(1)) {
1872 replaceWithNew<Value>(
1873 Equal, m_value->origin(), m_value->child(0),
1874 m_insertionSet.insertIntConstant(m_index, m_value->origin(), Int32, 0));
1875 break;
1876 }
1877 }
1878
1879 // Turn this: NotEqual(const1, const2)
1880 // Into this: const1 != const2
1881 replaceWithNewValue(
1882 m_proc.addBoolConstant(
1883 m_value->origin(),
1884 m_value->child(0)->notEqualConstant(m_value->child(1))));
1885 break;
1886
1887 case LessThan:
1888 case GreaterThan:
1889 case LessEqual:
1890 case GreaterEqual:
1891 case Above:
1892 case Below:
1893 case AboveEqual:
1894 case BelowEqual: {
1895 CanonicalizedComparison comparison = canonicalizeComparison(m_value);
1896 TriState result = MixedTriState;
1897 switch (comparison.opcode) {
1898 case LessThan:
1899 result = comparison.operands[1]->greaterThanConstant(comparison.operands[0]);
1900 break;
1901 case GreaterThan:
1902 result = comparison.operands[1]->lessThanConstant(comparison.operands[0]);
1903 break;
1904 case LessEqual:
1905 result = comparison.operands[1]->greaterEqualConstant(comparison.operands[0]);
1906 break;
1907 case GreaterEqual:
1908 result = comparison.operands[1]->lessEqualConstant(comparison.operands[0]);
1909 break;
1910 case Above:
1911 result = comparison.operands[1]->belowConstant(comparison.operands[0]);
1912 break;
1913 case Below:
1914 result = comparison.operands[1]->aboveConstant(comparison.operands[0]);
1915 break;
1916 case AboveEqual:
1917 result = comparison.operands[1]->belowEqualConstant(comparison.operands[0]);
1918 break;
1919 case BelowEqual:
1920 result = comparison.operands[1]->aboveEqualConstant(comparison.operands[0]);
1921 break;
1922 default:
1923 RELEASE_ASSERT_NOT_REACHED();
1924 break;
1925 }
1926
1927 if (auto* constant = m_proc.addBoolConstant(m_value->origin(), result)) {
1928 replaceWithNewValue(constant);
1929 break;
1930 }
1931 if (comparison.opcode != m_value->opcode()) {
1932 replaceWithNew<Value>(comparison.opcode, m_value->origin(), comparison.operands[0], comparison.operands[1]);
1933 break;
1934 }
1935 break;
1936 }
1937
1938 case EqualOrUnordered:
1939 handleCommutativity();
1940
1941 // Turn this: EqualOrUnordered(const1, const2)
1942 // Into this: isunordered(const1, const2) || const1 == const2.
1943 // Turn this: EqualOrUnordered(value, const_NaN)
1944 // Into this: 1.
1945 replaceWithNewValue(
1946 m_proc.addBoolConstant(
1947 m_value->origin(),
1948 m_value->child(1)->equalOrUnorderedConstant(m_value->child(0))));
1949 break;
1950
1951 case CheckAdd: {
1952 if (replaceWithNewValue(m_value->child(0)->checkAddConstant(m_proc, m_value->child(1))))
1953 break;
1954
1955 handleCommutativity();
1956
1957 if (m_value->child(1)->isInt(0)) {
1958 replaceWithIdentity(m_value->child(0));
1959 break;
1960 }
1961
1962 IntRange leftRange = rangeFor(m_value->child(0));
1963 IntRange rightRange = rangeFor(m_value->child(1));
1964 if (!leftRange.couldOverflowAdd(rightRange, m_value->type())) {
1965 replaceWithNewValue(
1966 m_proc.add<Value>(Add, m_value->origin(), m_value->child(0), m_value->child(1)));
1967 break;
1968 }
1969 break;
1970 }
1971
1972 case CheckSub: {
1973 if (replaceWithNewValue(m_value->child(0)->checkSubConstant(m_proc, m_value->child(1))))
1974 break;
1975
1976 if (m_value->child(1)->isInt(0)) {
1977 replaceWithIdentity(m_value->child(0));
1978 break;
1979 }
1980
1981 if (Value* negatedConstant = m_value->child(1)->checkNegConstant(m_proc)) {
1982 m_insertionSet.insertValue(m_index, negatedConstant);
1983 m_value->as<CheckValue>()->convertToAdd();
1984 m_value->child(1) = negatedConstant;
1985 m_changed = true;
1986 break;
1987 }
1988
1989 IntRange leftRange = rangeFor(m_value->child(0));
1990 IntRange rightRange = rangeFor(m_value->child(1));
1991 if (!leftRange.couldOverflowSub(rightRange, m_value->type())) {
1992 replaceWithNewValue(
1993 m_proc.add<Value>(Sub, m_value->origin(), m_value->child(0), m_value->child(1)));
1994 break;
1995 }
1996 break;
1997 }
1998
1999 case CheckMul: {
2000 if (replaceWithNewValue(m_value->child(0)->checkMulConstant(m_proc, m_value->child(1))))
2001 break;
2002
2003 handleCommutativity();
2004
2005 if (m_value->child(1)->hasInt()) {
2006 bool modified = true;
2007 switch (m_value->child(1)->asInt()) {
2008 case 0:
2009 replaceWithNewValue(m_proc.addIntConstant(m_value, 0));
2010 break;
2011 case 1:
2012 replaceWithIdentity(m_value->child(0));
2013 break;
2014 case 2:
2015 m_value->as<CheckValue>()->convertToAdd();
2016 m_value->child(1) = m_value->child(0);
2017 m_changed = true;
2018 break;
2019 default:
2020 modified = false;
2021 break;
2022 }
2023 if (modified)
2024 break;
2025 }
2026
2027 IntRange leftRange = rangeFor(m_value->child(0));
2028 IntRange rightRange = rangeFor(m_value->child(1));
2029 if (!leftRange.couldOverflowMul(rightRange, m_value->type())) {
2030 replaceWithNewValue(
2031 m_proc.add<Value>(Mul, m_value->origin(), m_value->child(0), m_value->child(1)));
2032 break;
2033 }
2034 break;
2035 }
2036
2037 case Check: {
2038 CheckValue* checkValue = m_value->as<CheckValue>();
2039
2040 if (checkValue->child(0)->isLikeZero()) {
2041 checkValue->replaceWithNop();
2042 m_changed = true;
2043 break;
2044 }
2045
2046 if (checkValue->child(0)->isLikeNonZero()) {
2047 PatchpointValue* patchpoint =
2048 m_insertionSet.insert<PatchpointValue>(m_index, Void, checkValue->origin());
2049
2050 patchpoint->effects = Effects();
2051 patchpoint->effects.reads = HeapRange::top();
2052 patchpoint->effects.exitsSideways = true;
2053
2054 for (unsigned i = 1; i < checkValue->numChildren(); ++i)
2055 patchpoint->append(checkValue->constrainedChild(i));
2056
2057 patchpoint->setGenerator(checkValue->generator());
2058
2059 // Replace the rest of the block with an Oops.
2060 for (unsigned i = m_index + 1; i < m_block->size() - 1; ++i)
2061 m_block->at(i)->replaceWithBottom(m_insertionSet, m_index);
2062 m_block->last()->replaceWithOops(m_block);
2063 m_block->last()->setOrigin(checkValue->origin());
2064
2065 // Replace ourselves last.
2066 checkValue->replaceWithNop();
2067 m_changedCFG = true;
2068 break;
2069 }
2070
2071 if (checkValue->child(0)->opcode() == NotEqual
2072 && checkValue->child(0)->child(1)->isInt(0)) {
2073 checkValue->child(0) = checkValue->child(0)->child(0);
2074 m_changed = true;
2075 }
2076
2077 if (m_proc.optLevel() < 2)
2078 break;
2079
2080 // If we are checking some bounded-size SSA expression that leads to a Select that
2081 // has a constant as one of its results, then turn the Select into a Branch and split
2082 // the code between the Check and the Branch. For example, this:
2083 //
2084 // @a = Select(@p, @x, 42)
2085 // @b = Add(@a, 35)
2086 // Check(@b)
2087 //
2088 // becomes this:
2089 //
2090 // Branch(@p, #truecase, #falsecase)
2091 //
2092 // BB#truecase:
2093 // @b_truecase = Add(@x, 35)
2094 // Check(@b_truecase)
2095 // Upsilon(@x, ^a)
2096 // Upsilon(@b_truecase, ^b)
2097 // Jump(#continuation)
2098 //
2099 // BB#falsecase:
2100 // @b_falsecase = Add(42, 35)
2101 // Check(@b_falsecase)
2102 // Upsilon(42, ^a)
2103 // Upsilon(@b_falsecase, ^b)
2104 // Jump(#continuation)
2105 //
2106 // BB#continuation:
2107 // @a = Phi()
2108 // @b = Phi()
2109 //
2110 // The goal of this optimization is to kill a lot of code in one of those basic
2111 // blocks. This is pretty much guaranteed since one of those blocks will replace all
2112 // uses of the Select with a constant, and that constant will be transitively used
2113 // from the check.
2114 static constexpr unsigned selectSpecializationBound = 3;
2115 Value* select = findRecentNodeMatching(
2116 m_value->child(0), selectSpecializationBound,
2117 [&] (Value* value) -> bool {
2118 return value->opcode() == Select
2119 && (value->child(1)->isConstant() && value->child(2)->isConstant());
2120 });
2121
2122 if (select) {
2123 specializeSelect(select);
2124 break;
2125 }
2126 break;
2127 }
2128
2129 case Branch: {
2130 // Turn this: Branch(NotEqual(x, 0))
2131 // Into this: Branch(x)
2132 if (m_value->child(0)->opcode() == NotEqual && m_value->child(0)->child(1)->isInt(0)) {
2133 m_value->child(0) = m_value->child(0)->child(0);
2134 m_changed = true;
2135 }
2136
2137 // Turn this: Branch(Equal(x, 0), then, else)
2138 // Into this: Branch(x, else, then)
2139 if (m_value->child(0)->opcode() == Equal && m_value->child(0)->child(1)->isInt(0)) {
2140 m_value->child(0) = m_value->child(0)->child(0);
2141 std::swap(m_block->taken(), m_block->notTaken());
2142 m_changed = true;
2143 }
2144
2145 // Turn this: Branch(BitXor(bool, 1), then, else)
2146 // Into this: Branch(bool, else, then)
2147 if (m_value->child(0)->opcode() == BitXor
2148 && m_value->child(0)->child(1)->isInt32(1)
2149 && m_value->child(0)->child(0)->returnsBool()) {
2150 m_value->child(0) = m_value->child(0)->child(0);
2151 std::swap(m_block->taken(), m_block->notTaken());
2152 m_changed = true;
2153 }
2154
2155 // Turn this: Branch(BitAnd(bool, xyb1), then, else)
2156 // Into this: Branch(bool, then, else)
2157 if (m_value->child(0)->opcode() == BitAnd
2158 && m_value->child(0)->child(1)->hasInt()
2159 && m_value->child(0)->child(1)->asInt() & 1
2160 && m_value->child(0)->child(0)->returnsBool()) {
2161 m_value->child(0) = m_value->child(0)->child(0);
2162 m_changed = true;
2163 }
2164
2165 TriState triState = m_value->child(0)->asTriState();
2166
2167 // Turn this: Branch(0, then, else)
2168 // Into this: Jump(else)
2169 if (triState == FalseTriState) {
2170 m_block->taken().block()->removePredecessor(m_block);
2171 m_value->replaceWithJump(m_block, m_block->notTaken());
2172 m_changedCFG = true;
2173 break;
2174 }
2175
2176 // Turn this: Branch(not 0, then, else)
2177 // Into this: Jump(then)
2178 if (triState == TrueTriState) {
2179 m_block->notTaken().block()->removePredecessor(m_block);
2180 m_value->replaceWithJump(m_block, m_block->taken());
2181 m_changedCFG = true;
2182 break;
2183 }
2184
2185 if (m_proc.optLevel() >= 2) {
2186 // If a check for the same property dominates us, we can kill the branch. This sort
2187 // of makes sense here because it's cheap, but hacks like this show that we're going
2188 // to need SCCP.
2189 Value* check = m_pureCSE.findMatch(
2190 ValueKey(Check, Void, m_value->child(0)), m_block, *m_dominators);
2191 if (check) {
2192 // The Check would have side-exited if child(0) was non-zero. So, it must be
2193 // zero here.
2194 m_block->taken().block()->removePredecessor(m_block);
2195 m_value->replaceWithJump(m_block, m_block->notTaken());
2196 m_changedCFG = true;
2197 }
2198 }
2199 break;
2200 }
2201
2202 case Const32:
2203 case Const64:
2204 case ConstFloat:
2205 case ConstDouble: {
2206 ValueKey key = m_value->key();
2207 if (Value* constInRoot = m_valueForConstant.get(key)) {
2208 if (constInRoot != m_value) {
2209 m_value->replaceWithIdentity(constInRoot);
2210 m_changed = true;
2211 }
2212 } else if (m_block == m_root)
2213 m_valueForConstant.add(key, m_value);
2214 else {
2215 Value* constInRoot = m_proc.clone(m_value);
2216 ASSERT(m_root && m_root->size() >= 1);
2217 m_root->appendNonTerminal(constInRoot);
2218 m_valueForConstant.add(key, constInRoot);
2219 m_value->replaceWithIdentity(constInRoot);
2220 m_changed = true;
2221 }
2222 break;
2223 }
2224
2225 default:
2226 break;
2227 }
2228 }
2229
2230 // Find a node that:
2231 // - functor(node) returns true.
2232 // - it's reachable from the given node via children.
2233 // - it's in the last "bound" slots in the current basic block.
2234 // This algorithm is optimized under the assumption that the bound is small.
2235 template<typename Functor>
2236 Value* findRecentNodeMatching(Value* start, unsigned bound, const Functor& functor)
2237 {
2238 unsigned startIndex = bound < m_index ? m_index - bound : 0;
2239 Value* result = nullptr;
2240 start->walk(
2241 [&] (Value* value) -> Value::WalkStatus {
2242 bool found = false;
2243 for (unsigned i = startIndex; i <= m_index; ++i) {
2244 if (m_block->at(i) == value)
2245 found = true;
2246 }
2247 if (!found)
2248 return Value::IgnoreChildren;
2249
2250 if (functor(value)) {
2251 result = value;
2252 return Value::Stop;
2253 }
2254
2255 return Value::Continue;
2256 });
2257 return result;
2258 }
2259
2260 // This specializes a sequence of code up to a Select. This doesn't work when we're at a
2261 // terminal. It would be cool to fix that eventually. The main problem is that instead of
2262 // splitting the block, we should just insert the then/else blocks. We'll have to create
2263 // double the Phis and double the Upsilons. It'll probably be the sort of optimization that
2264 // we want to do only after we've done loop optimizations, since this will *definitely*
2265 // obscure things. In fact, even this simpler form of select specialization will possibly
2266 // obscure other optimizations. It would be great to have two modes of strength reduction,
2267 // one that does obscuring optimizations and runs late, and another that does not do
2268 // obscuring optimizations and runs early.
2269 // FIXME: Make select specialization handle branches.
2270 // FIXME: Have a form of strength reduction that does no obscuring optimizations and runs
2271 // early.
    void specializeSelect(Value* source)
    {
        if (B3ReduceStrengthInternal::verbose)
            dataLog("Specializing select: ", deepDump(m_proc, source), "\n");

        // Split m_block just before m_index: everything earlier moves into a fresh
        // predecessor block, and the pending insertion set is committed in the process
        // (see the note below about why we search for the source only after this).
        BasicBlock* predecessor = m_blockInsertionSet.splitForward(m_block, m_index, &m_insertionSet);
        if (m_block == m_root) {
            // The split made the predecessor the new entry block. The constant cache is
            // tied to the root block, so conservatively reset it after the root changes.
            m_root = predecessor;
            m_valueForConstant.clear();
        }

        // Splitting will commit the insertion set, which changes the exact position of the
        // source. That's why we do the search after splitting.
        unsigned startIndex = UINT_MAX;
        for (unsigned i = predecessor->size(); i--;) {
            if (predecessor->at(i) == source) {
                startIndex = i;
                break;
            }
        }

        RELEASE_ASSERT(startIndex != UINT_MAX);

        // By BasicBlock convention, caseIndex == 0 => then, caseIndex == 1 => else.
        static constexpr unsigned numCases = 2;
        BasicBlock* cases[numCases];
        for (unsigned i = 0; i < numCases; ++i)
            cases[i] = m_blockInsertionSet.insertBefore(m_block);

        // Per-case map from an original value to its clone in that case's block.
        HashMap<Value*, Value*> mappings[2];

        // Save things we want to know about the source.
        Value* predicate = source->child(0);

        // Within each case block, the Select is known to produce the corresponding child.
        for (unsigned i = 0; i < numCases; ++i)
            mappings[i].add(source, source->child(1 + i));

        // Clone `value` into both case blocks (rewriting children through the per-case
        // mappings), feed each clone into a Phi via an Upsilon, and turn the original
        // value into that Phi.
        auto cloneValue = [&] (Value* value) {
            ASSERT(value != source);

            for (unsigned i = 0; i < numCases; ++i) {
                Value* clone = m_proc.clone(value);
                for (Value*& child : clone->children()) {
                    if (Value* newChild = mappings[i].get(child))
                        child = newChild;
                }
                if (value->type() != Void)
                    mappings[i].add(value, clone);

                cases[i]->append(clone);
                if (value->type() != Void)
                    cases[i]->appendNew<UpsilonValue>(m_proc, value->origin(), clone, value);
            }

            value->replaceWithPhi();
        };

        // The jump that the splitter inserted is of no use to us.
        predecessor->removeLast(m_proc);

        // Handle the source itself; it's special: each case feeds the corresponding Select
        // child directly into the Phi that replaces the Select.
        for (unsigned i = 0; i < numCases; ++i) {
            cases[i]->appendNew<UpsilonValue>(
                m_proc, source->origin(), source->child(1 + i), source);
        }
        source->replaceWithPhi();
        m_insertionSet.insertValue(m_index, source);

        // Now handle all values between the source and the check.
        for (unsigned i = startIndex + 1; i < predecessor->size(); ++i) {
            Value* value = predecessor->at(i);
            value->owner = nullptr;

            cloneValue(value);

            // Void values have no uses to forward, so the original can just be deleted.
            if (value->type() != Void)
                m_insertionSet.insertValue(m_index, value);
            else
                m_proc.deleteValue(value);
        }

        // Finally, deal with the check.
        cloneValue(m_value);

        // Remove the values from the predecessor.
        predecessor->values().resize(startIndex);

        // Route control flow: predecessor branches on the Select's predicate into the two
        // case blocks, which both rejoin at m_block (where the Phis live).
        predecessor->appendNew<Value>(m_proc, Branch, source->origin(), predicate);
        predecessor->setSuccessors(FrequentedBlock(cases[0]), FrequentedBlock(cases[1]));

        for (unsigned i = 0; i < numCases; ++i) {
            cases[i]->appendNew<Value>(m_proc, Jump, m_value->origin());
            cases[i]->setSuccessors(FrequentedBlock(m_block));
        }

        m_changed = true;

        predecessor->updatePredecessorsAfter();
    }
2373
2374 static bool shouldSwapBinaryOperands(Value* value)
2375 {
2376 // Note that we have commutative operations that take more than two children. Those operations may
2377 // commute their first two children while leaving the rest unaffected.
2378 ASSERT(value->numChildren() >= 2);
2379
2380 // Leave it alone if the right child is a constant.
2381 if (value->child(1)->isConstant()
2382 || value->child(0)->opcode() == AtomicStrongCAS)
2383 return false;
2384
2385 if (value->child(0)->isConstant())
2386 return true;
2387
2388 if (value->child(1)->opcode() == AtomicStrongCAS)
2389 return true;
2390
2391 // Sort the operands. This is an important canonicalization. We use the index instead of
2392 // the address to make this at least slightly deterministic.
2393 if (value->child(0)->index() > value->child(1)->index())
2394 return true;
2395
2396 return false;
2397 }
2398
2399 // Turn this: Add(constant, value)
2400 // Into this: Add(value, constant)
2401 //
2402 // Also:
2403 // Turn this: Add(value1, value2)
2404 // Into this: Add(value2, value1)
2405 // If we decide that value2 coming first is the canonical ordering.
2406 void handleCommutativity()
2407 {
2408 if (shouldSwapBinaryOperands(m_value)) {
2409 std::swap(m_value->child(0), m_value->child(1));
2410 m_changed = true;
2411 }
2412 }
2413
2414 // For Op==Add or Sub, turn any of these:
2415 // Op(Mul(x1, x2), Mul(x1, x3))
2416 // Op(Mul(x2, x1), Mul(x1, x3))
2417 // Op(Mul(x1, x2), Mul(x3, x1))
2418 // Op(Mul(x2, x1), Mul(x3, x1))
2419 // Into this: Mul(x1, Op(x2, x3))
2420 bool handleMulDistributivity()
2421 {
2422 ASSERT(m_value->opcode() == Add || m_value->opcode() == Sub);
2423 Value* x1 = nullptr;
2424 Value* x2 = nullptr;
2425 Value* x3 = nullptr;
2426 if (m_value->child(0)->opcode() == Mul && m_value->child(1)->opcode() == Mul) {
2427 if (m_value->child(0)->child(0) == m_value->child(1)->child(0)) {
2428 // Op(Mul(x1, x2), Mul(x1, x3))
2429 x1 = m_value->child(0)->child(0);
2430 x2 = m_value->child(0)->child(1);
2431 x3 = m_value->child(1)->child(1);
2432 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(0)) {
2433 // Op(Mul(x2, x1), Mul(x1, x3))
2434 x1 = m_value->child(0)->child(1);
2435 x2 = m_value->child(0)->child(0);
2436 x3 = m_value->child(1)->child(1);
2437 } else if (m_value->child(0)->child(0) == m_value->child(1)->child(1)) {
2438 // Op(Mul(x1, x2), Mul(x3, x1))
2439 x1 = m_value->child(0)->child(0);
2440 x2 = m_value->child(0)->child(1);
2441 x3 = m_value->child(1)->child(0);
2442 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(1)) {
2443 // Op(Mul(x2, x1), Mul(x3, x1))
2444 x1 = m_value->child(0)->child(1);
2445 x2 = m_value->child(0)->child(0);
2446 x3 = m_value->child(1)->child(0);
2447 }
2448 }
2449 if (x1 != nullptr) {
2450 ASSERT(x2 != nullptr && x3 != nullptr);
2451 Value* newOp = m_insertionSet.insert<Value>(m_index, m_value->opcode(), m_value->origin(), x2, x3);
2452 replaceWithNew<Value>(Mul, m_value->origin(), x1, newOp);
2453 return true;
2454 }
2455 return false;
2456 }
2457
2458 // For Op==BitOr or BitXor, turn any of these:
2459 // Op(BitAnd(x1, x2), BitAnd(x1, x3))
2460 // Op(BitAnd(x2, x1), BitAnd(x1, x3))
2461 // Op(BitAnd(x1, x2), BitAnd(x3, x1))
2462 // Op(BitAnd(x2, x1), BitAnd(x3, x1))
2463 // Into this: BitAnd(Op(x2, x3), x1)
2464 // And any of these:
2465 // Op(BitAnd(x1, x2), x1)
2466 // Op(BitAnd(x2, x1), x1)
2467 // Op(x1, BitAnd(x1, x2))
2468 // Op(x1, BitAnd(x2, x1))
2469 // Into this: BitAnd(Op(x2, x1), x1)
2470 // This second set is equivalent to doing x1 => BitAnd(x1, x1), and then applying the first set.
2471 // It does not reduce the number of operations executed, but provides some useful normalization: we prefer to have BitAnd at the outermost, then BitXor, and finally BitOr at the innermost
2472 bool handleBitAndDistributivity()
2473 {
2474 ASSERT(m_value->opcode() == BitOr || m_value->opcode() == BitXor);
2475 Value* x1 = nullptr;
2476 Value* x2 = nullptr;
2477 Value* x3 = nullptr;
2478 if (m_value->child(0)->opcode() == BitAnd && m_value->child(1)->opcode() == BitAnd) {
2479 if (m_value->child(0)->child(0) == m_value->child(1)->child(0)) {
2480 x1 = m_value->child(0)->child(0);
2481 x2 = m_value->child(0)->child(1);
2482 x3 = m_value->child(1)->child(1);
2483 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(0)) {
2484 x1 = m_value->child(0)->child(1);
2485 x2 = m_value->child(0)->child(0);
2486 x3 = m_value->child(1)->child(1);
2487 } else if (m_value->child(0)->child(0) == m_value->child(1)->child(1)) {
2488 x1 = m_value->child(0)->child(0);
2489 x2 = m_value->child(0)->child(1);
2490 x3 = m_value->child(1)->child(0);
2491 } else if (m_value->child(0)->child(1) == m_value->child(1)->child(1)) {
2492 x1 = m_value->child(0)->child(1);
2493 x2 = m_value->child(0)->child(0);
2494 x3 = m_value->child(1)->child(0);
2495 }
2496 } else if (m_value->child(0)->opcode() == BitAnd) {
2497 if (m_value->child(0)->child(0) == m_value->child(1)) {
2498 x1 = x3 = m_value->child(1);
2499 x2 = m_value->child(0)->child(1);
2500 } else if (m_value->child(0)->child(1) == m_value->child(1)) {
2501 x1 = x3 = m_value->child(1);
2502 x2 = m_value->child(0)->child(0);
2503 }
2504 } else if (m_value->child(1)->opcode() == BitAnd) {
2505 if (m_value->child(1)->child(0) == m_value->child(0)) {
2506 x1 = x3 = m_value->child(0);
2507 x2 = m_value->child(1)->child(1);
2508 } else if (m_value->child(1)->child(1) == m_value->child(0)) {
2509 x1 = x3 = m_value->child(0);
2510 x2 = m_value->child(1)->child(0);
2511 }
2512 }
2513 if (x1 != nullptr) {
2514 ASSERT(x2 != nullptr && x3 != nullptr);
2515 Value* bitOp = m_insertionSet.insert<Value>(m_index, m_value->opcode(), m_value->origin(), x2, x3);
2516 replaceWithNew<Value>(BitAnd, m_value->origin(), x1, bitOp);
2517 return true;
2518 }
2519 return false;
2520 }
2521
2522 struct CanonicalizedComparison {
2523 Opcode opcode;
2524 Value* operands[2];
2525 };
2526 static CanonicalizedComparison canonicalizeComparison(Value* value)
2527 {
2528 auto flip = [] (Opcode opcode) {
2529 switch (opcode) {
2530 case LessThan:
2531 return GreaterThan;
2532 case GreaterThan:
2533 return LessThan;
2534 case LessEqual:
2535 return GreaterEqual;
2536 case GreaterEqual:
2537 return LessEqual;
2538 case Above:
2539 return Below;
2540 case Below:
2541 return Above;
2542 case AboveEqual:
2543 return BelowEqual;
2544 case BelowEqual:
2545 return AboveEqual;
2546 default:
2547 return opcode;
2548 }
2549 };
2550 if (shouldSwapBinaryOperands(value))
2551 return { flip(value->opcode()), { value->child(1), value->child(0) } };
2552 return { value->opcode(), { value->child(0), value->child(1) } };
2553 }
2554
2555 // FIXME: This should really be a forward analysis. Instead, we uses a bounded-search backwards
2556 // analysis.
2557 IntRange rangeFor(Value* value, unsigned timeToLive = 5)
2558 {
2559 if (!timeToLive)
2560 return IntRange::top(value->type());
2561
2562 switch (value->opcode()) {
2563 case Const32:
2564 case Const64: {
2565 int64_t intValue = value->asInt();
2566 return IntRange(intValue, intValue);
2567 }
2568
2569 case BitAnd:
2570 if (value->child(1)->hasInt())
2571 return IntRange::rangeForMask(value->child(1)->asInt(), value->type());
2572 break;
2573
2574 case SShr:
2575 if (value->child(1)->hasInt32()) {
2576 return rangeFor(value->child(0), timeToLive - 1).sShr(
2577 value->child(1)->asInt32(), value->type());
2578 }
2579 break;
2580
2581 case ZShr:
2582 if (value->child(1)->hasInt32()) {
2583 return rangeFor(value->child(0), timeToLive - 1).zShr(
2584 value->child(1)->asInt32(), value->type());
2585 }
2586 break;
2587
2588 case Shl:
2589 if (value->child(1)->hasInt32()) {
2590 return rangeFor(value->child(0), timeToLive - 1).shl(
2591 value->child(1)->asInt32(), value->type());
2592 }
2593 break;
2594
2595 case Add:
2596 return rangeFor(value->child(0), timeToLive - 1).add(
2597 rangeFor(value->child(1), timeToLive - 1), value->type());
2598
2599 case Sub:
2600 return rangeFor(value->child(0), timeToLive - 1).sub(
2601 rangeFor(value->child(1), timeToLive - 1), value->type());
2602
2603 case Mul:
2604 return rangeFor(value->child(0), timeToLive - 1).mul(
2605 rangeFor(value->child(1), timeToLive - 1), value->type());
2606
2607 case SExt8:
2608 case SExt16:
2609 case SExt32:
2610 return rangeFor(value->child(0), timeToLive - 1);
2611
2612 case ZExt32:
2613 return rangeFor(value->child(0), timeToLive - 1).zExt32();
2614
2615 default:
2616 break;
2617 }
2618
2619 return IntRange::top(value->type());
2620 }
2621
2622 template<typename ValueType, typename... Arguments>
2623 void replaceWithNew(Arguments... arguments)
2624 {
2625 replaceWithNewValue(m_proc.add<ValueType>(arguments...));
2626 }
2627
2628 bool replaceWithNewValue(Value* newValue)
2629 {
2630 if (!newValue)
2631 return false;
2632 m_insertionSet.insertValue(m_index, newValue);
2633 m_value->replaceWithIdentity(newValue);
2634 m_changed = true;
2635 return true;
2636 }
2637
2638 void replaceWithIdentity(Value* newValue)
2639 {
2640 m_value->replaceWithIdentity(newValue);
2641 m_changed = true;
2642 }
2643
2644 void handleShiftAmount()
2645 {
2646 // Shift anything by zero is identity.
2647 if (m_value->child(1)->isInt32(0)) {
2648 replaceWithIdentity(m_value->child(0));
2649 return;
2650 }
2651
2652 // The shift already masks its shift amount. If the shift amount is being masked by a
2653 // redundant amount, then remove the mask. For example,
2654 // Turn this: Shl(@x, BitAnd(@y, 63))
2655 // Into this: Shl(@x, @y)
2656 unsigned mask = sizeofType(m_value->type()) * 8 - 1;
2657 if (m_value->child(1)->opcode() == BitAnd
2658 && m_value->child(1)->child(1)->hasInt32()
2659 && (m_value->child(1)->child(1)->asInt32() & mask) == mask) {
2660 m_value->child(1) = m_value->child(1)->child(0);
2661 m_changed = true;
2662 }
2663 }
2664
2665 void replaceIfRedundant()
2666 {
2667 m_changed |= m_pureCSE.process(m_value, *m_dominators);
2668 }
2669
    void simplifyCFG()
    {
        if (B3ReduceStrengthInternal::verbose) {
            dataLog("Before simplifyCFG:\n");
            dataLog(m_proc);
        }

        // We have three easy simplification rules:
        //
        // 1) If a successor is a block that just jumps to another block, then jump directly to
        //    that block.
        //
        // 2) If all successors are the same and the operation has no effects, then use a jump
        //    instead.
        //
        // 3) If you jump to a block that is not you and has one predecessor, then merge.
        //
        // Note that because of the first rule, this phase may introduce critical edges. That's fine.
        // If you need broken critical edges, then you have to break them yourself.

        // Note that this relies on predecessors being at least conservatively correct. It's fine for
        // predecessors to mention a block that isn't actually a predecessor. It's *not* fine for a
        // predecessor to be omitted. We assert as much in the loop. In practice, we precisely preserve
        // predecessors during strength reduction since that minimizes the total number of fixpoint
        // iterations needed to kill a lot of code.

        for (BasicBlock* block : m_proc.blocksInPostOrder()) {
            if (B3ReduceStrengthInternal::verbose)
                dataLog("Considering block ", *block, ":\n");

            checkPredecessorValidity();

            // We don't care about blocks that don't have successors.
            if (!block->numSuccessors())
                continue;

            // Rule (1): first check if any of the successors of this block can be forwarded over.
            for (BasicBlock*& successor : block->successorBlocks()) {
                if (successor != block
                    && successor->size() == 1
                    && successor->last()->opcode() == Jump) {
                    BasicBlock* newSuccessor = successor->successorBlock(0);
                    // Skip trivial self-loops; forwarding over them would accomplish nothing.
                    if (newSuccessor != successor) {
                        if (B3ReduceStrengthInternal::verbose) {
                            dataLog(
                                "Replacing ", pointerDump(block), "->", pointerDump(successor),
                                " with ", pointerDump(block), "->", pointerDump(newSuccessor),
                                "\n");
                        }
                        // Note that we do not do replacePredecessor() because the block we're
                        // skipping will still have newSuccessor as its successor.
                        newSuccessor->addPredecessor(block);
                        successor = newSuccessor;
                        m_changedCFG = true;
                    }
                }
            }

            // Rule (2): now check if the block's terminal can be replaced with a jump.
            if (block->numSuccessors() > 1) {
                // The terminal must not have weird effects (ignoring the terminal effect itself).
                Effects effects = block->last()->effects();
                effects.terminal = false;
                if (!effects.mustExecute()) {
                    // All of the successors must be the same.
                    bool allSame = true;
                    BasicBlock* firstSuccessor = block->successorBlock(0);
                    for (unsigned i = 1; i < block->numSuccessors(); ++i) {
                        if (block->successorBlock(i) != firstSuccessor) {
                            allSame = false;
                            break;
                        }
                    }
                    if (allSame) {
                        if (B3ReduceStrengthInternal::verbose) {
                            dataLog(
                                "Changing ", pointerDump(block), "'s terminal to a Jump.\n");
                        }
                        block->last()->replaceWithJump(block, FrequentedBlock(firstSuccessor));
                        m_changedCFG = true;
                    }
                }
            }

            // Rule (3): finally handle jumps to a block with one predecessor.
            if (block->numSuccessors() == 1) {
                BasicBlock* successor = block->successorBlock(0);
                if (successor != block && successor->numPredecessors() == 1) {
                    RELEASE_ASSERT(successor->predecessor(0) == block);

                    // We can merge the two blocks, because the predecessor only jumps to the successor
                    // and the successor is only reachable from the predecessor.

                    // Remove the terminal.
                    Value* value = block->values().takeLast();
                    Origin jumpOrigin = value->origin();
                    RELEASE_ASSERT(value->effects().terminal);
                    m_proc.deleteValue(value);

                    // Append the full contents of the successor to the predecessor.
                    block->values().appendVector(successor->values());
                    block->successors() = successor->successors();

                    // Make sure that the successor has nothing left in it. Make sure that the block
                    // has a terminal so that nobody chokes when they look at it.
                    successor->values().shrink(0);
                    successor->appendNew<Value>(m_proc, Oops, jumpOrigin);
                    successor->clearSuccessors();

                    // Ensure that predecessors of block's new successors know what's up.
                    for (BasicBlock* newSuccessor : block->successorBlocks())
                        newSuccessor->replacePredecessor(successor, block);

                    if (B3ReduceStrengthInternal::verbose) {
                        dataLog(
                            "Merged ", pointerDump(block), "->", pointerDump(successor), "\n");
                    }

                    m_changedCFG = true;
                }
            }
        }

        if (m_changedCFG && B3ReduceStrengthInternal::verbose) {
            dataLog("B3 after simplifyCFG:\n");
            dataLog(m_proc);
        }
    }
2798
2799 void handleChangedCFGIfNecessary()
2800 {
2801 if (m_changedCFG) {
2802 m_proc.resetReachability();
2803 m_proc.invalidateCFG();
2804 m_dominators = nullptr; // Dominators are not valid anymore, and we don't need them yet.
2805 m_changed = true;
2806 }
2807 }
2808
    // Validation-only sanity check: every successor edge must be mirrored by an entry in
    // the successor's predecessor list. Does nothing unless per-phase IR validation is on.
    void checkPredecessorValidity()
    {
        if (!shouldValidateIRAtEachPhase())
            return;

        for (BasicBlock* block : m_proc) {
            for (BasicBlock* successor : block->successorBlocks())
                RELEASE_ASSERT(successor->containsPredecessor(block));
        }
    }
2819
2820 void simplifySSA()
2821 {
2822 // This runs Aycock and Horspool's algorithm on our Phi functions [1]. For most CFG patterns,
2823 // this can take a suboptimal arrangement of Phi functions and make it optimal, as if you had
2824 // run Cytron, Ferrante, Rosen, Wegman, and Zadeck. It's only suboptimal for irreducible
2825 // CFGs. In practice, that doesn't matter, since we expect clients of B3 to run their own SSA
2826 // conversion before lowering to B3, and in the case of the DFG, that conversion uses Cytron
2827 // et al. In that context, this algorithm is intended to simplify Phi functions that were
2828 // made redundant by prior CFG simplification. But according to Aycock and Horspool's paper,
2829 // this algorithm is good enough that a B3 client could just give us maximal Phi's (i.e. Phi
2830 // for each variable at each basic block) and we will make them optimal.
2831 // [1] http://pages.cpsc.ucalgary.ca/~aycock/papers/ssa.ps
2832
2833 // Aycock and Horspool prescribe two rules that are to be run to fixpoint:
2834 //
2835 // 1) If all of the Phi's children are the same (i.e. it's one child referenced from one or
2836 // more Upsilons), then replace all uses of the Phi with the one child.
2837 //
2838 // 2) If all of the Phi's children are either the Phi itself or exactly one other child, then
2839 // replace all uses of the Phi with the one other child.
2840 //
2841 // Rule (2) subsumes rule (1), so we can just run (2). We only run one fixpoint iteration
2842 // here. This premise is that in common cases, this will only find optimization opportunities
2843 // as a result of CFG simplification and usually CFG simplification will only do one round
2844 // of block merging per ReduceStrength fixpoint iteration, so it's OK for this to only do one
2845 // round of Phi merging - since Phis are the value analogue of blocks.
2846
2847 PhiChildren phiChildren(m_proc);
2848
2849 for (Value* phi : phiChildren.phis()) {
2850 Value* otherChild = nullptr;
2851 bool ok = true;
2852 for (Value* child : phiChildren[phi].values()) {
2853 if (child == phi)
2854 continue;
2855 if (child == otherChild)
2856 continue;
2857 if (!otherChild) {
2858 otherChild = child;
2859 continue;
2860 }
2861 ok = false;
2862 break;
2863 }
2864 if (!ok)
2865 continue;
2866 if (!otherChild) {
2867 // Wow, this would be super weird. It probably won't happen, except that things could
2868 // get weird as a consequence of stepwise simplifications in the strength reduction
2869 // fixpoint.
2870 continue;
2871 }
2872
2873 // Turn the Phi into an Identity and turn the Upsilons into Nops.
2874 m_changed = true;
2875 for (Value* upsilon : phiChildren[phi])
2876 upsilon->replaceWithNop();
2877 phi->replaceWithIdentity(otherChild);
2878 }
2879 }
2880
    Procedure& m_proc;
    InsertionSet m_insertionSet; // Pending value insertions into the block being processed.
    BlockInsertionSet m_blockInsertionSet; // Pending basic-block insertions (e.g. for select specialization).
    // Canonical copy of each constant, hoisted into the root block and reused by key.
    HashMap<ValueKey, Value*> m_valueForConstant;
    BasicBlock* m_root { nullptr }; // Entry block; hoisted constants are appended here.
    BasicBlock* m_block { nullptr }; // Block currently being processed.
    unsigned m_index { 0 }; // Index of m_value within m_block.
    Value* m_value { nullptr }; // Value currently being reduced.
    Dominators* m_dominators { nullptr }; // Cleared whenever the CFG changes; recomputed on demand.
    PureCSE m_pureCSE;
    bool m_changed { false }; // Whether anything at all changed this iteration.
    bool m_changedCFG { false }; // Whether the CFG shape changed, requiring reachability/dominator refresh.
2893};
2894
2895} // anonymous namespace
2896
2897bool reduceStrength(Procedure& proc)
2898{
2899 PhaseScope phaseScope(proc, "reduceStrength");
2900 ReduceStrength reduceStrength(proc);
2901 return reduceStrength.run();
2902}
2903
2904} } // namespace JSC::B3
2905
2906#endif // ENABLE(B3_JIT)
2907
2908