/*
 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirOptimizeBlockOrder.h"

#if ENABLE(B3_JIT)

#include "AirBlockWorklist.h"
#include "AirCode.h"
#include "AirInstInlines.h"
#include "AirPhaseScope.h"
#include <wtf/BubbleSort.h>

namespace JSC { namespace B3 { namespace Air {

namespace {

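// Helper that buffers a block's successors and then pushes them onto a worklist in
// ascending order of frequency. Because the worklist pops the most recently pushed
// block first, the highest-frequency successor becomes the next block visited.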
class SortedSuccessors {
public:
    SortedSuccessors()
    {
    }

    void append(BasicBlock* block)
    {
        m_successors.append(block);
    }

    void process(BlockWorklist& worklist)
    {
        // We prefer a stable sort, and we don't want it to go off the rails if we see NaN. Also, the number
        // of successors is bounded. In fact, it currently cannot be more than 2. :-)
        bubbleSort(
            m_successors.begin(), m_successors.end(),
            [] (BasicBlock* left, BasicBlock* right) {
                return left->frequency() < right->frequency();
            });

        // Pushing the successors in ascending order of frequency ensures that the very next block we visit
        // is our highest-frequency successor (unless that successor has already been visited).
        for (unsigned i = 0; i < m_successors.size(); ++i)
            worklist.push(m_successors[i]);

        m_successors.shrink(0);
    }

private:
    Vector<BasicBlock*, 2> m_successors;
};

} // anonymous namespace

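// Returns the blocks in the order we want them laid out in memory: a frequency-greedy
// walk over the non-rare ("fast") blocks first, then the same walk over the rare ("slow")
// blocks, so that cold code ends up at the end of the function.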
Vector<BasicBlock*> blocksInOptimizedOrder(Code& code)
{
    Vector<BasicBlock*> blocksInOrder;

    BlockWorklist fastWorklist;
    SortedSuccessors sortedSuccessors;
    SortedSuccessors sortedSlowSuccessors;

    // We expect entrypoint lowering to have already happened.
    RELEASE_ASSERT(code.numEntrypoints());

    auto appendSuccessor = [&] (const FrequentedBlock& block) {
        if (block.isRare())
            sortedSlowSuccessors.append(block.block());
        else
            sortedSuccessors.append(block.block());
    };

    // Queue every entrypoint except the first, in order of frequency and frequency class.
    for (unsigned i = 1; i < code.numEntrypoints(); ++i)
        appendSuccessor(code.entrypoint(i));

    // Push the primary entrypoint directly onto the worklist so that it gets highest priority
    // and comes first in the final order.
    fastWorklist.push(code.entrypoint(0).block());

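    // Fast-path walk: append each popped block to the order and push its non-rare successors
    // by frequency. Rare successors accumulate in sortedSlowSuccessors for the second pass.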
    while (BasicBlock* block = fastWorklist.pop()) {
        blocksInOrder.append(block);
        for (FrequentedBlock& successor : block->successors())
            appendSuccessor(successor);
        sortedSuccessors.process(fastWorklist);
    }

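    // Now lay out the rare blocks, after all of the fast blocks, skipping anything the fast
    // walk already emitted.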
    BlockWorklist slowWorklist;
    sortedSlowSuccessors.process(slowWorklist);

    while (BasicBlock* block = slowWorklist.pop()) {
        // We might have already processed this block.
        if (fastWorklist.saw(block))
            continue;

        blocksInOrder.append(block);
        for (BasicBlock* successor : block->successorBlocks())
            sortedSuccessors.append(successor);
        sortedSuccessors.process(slowWorklist);
    }

    ASSERT(fastWorklist.isEmpty());
    ASSERT(slowWorklist.isEmpty());

    return blocksInOrder;
}

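// Reorders Code's block list according to blocksInOptimizedOrder() and then inverts any
// branch whose taken successor became the fall-through block.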
void optimizeBlockOrder(Code& code)
{
    PhaseScope phaseScope(code, "optimizeBlockOrder");

    Vector<BasicBlock*> blocksInOrder = blocksInOptimizedOrder(code);

    // Place blocks into Code's block list according to the ordering in blocksInOrder. We do this by leaking
    // all of the blocks and then readopting them.
    for (auto& entry : code.blockList())
        entry.release();

    code.blockList().shrink(0);

    for (unsigned i = 0; i < blocksInOrder.size(); ++i) {
        BasicBlock* block = blocksInOrder[i];
        block->setIndex(i);
        code.blockList().append(std::unique_ptr<BasicBlock>(block));
    }

    // Finally, flip any branches that we recognize. The branch is cheapest when the taken successor does
    // not point at the next block, since the frequent path can then simply fall through.
    for (BasicBlock* block : code) {
        Inst& branch = block->last();

        // It's somewhat tempting to just say that if the block has two successors and the first arg is
        // invertible, then we can do the optimization. But that's wagging the dog. The fact that an
        // instruction happens to have an argument that is invertible doesn't mean it's a branch, even though
        // it is true that currently only branches have invertible arguments. It's also tempting to say that
        // the /branch flag in AirOpcode.opcodes tells us that something is a branch - except that there,
        // /branch also means Jump. The approach taken here means that if you add new branch instructions and
        // forget about this phase, then at worst your new instructions won't opt into the inversion
        // optimization. You'll probably realize that as soon as you look at the disassembly, and it
        // certainly won't cause any correctness issues.

        switch (branch.kind.opcode) {
        case Branch8:
        case Branch32:
        case Branch64:
        case BranchTest8:
        case BranchTest32:
        case BranchTest64:
        case BranchFloat:
        case BranchDouble:
        case BranchAdd32:
        case BranchAdd64:
        case BranchMul32:
        case BranchMul64:
        case BranchSub32:
        case BranchSub64:
        case BranchNeg32:
        case BranchNeg64:
        case BranchAtomicStrongCAS8:
        case BranchAtomicStrongCAS16:
        case BranchAtomicStrongCAS32:
        case BranchAtomicStrongCAS64:
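            // successor(0) is the taken target. If it is also the next block in the new
            // order, swap the successors and invert the condition so that we fall through
            // to it instead of branching to it.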
            if (code.findNextBlock(block) == block->successorBlock(0) && branch.args[0].isInvertible()) {
                std::swap(block->successor(0), block->successor(1));
                branch.args[0] = branch.args[0].inverted();
            }
            break;

        default:
            break;
        }
    }
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)