/*
 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirGenerate.h"

#if ENABLE(B3_JIT)

#include "AirAllocateRegistersAndStackAndGenerateCode.h"
#include "AirAllocateRegistersAndStackByLinearScan.h"
#include "AirAllocateRegistersByGraphColoring.h"
#include "AirAllocateStackByGraphColoring.h"
#include "AirCode.h"
#include "AirEliminateDeadCode.h"
#include "AirFixObviousSpills.h"
#include "AirFixPartialRegisterStalls.h"
#include "AirGenerationContext.h"
#include "AirHandleCalleeSaves.h"
#include "AirLiveness.h"
#include "AirLogRegisterPressure.h"
#include "AirLowerAfterRegAlloc.h"
#include "AirLowerEntrySwitch.h"
#include "AirLowerMacros.h"
#include "AirLowerStackArgs.h"
#include "AirOpcodeUtils.h"
#include "AirOptimizeBlockOrder.h"
#include "AirReportUsedRegisters.h"
#include "AirSimplifyCFG.h"
#include "AirStackAllocation.h"
#include "AirTmpMap.h"
#include "AirValidate.h"
#include "B3Common.h"
#include "B3Procedure.h"
#include "B3TimingScope.h"
#include "B3ValueInlines.h"
#include "CCallHelpers.h"
#include "DisallowMacroScratchRegisterUsage.h"
#include "LinkBuffer.h"
#include <wtf/IndexMap.h>

namespace JSC { namespace B3 { namespace Air {

void prepareForGeneration(Code& code)
{
    TimingScope timingScope("Air::prepareForGeneration");

    // If we're doing super verbose dumping, the phase scope of any phase will already do a dump.
    if (shouldDumpIR(AirMode) && !shouldDumpIRAtEachPhase(AirMode)) {
        dataLog("Initial air:\n");
        dataLog(code);
    }

    // We don't expect the incoming code to have predecessors computed.
    code.resetReachability();

    if (shouldValidateIR())
        validate(code);

    if (!code.optLevel()) {
        lowerMacros(code);

        // FIXME: The name of this phase doesn't make much sense in O0 since we do this before
        // register allocation.
        lowerAfterRegAlloc(code);

        // Actually create entrypoints.
        lowerEntrySwitch(code);

        // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood that a high
        // frequency successor is also the fall-through target.
        optimizeBlockOrder(code);

        if (shouldValidateIR())
            validate(code);

        if (shouldDumpIR(AirMode)) {
            dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
            dataLog(code);
        }

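        // At O0, register and stack allocation happen during code generation itself; the object
        // created here carries that work over to the matching Air::generate() call below.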
        code.m_generateAndAllocateRegisters = std::make_unique<GenerateAndAllocateRegisters>(code);
        code.m_generateAndAllocateRegisters->prepareForGeneration();

        return;
    }

    simplifyCFG(code);

    lowerMacros(code);

    // This is where we run our optimizations and transformations.
    // FIXME: Add Air optimizations.
    // https://bugs.webkit.org/show_bug.cgi?id=150456

    eliminateDeadCode(code);

    if (code.optLevel() == 1) {
        // When we're compiling quickly, we do register and stack allocation in one linear scan
        // phase. It's fast because it computes liveness only once.
        allocateRegistersAndStackByLinearScan(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // We may still need to do post-allocation lowering. Doing it after both register and
        // stack allocation is less optimal, but it works fine.
        lowerAfterRegAlloc(code);
    } else {
        // NOTE: B3 -O2 generates code that runs 1.5x-2x faster than code generated by -O1.
        // Most of this performance benefit is from -O2's graph coloring register allocation
        // and stack allocation pipeline, which you see below.

        // Register allocation for all the Tmps that do not have a corresponding machine
        // register. After this phase, every Tmp has a reg.
        allocateRegistersByGraphColoring(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // This replaces uses of spill slots with registers or constants if possible. It
        // does this by minimizing the amount that we perturb the already-chosen register
        // allocation. It may extend the live ranges of registers though.
        fixObviousSpills(code);

        lowerAfterRegAlloc(code);

        // This does first-fit allocation of stack slots using an interference graph plus a
        // bunch of other optimizations.
        allocateStackByGraphColoring(code);
    }

    // This turns all Stack and CallArg Args into Addr args that use the frame pointer.
    lowerStackArgs(code);

    // If we coalesced moves then we can unbreak critical edges. This is the main reason for this
    // phase.
    simplifyCFG(code);

    // This is needed to satisfy a requirement of B3::StackmapValue. This also removes dead
    // code. We can avoid running this when certain optimizations are disabled.
    if (code.optLevel() >= 2 || code.needsUsedRegisters())
        reportUsedRegisters(code);

    // Attempt to remove false dependencies between instructions created by partial register changes.
    // This must be executed as late as possible as it depends on the instruction order and register
    // use. We _must_ run this after reportUsedRegisters(), since that kills variable assignments
    // that seem dead. Luckily, this phase does not change register liveness, so that's OK.
    fixPartialRegisterStalls(code);

    // Actually create entrypoints.
    lowerEntrySwitch(code);

    // The control flow graph can be simplified further after we have lowered EntrySwitch.
    simplifyCFG(code);

    // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood that a high
    // frequency successor is also the fall-through target.
    optimizeBlockOrder(code);

    if (shouldValidateIR())
        validate(code);

    // Do a final dump of Air. Note that we have to do this even if we are doing per-phase dumping,
    // since the final generation is not a phase.
    if (shouldDumpIR(AirMode)) {
        dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
        dataLog(code);
    }
}

static void generateWithAlreadyAllocatedRegisters(Code& code, CCallHelpers& jit)
{
    TimingScope timingScope("Air::generate");

    DisallowMacroScratchRegisterUsage disallowScratch(jit);

    // And now, we generate code.
    GenerationContext context;
    context.code = &code;
    context.blockLabels.resize(code.size());
    for (BasicBlock* block : code)
        context.blockLabels[block] = Box<CCallHelpers::Label>::create();
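    // Jumps to blocks that have not been emitted yet cannot be linked immediately; they are
    // collected per target block and linked once the target's label is bound.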
    IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(code.size());

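    // Link a jump to its target block: directly if the target's label is already bound (a
    // backward edge), otherwise by queueing it on the target until the label is known.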
    auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
        if (context.blockLabels[target]->isSet()) {
            jump.linkTo(*context.blockLabels[target], &jit);
            return;
        }

        blockJumps[target].append(jump);
    };

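    // Record a mapping from each instruction's machine-code location back to its B3 Origin.
    // Instructions without an origin are recorded with an empty Origin.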
    PCToOriginMap& pcToOriginMap = code.proc().pcToOriginMap();
    auto addItem = [&] (Inst& inst) {
        if (!inst.origin) {
            pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
            return;
        }
        pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), inst.origin->origin());
    };

    Disassembler* disassembler = code.disassembler();

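    // Emit machine code block by block, in the order chosen by optimizeBlockOrder(). That order
    // decides which terminators below can be elided as fall-throughs.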
    for (BasicBlock* block : code) {
        context.currentBlock = block;
        context.indexInBlock = UINT_MAX;
        blockJumps[block].link(&jit);
        CCallHelpers::Label label = jit.label();
        *context.blockLabels[block] = label;

        if (disassembler)
            disassembler->startBlock(block, jit);

        if (Optional<unsigned> entrypointIndex = code.entrypointIndex(block)) {
            ASSERT(code.isEntrypoint(block));

            if (disassembler)
                disassembler->startEntrypoint(jit);

            code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(jit, code);

            if (disassembler)
                disassembler->endEntrypoint(jit);
        } else
            ASSERT(!code.isEntrypoint(block));

        ASSERT(block->size() >= 1);
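        // Emit every instruction except the terminator. A non-terminator must not produce a
        // branch, which the assertion below checks.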
        for (unsigned i = 0; i < block->size() - 1; ++i) {
            context.indexInBlock = i;
            Inst& inst = block->at(i);
            addItem(inst);
            auto start = jit.labelIgnoringWatchpoints();
            CCallHelpers::Jump jump = inst.generate(jit, context);
            ASSERT_UNUSED(jump, !jump.isSet());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&inst, start, end);
        }

        context.indexInBlock = block->size() - 1;

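        // An unconditional Jump to the block that is emitted next can be skipped entirely:
        // control simply falls through.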
        if (block->last().kind.opcode == Jump
            && block->successorBlock(0) == code.findNextBlock(block))
            continue;

        addItem(block->last());

        if (isReturn(block->last().kind.opcode)) {
            // We currently don't represent the full prologue/epilogue in Air, so we need to
            // have this override.
            auto start = jit.labelIgnoringWatchpoints();
            if (code.frameSize()) {
                jit.emitRestore(code.calleeSaveRegisterAtOffsetList());
                jit.emitFunctionEpilogue();
            } else
                jit.emitFunctionEpilogueWithEmptyFrame();
            jit.ret();
            addItem(block->last());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&block->last(), start, end);
            continue;
        }

        auto start = jit.labelIgnoringWatchpoints();
        CCallHelpers::Jump jump = block->last().generate(jit, context);
        auto end = jit.labelIgnoringWatchpoints();
        if (disassembler)
            disassembler->addInst(&block->last(), start, end);

        // The jump won't be set for patchpoints. It won't be set for Oops because then it won't have
        // any successors.
        if (jump.isSet()) {
            switch (block->numSuccessors()) {
            case 1:
                link(jump, block->successorBlock(0));
                break;
            case 2:
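                // The generated branch targets successor 0; successor 1 needs an explicit jump
                // only if it is not the fall-through block.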
                link(jump, block->successorBlock(0));
                if (block->successorBlock(1) != code.findNextBlock(block))
                    link(jit.jump(), block->successorBlock(1));
                break;
            default:
                RELEASE_ASSERT_NOT_REACHED();
                break;
            }
        }
        addItem(block->last());
    }

    context.currentBlock = nullptr;
    context.indexInBlock = UINT_MAX;

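    // Record the final label of each entrypoint block so clients of Code can locate the
    // compiled entrypoints.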
    Vector<CCallHelpers::Label> entrypointLabels(code.numEntrypoints());
    for (unsigned i = code.numEntrypoints(); i--;)
        entrypointLabels[i] = *context.blockLabels[code.entrypoint(i).block()];
    code.setEntrypointLabels(WTFMove(entrypointLabels));

    pcToOriginMap.appendItem(jit.label(), Origin());
    // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689
    if (disassembler)
        disassembler->startLatePath(jit);

    for (auto& latePath : context.latePaths)
        latePath->run(jit, context);

    if (disassembler)
        disassembler->endLatePath(jit);
    pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
}

void generate(Code& code, CCallHelpers& jit)
{
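    // At O1 and above, prepareForGeneration() already allocated registers and stack. At O0,
    // the combined allocate-and-generate object built in prepareForGeneration() does all the
    // work here.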
    if (code.optLevel())
        generateWithAlreadyAllocatedRegisters(code, jit);
    else
        code.m_generateAndAllocateRegisters->generate(jit);
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)
