/*
 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirGenerate.h"

#if ENABLE(B3_JIT)

#include "AirAllocateRegistersAndStackAndGenerateCode.h"
#include "AirAllocateRegistersAndStackByLinearScan.h"
#include "AirAllocateRegistersByGraphColoring.h"
#include "AirAllocateStackByGraphColoring.h"
#include "AirCode.h"
#include "AirEliminateDeadCode.h"
#include "AirFixObviousSpills.h"
#include "AirFixPartialRegisterStalls.h"
#include "AirGenerationContext.h"
#include "AirHandleCalleeSaves.h"
#include "AirLiveness.h"
#include "AirLogRegisterPressure.h"
#include "AirLowerAfterRegAlloc.h"
#include "AirLowerEntrySwitch.h"
#include "AirLowerMacros.h"
#include "AirLowerStackArgs.h"
#include "AirOpcodeUtils.h"
#include "AirOptimizeBlockOrder.h"
#include "AirReportUsedRegisters.h"
#include "AirSimplifyCFG.h"
#include "AirStackAllocation.h"
#include "AirTmpMap.h"
#include "AirValidate.h"
#include "B3Common.h"
#include "B3Procedure.h"
#include "B3TimingScope.h"
#include "B3ValueInlines.h"
#include "CCallHelpers.h"
#include "DisallowMacroScratchRegisterUsage.h"
#include "LinkBuffer.h"
#include <wtf/IndexMap.h>

namespace JSC { namespace B3 { namespace Air {

void prepareForGeneration(Code& code)
{
    TimingScope timingScope("Air::prepareForGeneration");

    // If we're doing super verbose dumping, the phase scope of any phase will already do a dump.
    if (shouldDumpIR(AirMode) && !shouldDumpIRAtEachPhase(AirMode)) {
        dataLog("Initial air:\n");
        dataLog(code);
    }

    // We don't expect the incoming code to have predecessors computed.
    code.resetReachability();

    if (shouldValidateIR())
        validate(code);

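    // At O0 we take a fast path: do only the lowering that is required for correctness, then
    // hand everything else off to a generator that allocates registers and stack as it emits code.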
    if (!code.optLevel()) {
        lowerMacros(code);

        // FIXME: The name of this phase doesn't make much sense in O0 since we do this before
        // register allocation.
        lowerAfterRegAlloc(code);

        // Actually create entrypoints.
        lowerEntrySwitch(code);

        // This sorts the basic blocks in Code to achieve an ordering that maximizes the
        // likelihood that a high-frequency successor is also the fall-through target.
        optimizeBlockOrder(code);

        if (shouldValidateIR())
            validate(code);

        if (shouldDumpIR(AirMode)) {
            dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
            dataLog(code);
        }

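        // Defer the rest of the work to generation time: GenerateAndAllocateRegisters performs
        // register and stack allocation in a single pass while emitting code.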
        code.m_generateAndAllocateRegisters = std::make_unique<GenerateAndAllocateRegisters>(code);
        code.m_generateAndAllocateRegisters->prepareForGeneration();

        return;
    }

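    // Tidy the CFG before running the optimizing pipeline, for example by merging blocks and
    // threading over blocks that just jump somewhere else.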
    simplifyCFG(code);

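    // Lower macro opcodes into the concrete instruction sequences that later phases and the
    // backend know how to handle.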
    lowerMacros(code);

    // This is where we run our optimizations and transformations.
    // FIXME: Add Air optimizations.
    // https://bugs.webkit.org/show_bug.cgi?id=150456

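    // Remove instructions whose results are never used, so the register allocator doesn't spend
    // registers on them.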
    eliminateDeadCode(code);

    if (code.optLevel() == 1) {
        // When we're compiling quickly, we do register and stack allocation in one linear scan
        // phase. It's fast because it computes liveness only once.
        allocateRegistersAndStackByLinearScan(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // We may still need to do post-allocation lowering. Doing it after both register and
        // stack allocation is less optimal, but it works fine.
        lowerAfterRegAlloc(code);
    } else {
        // NOTE: B3 -O2 generates code that runs 1.5x-2x faster than code generated by -O1.
        // Most of this performance benefit is from -O2's graph coloring register allocation
        // and stack allocation pipeline, which you see below.

        // Register allocation for all the Tmps that do not have a corresponding machine
        // register. After this phase, every Tmp has a reg.
        allocateRegistersByGraphColoring(code);

        if (Options::logAirRegisterPressure()) {
            dataLog("Register pressure after register allocation:\n");
            logRegisterPressure(code);
        }

        // This replaces uses of spill slots with registers or constants if possible. It
        // does this by minimizing the amount that we perturb the already-chosen register
        // allocation. It may extend the live ranges of registers though.
        fixObviousSpills(code);

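        // Lower the instructions that could not be handled until register assignments were
        // known, such as Shuffle and ColdCCall.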
        lowerAfterRegAlloc(code);

        // This does first-fit allocation of stack slots using an interference graph plus a
        // bunch of other optimizations.
        allocateStackByGraphColoring(code);
    }

    // This turns all Stack and CallArg Args into Addr args that use the frame pointer.
    lowerStackArgs(code);

    // If we coalesced moves then we can unbreak critical edges. This is the main reason for this
    // phase.
    simplifyCFG(code);

    // This is needed to satisfy a requirement of B3::StackmapValue. This also removes dead
    // code. We can avoid running this when certain optimizations are disabled.
    if (code.optLevel() >= 2 || code.needsUsedRegisters())
        reportUsedRegisters(code);

    // Attempt to remove false dependencies between instructions created by partial register changes.
    // This must be executed as late as possible as it depends on the instruction order and register
    // use. We _must_ run this after reportUsedRegisters(), since that kills variable assignments
    // that seem dead. Luckily, this phase does not change register liveness, so that's OK.
    fixPartialRegisterStalls(code);

    // Actually create entrypoints.
    lowerEntrySwitch(code);

    // The control flow graph can be simplified further after we have lowered EntrySwitch.
    simplifyCFG(code);

    // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood
    // that a high-frequency successor is also the fall-through target.
    optimizeBlockOrder(code);

    if (shouldValidateIR())
        validate(code);

    // Do a final dump of Air. Note that we have to do this even if we are doing per-phase dumping,
    // since the final generation is not a phase.
    if (shouldDumpIR(AirMode)) {
        dataLog("Air after ", code.lastPhaseName(), ", before generation:\n");
        dataLog(code);
    }
}

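// Emits machine code for a Code whose registers and stack slots were already assigned by
// prepareForGeneration() at optLevel >= 1.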
static void generateWithAlreadyAllocatedRegisters(Code& code, CCallHelpers& jit)
{
    TimingScope timingScope("Air::generate");

    DisallowMacroScratchRegisterUsage disallowScratch(jit);

    // And now, we generate code.
    GenerationContext context;
    context.code = &code;
    context.blockLabels.resize(code.size());
    for (BasicBlock* block : code)
        context.blockLabels[block] = Box<CCallHelpers::Label>::create();
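    // Jumps to blocks we haven't emitted yet are collected per target block and get linked once
    // that block's label is bound.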
    IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(code.size());

    auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
        if (context.blockLabels[target]->isSet()) {
            jump.linkTo(*context.blockLabels[target], &jit);
            return;
        }

        blockJumps[target].append(jump);
    };

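    // Record which machine-code PC ranges correspond to which B3 Origins, so that tools can map
    // generated code back to the IR that produced it.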
    PCToOriginMap& pcToOriginMap = code.proc().pcToOriginMap();
    auto addItem = [&] (Inst& inst) {
        if (!inst.origin) {
            pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
            return;
        }
        pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), inst.origin->origin());
    };

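    // If a disassembler is attached, notify it of block, entrypoint, and instruction boundaries
    // so its dump can interleave Air with the machine code it produced.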
    Disassembler* disassembler = code.disassembler();

    for (BasicBlock* block : code) {
        context.currentBlock = block;
        context.indexInBlock = UINT_MAX;
        blockJumps[block].link(&jit);
        CCallHelpers::Label label = jit.label();
        *context.blockLabels[block] = label;

        if (disassembler)
            disassembler->startBlock(block, jit);

        if (Optional<unsigned> entrypointIndex = code.entrypointIndex(block)) {
            ASSERT(code.isEntrypoint(block));

            if (disassembler)
                disassembler->startEntrypoint(jit);

            code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(jit, code);

            if (disassembler)
                disassembler->endEntrypoint(jit);
        } else
            ASSERT(!code.isEntrypoint(block));

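        // Emit every instruction except the terminator. Non-terminators must not produce a jump,
        // which is why the jump returned by generate() is asserted to be unset here.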
        ASSERT(block->size() >= 1);
        for (unsigned i = 0; i < block->size() - 1; ++i) {
            context.indexInBlock = i;
            Inst& inst = block->at(i);
            addItem(inst);
            auto start = jit.labelIgnoringWatchpoints();
            CCallHelpers::Jump jump = inst.generate(jit, context);
            ASSERT_UNUSED(jump, !jump.isSet());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&inst, start, end);
        }

        context.indexInBlock = block->size() - 1;

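        // A terminal Jump whose target is the next block in the emitted order falls through for
        // free, so we don't emit anything for it.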
        if (block->last().kind.opcode == Jump
            && block->successorBlock(0) == code.findNextBlock(block))
            continue;

        addItem(block->last());

        if (isReturn(block->last().kind.opcode)) {
            // We currently don't represent the full prologue/epilogue in Air, so we need to
            // have this override.
            auto start = jit.labelIgnoringWatchpoints();
            if (code.frameSize()) {
                jit.emitRestore(code.calleeSaveRegisterAtOffsetList());
                jit.emitFunctionEpilogue();
            } else
                jit.emitFunctionEpilogueWithEmptyFrame();
            jit.ret();
            addItem(block->last());
            auto end = jit.labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&block->last(), start, end);
            continue;
        }

        auto start = jit.labelIgnoringWatchpoints();
        CCallHelpers::Jump jump = block->last().generate(jit, context);
        auto end = jit.labelIgnoringWatchpoints();
        if (disassembler)
            disassembler->addInst(&block->last(), start, end);

        // The jump won't be set for patchpoints. It also won't be set for Oops, since Oops has
        // no successors.
        if (jump.isSet()) {
            switch (block->numSuccessors()) {
            case 1:
                link(jump, block->successorBlock(0));
                break;
            case 2:
                link(jump, block->successorBlock(0));
                if (block->successorBlock(1) != code.findNextBlock(block))
                    link(jit.jump(), block->successorBlock(1));
                break;
            default:
                RELEASE_ASSERT_NOT_REACHED();
                break;
            }
        }
        addItem(block->last());
    }

    context.currentBlock = nullptr;
    context.indexInBlock = UINT_MAX;

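    // Every block label is bound by now, so we can record the final label of each entrypoint.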
    Vector<CCallHelpers::Label> entrypointLabels(code.numEntrypoints());
    for (unsigned i = code.numEntrypoints(); i--;)
        entrypointLabels[i] = *context.blockLabels[code.entrypoint(i).block()];
    code.setEntrypointLabels(WTFMove(entrypointLabels));

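    // Late paths are out-of-line code (e.g. slow paths) that instructions registered during
    // generation; they are emitted after all of the block bodies.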
    pcToOriginMap.appendItem(jit.label(), Origin());
    // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689
    if (disassembler)
        disassembler->startLatePath(jit);

    for (auto& latePath : context.latePaths)
        latePath->run(jit, context);

    if (disassembler)
        disassembler->endLatePath(jit);
    pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin());
}

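// At optLevel >= 1, registers and stack were already allocated in prepareForGeneration(), so we
// just emit code; at O0, the combined allocate-and-generate pass does the work here.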
void generate(Code& code, CCallHelpers& jit)
{
    if (code.optLevel())
        generateWithAlreadyAllocatedRegisters(code, jit);
    else
        code.m_generateAndAllocateRegisters->generate(jit);
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)