/*
 * Copyright (C) 2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "AirAllocateRegistersAndStackAndGenerateCode.h"

#if ENABLE(B3_JIT)

#include "AirArgInlines.h"
#include "AirBlockInsertionSet.h"
#include "AirCode.h"
#include "AirHandleCalleeSaves.h"
#include "AirLowerStackArgs.h"
#include "AirStackAllocation.h"
#include "AirTmpMap.h"
#include "CCallHelpers.h"
#include "DisallowMacroScratchRegisterUsage.h"

namespace JSC { namespace B3 { namespace Air {

GenerateAndAllocateRegisters::GenerateAndAllocateRegisters(Code& code)
    : m_code(code)
    , m_map(code)
{ }

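// Debug-only consistency check: m_map and m_currentAllocation must stay inverses of each
// other for non-Reg Tmps. Any Tmp that m_map places in a register must be that register's
// occupant in m_currentAllocation, the register must not be marked available, and every
// occupied register must map back to its Tmp.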
ALWAYS_INLINE void GenerateAndAllocateRegisters::checkConsistency()
{
    // This isn't exactly the right option to key this off of, but adding a new one just for this seems silly.
    if (Options::validateGraph() || Options::validateGraphAtEachPhase()) {
        m_code.forEachTmp([&] (Tmp tmp) {
            Reg reg = m_map[tmp].reg;
            if (!reg)
                return;

            ASSERT(!m_availableRegs[tmp.bank()].contains(reg));
            ASSERT(m_currentAllocation->at(reg) == tmp);
        });

        for (Reg reg : RegisterSet::allRegisters()) {
            if (isDisallowedRegister(reg))
                continue;

            Tmp tmp = m_currentAllocation->at(reg);
            if (!tmp) {
                ASSERT(m_availableRegs[bankForReg(reg)].contains(reg));
                continue;
            }

            ASSERT(!m_availableRegs[tmp.bank()].contains(reg));
            ASSERT(m_map[tmp].reg == reg);
        }
    }
}

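// Records, for every non-Reg Tmp, the last global instruction index at which it is observed
// live (at a block head, in an instruction, or at a block tail). freeDeadTmpsIfNeeded() uses
// this to hand registers back once a Tmp's live range has ended.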
void GenerateAndAllocateRegisters::buildLiveRanges(UnifiedTmpLiveness& liveness)
{
    m_liveRangeEnd = TmpMap<size_t>(m_code, 0);

    m_globalInstIndex = 0;
    for (BasicBlock* block : m_code) {
        for (Tmp tmp : liveness.liveAtHead(block)) {
            if (!tmp.isReg())
                m_liveRangeEnd[tmp] = m_globalInstIndex;
        }
        for (Inst& inst : *block) {
            inst.forEachTmpFast([&] (Tmp tmp) {
                if (!tmp.isReg())
                    m_liveRangeEnd[tmp] = m_globalInstIndex;
            });
            ++m_globalInstIndex;
        }
        for (Tmp tmp : liveness.liveAtTail(block)) {
            if (!tmp.isReg())
                m_liveRangeEnd[tmp] = m_globalInstIndex;
        }
    }
}

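// A terminal Patch may def Tmps, but since it ends its block there is no room after it to
// store those Tmps back to their spill slots. So for each successor of such a block we insert
// an empty block that just jumps onward; at generation time that block gets a jump to an
// out-of-line stub (emitted at the end of generate()) which flushes the def'd Tmps and then
// jumps back.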
void GenerateAndAllocateRegisters::insertBlocksForFlushAfterTerminalPatchpoints()
{
    BlockInsertionSet blockInsertionSet(m_code);
    for (BasicBlock* block : m_code) {
        Inst& inst = block->last();
        if (inst.kind.opcode != Patch)
            continue;

        HashMap<Tmp, Arg*> needToDef;

        inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) {
            if (!arg.isTmp())
                return;
            Tmp tmp = arg.tmp();
            if (Arg::isAnyDef(role) && !tmp.isReg())
                needToDef.add(tmp, &arg);
        });

        if (needToDef.isEmpty())
            continue;

        for (FrequentedBlock& frequentedSuccessor : block->successors()) {
            BasicBlock* successor = frequentedSuccessor.block();
            BasicBlock* newBlock = blockInsertionSet.insertBefore(successor, successor->frequency());
            newBlock->appendInst(Inst(Jump, inst.origin));
            newBlock->setSuccessors(successor);
            newBlock->addPredecessor(block);
            frequentedSuccessor.block() = newBlock;
            successor->replacePredecessor(block, newBlock);

            m_blocksAfterTerminalPatchForSpilling.add(newBlock, PatchSpillData { CCallHelpers::Jump(), CCallHelpers::Label(), needToDef });
        }
    }

    blockInsertionSet.execute();
}

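// Forms an Address for the given offset from the frame pointer. On x86 any such offset is
// directly encodable. Elsewhere (e.g. ARM64), an offset that doesn't fit the addressing mode
// is first materialized into the pinned extended-offset-addr register, a scratch register
// that Air keeps pinned so addresses like this can always be formed.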
static ALWAYS_INLINE CCallHelpers::Address callFrameAddr(CCallHelpers& jit, intptr_t offsetFromFP)
{
    if (isX86()) {
        ASSERT(Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP).isValidForm(Width64));
        return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP);
    }

    ASSERT(pinnedExtendedOffsetAddrRegister());
    auto addr = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP);
    if (addr.isValidForm(Width64))
        return CCallHelpers::Address(GPRInfo::callFrameRegister, offsetFromFP);
    GPRReg reg = *pinnedExtendedOffsetAddrRegister();
    jit.move(CCallHelpers::TrustedImmPtr(offsetFromFP), reg);
    jit.add64(GPRInfo::callFrameRegister, reg);
    return CCallHelpers::Address(reg);
}

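// The next four helpers maintain the register <-> Tmp bindings:
//   release: drop the binding between tmp and reg without touching memory.
//   flush:   store reg's current value to tmp's canonical spill slot, keeping the binding.
//   spill:   flush, then release.
//   alloc:   bind tmp to reg, spilling any previous occupant; for uses (isDef == false), also
//            reload tmp's value from its spill slot.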
ALWAYS_INLINE void GenerateAndAllocateRegisters::release(Tmp tmp, Reg reg)
{
    ASSERT(reg);
    ASSERT(m_currentAllocation->at(reg) == tmp);
    m_currentAllocation->at(reg) = Tmp();
    ASSERT(!m_availableRegs[tmp.bank()].contains(reg));
    m_availableRegs[tmp.bank()].set(reg);
    ASSERT(m_map[tmp].reg == reg);
    m_map[tmp].reg = Reg();
}

ALWAYS_INLINE void GenerateAndAllocateRegisters::flush(Tmp tmp, Reg reg)
{
    ASSERT(tmp);
    intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
    if (tmp.isGP())
        m_jit->store64(reg.gpr(), callFrameAddr(*m_jit, offset));
    else
        m_jit->storeDouble(reg.fpr(), callFrameAddr(*m_jit, offset));
}

ALWAYS_INLINE void GenerateAndAllocateRegisters::spill(Tmp tmp, Reg reg)
{
    ASSERT(reg);
    ASSERT(m_map[tmp].reg == reg);
    flush(tmp, reg);
    release(tmp, reg);
}

ALWAYS_INLINE void GenerateAndAllocateRegisters::alloc(Tmp tmp, Reg reg, bool isDef)
{
    if (Tmp occupyingTmp = m_currentAllocation->at(reg))
        spill(occupyingTmp, reg);
    else {
        ASSERT(!m_currentAllocation->at(reg));
        ASSERT(m_availableRegs[tmp.bank()].get(reg));
    }

    m_map[tmp].reg = reg;
    m_availableRegs[tmp.bank()].clear(reg);
    m_currentAllocation->at(reg) = tmp;

    if (!isDef) {
        intptr_t offset = m_map[tmp].spillSlot->offsetFromFP();
        if (tmp.bank() == GP)
            m_jit->load64(callFrameAddr(*m_jit, offset), reg.gpr());
        else
            m_jit->loadDouble(callFrameAddr(*m_jit, offset), reg.fpr());
    }
}

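// Lazily hands back registers whose Tmps' live ranges ended strictly before the current
// global instruction index. Callers invoke this right before they would otherwise spill
// something or report used registers, and it runs at most once per instruction: the
// m_didAlreadyFreeDeadSlots flag is reset at the top of each instruction.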
ALWAYS_INLINE void GenerateAndAllocateRegisters::freeDeadTmpsIfNeeded()
{
    if (m_didAlreadyFreeDeadSlots)
        return;

    m_didAlreadyFreeDeadSlots = true;
    for (size_t i = 0; i < m_currentAllocation->size(); ++i) {
        Tmp tmp = m_currentAllocation->at(i);
        if (!tmp)
            continue;
        if (tmp.isReg())
            continue;
        if (m_liveRangeEnd[tmp] >= m_globalInstIndex)
            continue;

        release(tmp, Reg::fromIndex(i));
    }
}

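// Rewrites a non-Reg Tmp argument in place to the register it will occupy for this
// instruction. Strategy, in order: reuse the register the Tmp already holds; otherwise take a
// free register (freeing dead Tmps first if none is free); otherwise evict a register that
// isn't named by this instruction. Returns false only if every register in the bank is already
// pinned by this instruction's named uses/defs, in which case generate() retries the Inst with
// stack-addressable arguments.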
ALWAYS_INLINE bool GenerateAndAllocateRegisters::assignTmp(Tmp& tmp, Bank bank, bool isDef)
{
    ASSERT(!tmp.isReg());
    if (Reg reg = m_map[tmp].reg) {
        ASSERT(!m_namedDefdRegs.contains(reg));
        tmp = Tmp(reg);
        m_namedUsedRegs.set(reg);
        ASSERT(!m_availableRegs[bank].get(reg));
        return true;
    }

    if (!m_availableRegs[bank].numberOfSetRegisters())
        freeDeadTmpsIfNeeded();

    if (m_availableRegs[bank].numberOfSetRegisters()) {
        // We first take an available register.
        for (Reg reg : m_registers[bank]) {
            if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
                continue;
            if (!m_availableRegs[bank].contains(reg))
                continue;
            m_namedUsedRegs.set(reg); // At this point, it doesn't matter whether we add it to m_namedUsedRegs or m_namedDefdRegs. We just need to mark that we can't use it again for this instruction.
            alloc(tmp, reg, isDef);
            tmp = Tmp(reg);
            return true;
        }

        RELEASE_ASSERT_NOT_REACHED();
    }

    // Nothing was available, let's make some room.
    for (Reg reg : m_registers[bank]) {
        if (m_namedUsedRegs.contains(reg) || m_namedDefdRegs.contains(reg))
            continue;

        m_namedUsedRegs.set(reg);

        alloc(tmp, reg, isDef);
        tmp = Tmp(reg);
        return true;
    }

    // This can happen if the instruction has more warm Any-constrained Tmps than there are available registers.
    return false;
}

ALWAYS_INLINE bool GenerateAndAllocateRegisters::isDisallowedRegister(Reg reg)
{
    return !m_allowedRegisters.get(reg);
}

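// Sets up everything generate() relies on: pessimistically save all callee-save registers,
// allocate escaped stack slots, give every Tmp (and every allowable Reg) its own 8-byte spill
// slot, assign FP-relative offsets to the unlocked slots, finalize the frame size, and lower
// stack args. After this runs, the stack layout is fixed.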
void GenerateAndAllocateRegisters::prepareForGeneration()
{
    // We pessimistically assume we use all callee saves.
    handleCalleeSaves(m_code, RegisterSet::calleeSaveRegisters());
    allocateEscapedStackSlots(m_code);

    // Each Tmp gets its own stack slot.
    auto createStackSlot = [&] (const Tmp& tmp) {
        TmpData data;
        data.spillSlot = m_code.addStackSlot(8, StackSlotKind::Spill);
        data.reg = Reg();
        m_map[tmp] = data;
#if !ASSERT_DISABLED
        m_allTmps[tmp.bank()].append(tmp);
#endif
    };

    m_code.forEachTmp([&] (Tmp tmp) {
        ASSERT(!tmp.isReg());
        createStackSlot(tmp);
    });

    m_allowedRegisters = RegisterSet();

    forEachBank([&] (Bank bank) {
        m_registers[bank] = m_code.regsInPriorityOrder(bank);

        for (Reg reg : m_registers[bank]) {
            m_allowedRegisters.set(reg);
            createStackSlot(Tmp(reg));
        }
    });

    {
        unsigned nextIndex = 0;
        for (StackSlot* slot : m_code.stackSlots()) {
            if (slot->isLocked())
                continue;
            intptr_t offset = -static_cast<intptr_t>(m_code.frameSize()) - static_cast<intptr_t>(nextIndex) * 8 - 8;
            ++nextIndex;
            slot->setOffsetFromFP(offset);
        }
    }

    updateFrameSizeBasedOnStackSlots(m_code);
    m_code.setStackIsAllocated(true);

    lowerStackArgs(m_code);

    // Verify none of these passes add any tmps.
#if !ASSERT_DISABLED
    forEachBank([&] (Bank bank) {
        ASSERT(m_allTmps[bank].size() - m_registers[bank].size() == m_code.numTmps(bank));
    });
#endif
}

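// A single forward pass that performs register allocation and code generation together.
// Within a block, allocation is greedy: a Tmp occupies a register from the point it is first
// needed until its live range ends, it gets spilled, or the block ends. Register state
// normally dies at block boundaries (everything is flushed to spill slots), except for the
// single-predecessor successor fast path below, which forwards this block's allocation to a
// successor that hasn't been generated yet.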
void GenerateAndAllocateRegisters::generate(CCallHelpers& jit)
{
    m_jit = &jit;

    TimingScope timingScope("Air::generateAndAllocateRegisters");

    insertBlocksForFlushAfterTerminalPatchpoints();

    DisallowMacroScratchRegisterUsage disallowScratch(*m_jit);

    UnifiedTmpLiveness liveness(m_code);
    buildLiveRanges(liveness);

    IndexMap<BasicBlock*, IndexMap<Reg, Tmp>> currentAllocationMap(m_code.size());
    {
        IndexMap<Reg, Tmp> defaultCurrentAllocation(Reg::maxIndex() + 1);
        for (BasicBlock* block : m_code)
            currentAllocationMap[block] = defaultCurrentAllocation;

        // The only things live in registers at the root blocks are the explicitly named
        // registers that are live at head.
        for (unsigned i = m_code.numEntrypoints(); i--;) {
            BasicBlock* entrypoint = m_code.entrypoint(i).block();
            for (Tmp tmp : liveness.liveAtHead(entrypoint)) {
                if (tmp.isReg())
                    currentAllocationMap[entrypoint][tmp.reg()] = tmp;
            }
        }
    }

    // And now, we generate code.
    GenerationContext context;
    context.code = &m_code;
    context.blockLabels.resize(m_code.size());
    for (BasicBlock* block : m_code)
        context.blockLabels[block] = Box<CCallHelpers::Label>::create();
    IndexMap<BasicBlock*, CCallHelpers::JumpList> blockJumps(m_code.size());

    auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) {
        if (context.blockLabels[target]->isSet()) {
            jump.linkTo(*context.blockLabels[target], m_jit);
            return;
        }

        blockJumps[target].append(jump);
    };

    Disassembler* disassembler = m_code.disassembler();

    m_globalInstIndex = 0;

    for (BasicBlock* block : m_code) {
        context.currentBlock = block;
        context.indexInBlock = UINT_MAX;
        blockJumps[block].link(m_jit);
        CCallHelpers::Label label = m_jit->label();
        *context.blockLabels[block] = label;

        if (disassembler)
            disassembler->startBlock(block, *m_jit);

        if (Optional<unsigned> entrypointIndex = m_code.entrypointIndex(block)) {
            ASSERT(m_code.isEntrypoint(block));
            if (disassembler)
                disassembler->startEntrypoint(*m_jit);

            m_code.prologueGeneratorForEntrypoint(*entrypointIndex)->run(*m_jit, m_code);

            if (disassembler)
                disassembler->endEntrypoint(*m_jit);
        } else
            ASSERT(!m_code.isEntrypoint(block));

        auto startLabel = m_jit->labelIgnoringWatchpoints();

        {
            auto iter = m_blocksAfterTerminalPatchForSpilling.find(block);
            if (iter != m_blocksAfterTerminalPatchForSpilling.end()) {
                auto& data = iter->value;
                data.jump = m_jit->jump();
                data.continueLabel = m_jit->label();
            }
        }

        forEachBank([&] (Bank bank) {
#if !ASSERT_DISABLED
            // By default, everything is spilled at block boundaries. Rather than walking every
            // Tmp to clear its register assignment (since #Tmps >> #Available regs), we walk the
            // register file at the end of each block and clear the corresponding entries in
            // m_map. So at the head of each block, no Tmp should claim to be in a register.
            for (Tmp tmp : m_allTmps[bank])
                ASSERT(m_map[tmp].reg == Reg());
#endif

            RegisterSet availableRegisters;
            for (Reg reg : m_registers[bank])
                availableRegisters.set(reg);
            m_availableRegs[bank] = WTFMove(availableRegisters);
        });

        IndexMap<Reg, Tmp>& currentAllocation = currentAllocationMap[block];
        m_currentAllocation = &currentAllocation;

        for (unsigned i = 0; i < currentAllocation.size(); ++i) {
            Tmp tmp = currentAllocation[i];
            if (!tmp)
                continue;
            Reg reg = Reg::fromIndex(i);
            m_map[tmp].reg = reg;
            m_availableRegs[tmp.bank()].clear(reg);
        }

        bool isReplayingSameInst = false;
        for (size_t instIndex = 0; instIndex < block->size(); ++instIndex) {
            checkConsistency();

            if (instIndex && !isReplayingSameInst)
                startLabel = m_jit->labelIgnoringWatchpoints();

            context.indexInBlock = instIndex;

            Inst& inst = block->at(instIndex);

            m_didAlreadyFreeDeadSlots = false;

            m_namedUsedRegs = RegisterSet();
            m_namedDefdRegs = RegisterSet();

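            // Tmp-to-Tmp moves can sometimes be elided entirely: if the source's live range ends
            // at this instruction and the source is already in a register, we just rebind that
            // register to the destination instead of emitting a move. When that happens, the
            // lambda below returns false and we skip inst.generate() for this Inst.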
            bool needsToGenerate = ([&] () -> bool {
                // FIXME: We should consider trying to figure out if we can also elide Mov32s.
                if (!(inst.kind.opcode == Move || inst.kind.opcode == MoveDouble))
                    return true;

                ASSERT(inst.args.size() >= 2);
                Arg source = inst.args[0];
                Arg dest = inst.args[1];
                if (!source.isTmp() || !dest.isTmp())
                    return true;

                // FIXME: We don't track where the last use of a reg is globally, so we don't know where we can elide those moves.
                ASSERT(source.isReg() || m_liveRangeEnd[source.tmp()] >= m_globalInstIndex);
                if (source.isReg() || m_liveRangeEnd[source.tmp()] != m_globalInstIndex)
                    return true;

                // If we are doing a self move at the end of the tmp's liveness, we can trivially elide the move.
                if (source == dest)
                    return false;

                Reg sourceReg = m_map[source.tmp()].reg;
                // If the value is not already materialized into a register, we may still move it into one, so let the normal generation code run.
                if (!sourceReg)
                    return true;

                ASSERT(m_currentAllocation->at(sourceReg) == source.tmp());

                if (dest.isReg() && dest.reg() != sourceReg)
                    return true;

                if (Reg oldReg = m_map[dest.tmp()].reg)
                    release(dest.tmp(), oldReg);

                m_map[dest.tmp()].reg = sourceReg;
                m_currentAllocation->at(sourceReg) = dest.tmp();
                m_map[source.tmp()].reg = Reg();
                return false;
            })();
            checkConsistency();

            inst.forEachArg([&] (Arg& arg, Arg::Role role, Bank, Width) {
                if (!arg.isTmp())
                    return;

                Tmp tmp = arg.tmp();
                if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                    return;

                if (tmp.isReg()) {
                    if (Arg::isAnyUse(role))
                        m_namedUsedRegs.set(tmp.reg());
                    if (Arg::isAnyDef(role))
                        m_namedDefdRegs.set(tmp.reg());
                }

                // We convert any cold uses that are already on the stack to just point to
                // the canonical stack location.
                if (!Arg::isColdUse(role))
                    return;

                if (!inst.admitsStack(arg))
                    return;

                auto& entry = m_map[tmp];
                if (!entry.reg) {
                    // We're a cold use, and our current location is already on the stack. Just use that.
                    arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP());
                }
            });

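            // Registers clobbered by a Patch (this instruction's late clobbers, plus the next
            // instruction's early clobbers) are treated as named defs so that whatever currently
            // occupies them gets spilled before generation. For non-terminal instructions they
            // are returned to the free pool right after generation; see the clobberedRegisters
            // loop below.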
            RegisterSet clobberedRegisters;
            {
                Inst* nextInst = block->get(instIndex + 1);
                if (inst.kind.opcode == Patch || (nextInst && nextInst->kind.opcode == Patch)) {
                    if (inst.kind.opcode == Patch)
                        clobberedRegisters.merge(inst.extraClobberedRegs());
                    if (nextInst && nextInst->kind.opcode == Patch)
                        clobberedRegisters.merge(nextInst->extraEarlyClobberedRegs());

                    clobberedRegisters.filter(m_allowedRegisters);
                    clobberedRegisters.exclude(m_namedDefdRegs);

                    m_namedDefdRegs.merge(clobberedRegisters);
                }
            }

            auto allocNamed = [&] (const RegisterSet& named, bool isDef) {
                for (Reg reg : named) {
                    if (Tmp occupyingTmp = currentAllocation[reg]) {
                        if (occupyingTmp == Tmp(reg))
                            continue;
                    }

                    freeDeadTmpsIfNeeded(); // We don't want to spill a dead tmp.
                    alloc(Tmp(reg), reg, isDef);
                }
            };

            allocNamed(m_namedUsedRegs, false); // Must come before the defd registers since we may use and def the same register.
            allocNamed(m_namedDefdRegs, true);

            if (needsToGenerate) {
                auto tryAllocate = [&] {
                    Vector<Tmp*, 8> usesToAlloc;
                    Vector<Tmp*, 8> defsToAlloc;

                    inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                        if (tmp.isReg())
                            return;

                        // We treat Use+Def as a use.
                        if (Arg::isAnyUse(role))
                            usesToAlloc.append(&tmp);
                        else if (Arg::isAnyDef(role))
                            defsToAlloc.append(&tmp);
                    });

                    auto tryAllocateTmps = [&] (auto& vector, bool isDef) {
                        bool success = true;
                        for (Tmp* tmp : vector)
                            success &= assignTmp(*tmp, tmp->bank(), isDef);
                        return success;
                    };

                    // We first handle uses, then defs. We want to be able to tell the register allocator
                    // which tmps need to be loaded from memory into their assigned register. Such
                    // tmps are uses. Defs don't need to be reloaded since we're defining them. However,
                    // some tmps may be both used and defd. So we handle uses first, since forEachTmp could
                    // walk uses/defs in any order.
                    bool success = true;
                    success &= tryAllocateTmps(usesToAlloc, false);
                    success &= tryAllocateTmps(defsToAlloc, true);
                    return success;
                };

                // We first try to give every Tmp a register. If that exhausts the available
                // registers, we convert any argument that accepts stack into a stack addr
                // instead. This can happen for Insts that take many args, where most args can
                // just be stack values.
                bool success = tryAllocate();
                if (!success) {
                    RELEASE_ASSERT(!isReplayingSameInst); // We should only need to do the below at most once per inst.

                    // We need to capture the register state before we start spilling things
                    // since we may have multiple arguments that are the same register.
                    IndexMap<Reg, Tmp> allocationSnapshot = currentAllocation;

                    // We rewind this Inst to its previous state; however, if any arg admits stack,
                    // we switch to providing that arg in stack form. This will allow us to fully
                    // allocate this inst when we replay it.
                    inst.forEachArg([&] (Arg& arg, Arg::Role, Bank, Width) {
                        if (!arg.isTmp())
                            return;

                        Tmp tmp = arg.tmp();
                        if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                            return;

                        if (tmp.isReg()) {
                            Tmp originalTmp = allocationSnapshot[tmp.reg()];
                            if (originalTmp.isReg()) {
                                ASSERT(tmp.reg() == originalTmp.reg());
                                // This means this Inst referred to this reg directly. We leave these as is.
                                return;
                            }
                            tmp = originalTmp;
                        }

                        if (!inst.admitsStack(arg)) {
                            arg = tmp;
                            return;
                        }

                        auto& entry = m_map[tmp];
                        if (Reg reg = entry.reg)
                            spill(tmp, reg);

                        arg = Arg::addr(Tmp(GPRInfo::callFrameRegister), entry.spillSlot->offsetFromFP());
                    });

                    --instIndex;
                    isReplayingSameInst = true;
                    continue;
                }

                isReplayingSameInst = false;
            }

            if (m_code.needsUsedRegisters() && inst.kind.opcode == Patch) {
                freeDeadTmpsIfNeeded();
                RegisterSet registerSet;
                for (size_t i = 0; i < currentAllocation.size(); ++i) {
                    if (currentAllocation[i])
                        registerSet.set(Reg::fromIndex(i));
                }
                inst.reportUsedRegisters(registerSet);
            }

            if (inst.isTerminal() && block->numSuccessors()) {
                // By default, we spill everything between block boundaries. However, we have a small
                // heuristic to pass along register state. We should eventually make this better.
                // For now, if a successor has a single predecessor (us) and we haven't yet generated
                // code for it, we hand it our register state. If every successor can take on our
                // register state this way, we don't flush at the end of this block.

                bool everySuccessorGetsOurRegisterState = true;
                for (unsigned i = 0; i < block->numSuccessors(); ++i) {
                    BasicBlock* successor = block->successorBlock(i);
                    if (successor->numPredecessors() == 1 && !context.blockLabels[successor]->isSet())
                        currentAllocationMap[successor] = currentAllocation;
                    else
                        everySuccessorGetsOurRegisterState = false;
                }
                if (!everySuccessorGetsOurRegisterState) {
                    for (Tmp tmp : liveness.liveAtTail(block)) {
                        if (tmp.isReg() && isDisallowedRegister(tmp.reg()))
                            continue;
                        if (Reg reg = m_map[tmp].reg)
                            flush(tmp, reg);
                    }
                }
            }

            if (!inst.isTerminal()) {
                CCallHelpers::Jump jump;
                if (needsToGenerate)
                    jump = inst.generate(*m_jit, context);
                ASSERT_UNUSED(jump, !jump.isSet());

                for (Reg reg : clobberedRegisters) {
                    Tmp tmp(reg);
                    ASSERT(currentAllocation[reg] == tmp);
                    m_availableRegs[tmp.bank()].set(reg);
                    m_currentAllocation->at(reg) = Tmp();
                    m_map[tmp].reg = Reg();
                }
            } else {
                ASSERT(needsToGenerate);
                if (inst.kind.opcode == Jump && block->successorBlock(0) == m_code.findNextBlock(block))
                    needsToGenerate = false;

                if (isReturn(inst.kind.opcode)) {
                    needsToGenerate = false;

                    // We currently don't represent the full epilogue in Air, so we need to
                    // have this override.
                    if (m_code.frameSize()) {
                        m_jit->emitRestore(m_code.calleeSaveRegisterAtOffsetList());
                        m_jit->emitFunctionEpilogue();
                    } else
                        m_jit->emitFunctionEpilogueWithEmptyFrame();
                    m_jit->ret();
                }

                if (needsToGenerate) {
                    CCallHelpers::Jump jump = inst.generate(*m_jit, context);

                    // The jump won't be set for patchpoints. It also won't be set for Oops, since
                    // an Oops block has no successors.
                    if (jump.isSet()) {
                        switch (block->numSuccessors()) {
                        case 1:
                            link(jump, block->successorBlock(0));
                            break;
                        case 2:
                            link(jump, block->successorBlock(0));
                            if (block->successorBlock(1) != m_code.findNextBlock(block))
                                link(m_jit->jump(), block->successorBlock(1));
                            break;
                        default:
                            RELEASE_ASSERT_NOT_REACHED();
                            break;
                        }
                    }
                }
            }

            auto endLabel = m_jit->labelIgnoringWatchpoints();
            if (disassembler)
                disassembler->addInst(&inst, startLabel, endLabel);

            ++m_globalInstIndex;
        }

        // Registers usually get spilled at block boundaries. We do it this way since we don't
        // want to iterate the entire TmpMap, as usually #Tmps >> #Regs. We may not actually spill
        // all registers, but at the top of this loop we handle that case by pre-populating register
        // state. Here, we just clear this map. After this loop, this map should contain only
        // null entries.
        for (size_t i = 0; i < currentAllocation.size(); ++i) {
            if (Tmp tmp = currentAllocation[i])
                m_map[tmp].reg = Reg();
        }
    }

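    // Emit the out-of-line spill stubs for Tmps def'd by terminal patchpoints. Each block that
    // insertBlocksForFlushAfterTerminalPatchpoints() added planted a jump (data.jump) ahead of
    // its own code; here we link that jump to code that stores each def'd Tmp from its assigned
    // register back to its spill slot, then jump back to the block's continueLabel.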
    for (auto& entry : m_blocksAfterTerminalPatchForSpilling) {
        entry.value.jump.linkTo(m_jit->label(), m_jit);
        const HashMap<Tmp, Arg*>& spills = entry.value.defdTmps;
        for (auto& entry : spills) {
            Arg* arg = entry.value;
            if (!arg->isTmp())
                continue;
            Tmp originalTmp = entry.key;
            Tmp currentTmp = arg->tmp();
            ASSERT_WITH_MESSAGE(currentTmp.isReg(), "We already did register allocation so we should have assigned this Tmp to a register.");
            flush(originalTmp, currentTmp.reg());
        }
        m_jit->jump().linkTo(entry.value.continueLabel, m_jit);
    }

    context.currentBlock = nullptr;
    context.indexInBlock = UINT_MAX;

    Vector<CCallHelpers::Label> entrypointLabels(m_code.numEntrypoints());
    for (unsigned i = m_code.numEntrypoints(); i--;)
        entrypointLabels[i] = *context.blockLabels[m_code.entrypoint(i).block()];
    m_code.setEntrypointLabels(WTFMove(entrypointLabels));

    if (disassembler)
        disassembler->startLatePath(*m_jit);

    // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689
    for (auto& latePath : context.latePaths)
        latePath->run(*m_jit, context);

    if (disassembler)
        disassembler->endLatePath(*m_jit);
}

} } } // namespace JSC::B3::Air

#endif // ENABLE(B3_JIT)