/*
 * Copyright (C) 2015-2019 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "B3LowerMacros.h"

#if ENABLE(B3_JIT)

#include "AllowMacroScratchRegisterUsage.h"
#include "B3AtomicValue.h"
#include "B3BasicBlockInlines.h"
#include "B3BlockInsertionSet.h"
#include "B3CCallValue.h"
#include "B3CaseCollectionInlines.h"
#include "B3ConstPtrValue.h"
#include "B3FenceValue.h"
#include "B3InsertionSetInlines.h"
#include "B3MemoryValueInlines.h"
#include "B3PatchpointValue.h"
#include "B3PhaseScope.h"
#include "B3ProcedureInlines.h"
#include "B3StackmapGenerationParams.h"
#include "B3SwitchValue.h"
#include "B3UpsilonValue.h"
#include "B3UseCounts.h"
#include "B3ValueInlines.h"
#include "CCallHelpers.h"
#include "LinkBuffer.h"
#include <cmath>
#include <wtf/BitVector.h>

namespace JSC { namespace B3 {

namespace {

class LowerMacros {
public:
    LowerMacros(Procedure& proc)
        : m_proc(proc)
        , m_blockInsertionSet(proc)
        , m_insertionSet(proc)
        , m_useCounts(proc)
    {
    }

    bool run()
    {
        RELEASE_ASSERT(!m_proc.hasQuirks());

        for (BasicBlock* block : m_proc) {
            m_block = block;
            processCurrentBlock();
        }
        m_changed |= m_blockInsertionSet.execute();
        if (m_changed) {
            m_proc.resetReachability();
            m_proc.invalidateCFG();
        }

        // This indicates that we've lowered the macros, which may have introduced
        // quirky constructs (e.g. a patchpoint with terminal effects) that are only
        // valid from this phase onwards.
        m_proc.setHasQuirks(true);

        return m_changed;
    }

private:
    void processCurrentBlock()
    {
        for (m_index = 0; m_index < m_block->size(); ++m_index) {
            m_value = m_block->at(m_index);
            m_origin = m_value->origin();
            switch (m_value->opcode()) {
            case Mod: {
                if (m_value->isChill()) {
                    if (isARM64()) {
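                        // ARM64 has no integer remainder instruction, so chill Mod becomes
                        // chill Div plus Mul/Sub. The zero-denominator case still needs its
                        // own branch: Div<Chill>(n, 0) is 0, so n - 0 * 0 would yield n,
                        // while Mod<Chill>(n, 0) must yield 0.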
                        BasicBlock* before = m_blockInsertionSet.splitForward(m_block, m_index, &m_insertionSet);
                        BasicBlock* zeroDenCase = m_blockInsertionSet.insertBefore(m_block);
                        BasicBlock* normalModCase = m_blockInsertionSet.insertBefore(m_block);

                        before->replaceLastWithNew<Value>(m_proc, Branch, m_origin, m_value->child(1));
                        before->setSuccessors(
                            FrequentedBlock(normalModCase, FrequencyClass::Normal),
                            FrequentedBlock(zeroDenCase, FrequencyClass::Rare));

                        Value* divResult = normalModCase->appendNew<Value>(m_proc, chill(Div), m_origin, m_value->child(0), m_value->child(1));
                        Value* multipliedBack = normalModCase->appendNew<Value>(m_proc, Mul, m_origin, divResult, m_value->child(1));
                        Value* result = normalModCase->appendNew<Value>(m_proc, Sub, m_origin, m_value->child(0), multipliedBack);
                        UpsilonValue* normalResult = normalModCase->appendNew<UpsilonValue>(m_proc, m_origin, result);
                        normalModCase->appendNew<Value>(m_proc, Jump, m_origin);
                        normalModCase->setSuccessors(FrequentedBlock(m_block));

                        UpsilonValue* zeroResult = zeroDenCase->appendNew<UpsilonValue>(
                            m_proc, m_origin,
                            zeroDenCase->appendIntConstant(m_proc, m_value, 0));
                        zeroDenCase->appendNew<Value>(m_proc, Jump, m_origin);
                        zeroDenCase->setSuccessors(FrequentedBlock(m_block));

                        Value* phi = m_insertionSet.insert<Value>(m_index, Phi, m_value->type(), m_origin);
                        normalResult->setPhi(phi);
                        zeroResult->setPhi(phi);
                        m_value->replaceWithIdentity(phi);
                        before->updatePredecessorsAfter();
                        m_changed = true;
                    } else
                        makeDivisionChill(Mod);
                    break;
                }

                auto* fmodDouble = tagCFunctionPtr<double (*)(double, double)>(fmod, B3CCallPtrTag);
                if (m_value->type() == Double) {
                    Value* functionAddress = m_insertionSet.insert<ConstPtrValue>(m_index, m_origin, fmodDouble);
                    Value* result = m_insertionSet.insert<CCallValue>(m_index, Double, m_origin,
                        Effects::none(),
                        functionAddress,
                        m_value->child(0),
                        m_value->child(1));
                    m_value->replaceWithIdentity(result);
                    m_changed = true;
                } else if (m_value->type() == Float) {
                    Value* numeratorAsDouble = m_insertionSet.insert<Value>(m_index, FloatToDouble, m_origin, m_value->child(0));
                    Value* denominatorAsDouble = m_insertionSet.insert<Value>(m_index, FloatToDouble, m_origin, m_value->child(1));
                    Value* functionAddress = m_insertionSet.insert<ConstPtrValue>(m_index, m_origin, fmodDouble);
                    Value* doubleMod = m_insertionSet.insert<CCallValue>(m_index, Double, m_origin,
                        Effects::none(),
                        functionAddress,
                        numeratorAsDouble,
                        denominatorAsDouble);
                    Value* result = m_insertionSet.insert<Value>(m_index, DoubleToFloat, m_origin, doubleMod);
                    m_value->replaceWithIdentity(result);
                    m_changed = true;
                } else if (isARM64()) {
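                    // Same trick as the chill case above: there is no remainder
                    // instruction on ARM64, so compute n - (n / d) * d. Using chill(Div)
                    // is fine here because non-chill Mod has undefined behavior for
                    // exactly the inputs where chill(Div) differs from Div.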
                    Value* divResult = m_insertionSet.insert<Value>(m_index, chill(Div), m_origin, m_value->child(0), m_value->child(1));
                    Value* multipliedBack = m_insertionSet.insert<Value>(m_index, Mul, m_origin, divResult, m_value->child(1));
                    Value* result = m_insertionSet.insert<Value>(m_index, Sub, m_origin, m_value->child(0), multipliedBack);
                    m_value->replaceWithIdentity(result);
                    m_changed = true;
                }
                break;
            }

            case UMod: {
                if (isARM64()) {
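                    // ARM64 has no remainder instruction for the unsigned case either:
                    // compute n - (n / d) * d with UDiv.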
                    Value* divResult = m_insertionSet.insert<Value>(m_index, UDiv, m_origin, m_value->child(0), m_value->child(1));
                    Value* multipliedBack = m_insertionSet.insert<Value>(m_index, Mul, m_origin, divResult, m_value->child(1));
                    Value* result = m_insertionSet.insert<Value>(m_index, Sub, m_origin, m_value->child(0), multipliedBack);
                    m_value->replaceWithIdentity(result);
                    m_changed = true;
                }
                break;
            }

            case Div: {
                if (m_value->isChill())
                    makeDivisionChill(Div);
                break;
            }

            case CheckMul: {
                if (isARM64() && m_value->child(0)->type() == Int32) {
                    CheckValue* checkMul = m_value->as<CheckValue>();

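                    // ARM64 has no flags-setting 32-bit multiply. Instead, sign-extend
                    // both operands, do a 64-bit multiply, and declare overflow if the
                    // high 32 bits of the product disagree with the sign bit of the low
                    // 32 bits.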
                    Value* left = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(0));
                    Value* right = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(1));
                    Value* mulResult = m_insertionSet.insert<Value>(m_index, Mul, m_origin, left, right);
                    Value* mulResult32 = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, mulResult);
                    Value* upperResult = m_insertionSet.insert<Value>(m_index, Trunc, m_origin,
                        m_insertionSet.insert<Value>(m_index, SShr, m_origin, mulResult, m_insertionSet.insert<Const32Value>(m_index, m_origin, 32)));
                    Value* signBit = m_insertionSet.insert<Value>(m_index, SShr, m_origin,
                        mulResult32,
                        m_insertionSet.insert<Const32Value>(m_index, m_origin, 31));
                    Value* hasOverflowed = m_insertionSet.insert<Value>(m_index, NotEqual, m_origin, upperResult, signBit);

                    CheckValue* check = m_insertionSet.insert<CheckValue>(m_index, Check, m_origin, hasOverflowed);
                    check->setGenerator(checkMul->generator());
                    check->clobberEarly(checkMul->earlyClobbered());
                    check->clobberLate(checkMul->lateClobbered());
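                    // Carry the stackmap arguments over to the new Check, skipping the
                    // first two constrained children (the multiply's operands).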
                    auto children = checkMul->constrainedChildren();
                    auto it = children.begin();
                    for (std::advance(it, 2); it != children.end(); ++it)
                        check->append(*it);

                    m_value->replaceWithIdentity(mulResult32);
                    m_changed = true;
                }
                break;
            }

            case Switch: {
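                // Lower Switch to a mix of jump tables and binary search: sort the
                // cases, then let recursivelyBuildSwitch() pick a strategy for each
                // dense or sparse run of case values.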
                SwitchValue* switchValue = m_value->as<SwitchValue>();
                Vector<SwitchCase> cases;
                for (SwitchCase switchCase : switchValue->cases(m_block))
                    cases.append(switchCase);
                std::sort(
                    cases.begin(), cases.end(),
                    [] (const SwitchCase& left, const SwitchCase& right) {
                        return left.caseValue() < right.caseValue();
                    });
                FrequentedBlock fallThrough = m_block->fallThrough();
                m_block->values().removeLast();
                recursivelyBuildSwitch(cases, fallThrough, 0, false, cases.size(), m_block);
                m_proc.deleteValue(switchValue);
                m_block->updatePredecessorsAfter();
                m_changed = true;
                break;
            }

            case Depend: {
                if (isX86()) {
                    // Create a load-load fence. This codegens to nothing on X86. We use it
                    // to tell the compiler not to move loads across this point.
                    FenceValue* fence = m_insertionSet.insert<FenceValue>(m_index, m_origin);
                    fence->read = HeapRange();
                    fence->write = HeapRange::top();

                    // Kill the Depend, which should unlock a bunch of code simplification.
                    m_value->replaceWithBottom(m_insertionSet, m_index);

                    m_changed = true;
                }
                break;
            }

            case AtomicWeakCAS:
            case AtomicStrongCAS: {
                AtomicValue* atomic = m_value->as<AtomicValue>();
                Width width = atomic->accessWidth();

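                // Only sub-canonical (8- and 16-bit) accesses need fixups; 32- and
                // 64-bit results are already in canonical form.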
                if (isCanonicalWidth(width))
                    break;

                Value* expectedValue = atomic->child(0);

                if (!isX86()) {
                    // On ARM, the load part of the CAS does a load with zero extension. Therefore, we need
                    // to zero-extend the input.
                    Value* maskedExpectedValue = m_insertionSet.insert<Value>(
                        m_index, BitAnd, m_origin, expectedValue,
                        m_insertionSet.insertIntConstant(m_index, expectedValue, mask(width)));

                    atomic->child(0) = maskedExpectedValue;
                    m_changed = true;
                }

                if (atomic->opcode() == AtomicStrongCAS) {
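                    // A strong CAS returns the value it loaded. Canonicalize the
                    // sub-width result by sign-extending a clone of the atomic.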
                    Value* newValue = m_insertionSet.insert<Value>(
                        m_index, signExtendOpcode(width), m_origin,
                        m_insertionSet.insertClone(m_index, atomic));

                    atomic->replaceWithIdentity(newValue);
                    m_changed = true;
                }

                break;
            }

            case AtomicXchgAdd:
            case AtomicXchgAnd:
            case AtomicXchgOr:
            case AtomicXchgSub:
            case AtomicXchgXor:
            case AtomicXchg: {
                // On X86, these may actually return garbage in the high bits. On ARM64, these sorta
                // zero-extend their high bits, except that the high bits might get polluted by high
                // bits in the operand. So, either way, we need to throw a sign-extend on these
                // things.

                if (isX86()) {
                    if (m_value->opcode() == AtomicXchgSub && m_useCounts.numUses(m_value)) {
                        // On x86, AtomicXchgAdd maps directly to lock xadd, which returns
                        // the old value; there is no equivalent subtract instruction. So
                        // if the old value has any users, negate the operand and use
                        // AtomicXchgAdd instead.
                        m_value->setOpcodeUnsafely(AtomicXchgAdd);
                        m_value->child(0) = m_insertionSet.insert<Value>(
                            m_index, Neg, m_origin, m_value->child(0));
                    }

                    bool exempt = false;
                    switch (m_value->opcode()) {
                    case AtomicXchgAnd:
                    case AtomicXchgOr:
                    case AtomicXchgSub:
                    case AtomicXchgXor:
                        exempt = true;
                        break;
                    default:
                        break;
                    }
                    if (exempt)
                        break;
                }

                AtomicValue* atomic = m_value->as<AtomicValue>();
                Width width = atomic->accessWidth();

                if (isCanonicalWidth(width))
                    break;

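                // Clone the atomic so the clone does the real work, then canonicalize
                // its sub-width result with a sign extension, as above.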
                Value* newValue = m_insertionSet.insert<Value>(
                    m_index, signExtendOpcode(width), m_origin,
                    m_insertionSet.insertClone(m_index, atomic));

                atomic->replaceWithIdentity(newValue);
                m_changed = true;
                break;
            }

            case Load8Z:
            case Load16Z: {
                if (isX86())
                    break;

                MemoryValue* memory = m_value->as<MemoryValue>();
                if (!memory->hasFence())
                    break;

                // Sub-width load-acq on ARM64 always sign extends.
                Value* newLoad = m_insertionSet.insertClone(m_index, memory);
                newLoad->setOpcodeUnsafely(memory->opcode() == Load8Z ? Load8S : Load16S);

                Value* newValue = m_insertionSet.insert<Value>(
                    m_index, BitAnd, m_origin, newLoad,
                    m_insertionSet.insertIntConstant(
                        m_index, m_origin, Int32, mask(memory->accessWidth())));

                m_value->replaceWithIdentity(newValue);
                m_changed = true;
                break;
            }

            default:
                break;
            }
        }
        m_insertionSet.execute(m_block);
    }

    void makeDivisionChill(Opcode nonChillOpcode)
    {
        ASSERT(nonChillOpcode == Div || nonChillOpcode == Mod);

        // ARM64's division instructions already have chill semantics: dividing by zero
        // yields zero and INT_MIN / -1 does not trap.
        if (isARM64())
            return;

        // We implement "res = Div<Chill>/Mod<Chill>(num, den)" as follows:
        //
        //     if (den + 1 <=_unsigned 1) {
        //         if (!den) {
        //             res = 0;
        //             goto done;
        //         }
        //         if (num == -2147483648) {
        //             res = isDiv ? num : 0;
        //             goto done;
        //         }
        //     }
        //     res = num (/ or %) den;
        // done:
        m_changed = true;

        Value* num = m_value->child(0);
        Value* den = m_value->child(1);

        Value* one = m_insertionSet.insertIntConstant(m_index, m_value, 1);
        Value* isDenOK = m_insertionSet.insert<Value>(
            m_index, Above, m_origin,
            m_insertionSet.insert<Value>(m_index, Add, m_origin, den, one),
            one);

        BasicBlock* before = m_blockInsertionSet.splitForward(m_block, m_index, &m_insertionSet);

        BasicBlock* normalDivCase = m_blockInsertionSet.insertBefore(m_block);
        BasicBlock* shadyDenCase = m_blockInsertionSet.insertBefore(m_block);
        BasicBlock* zeroDenCase = m_blockInsertionSet.insertBefore(m_block);
        BasicBlock* neg1DenCase = m_blockInsertionSet.insertBefore(m_block);
        BasicBlock* intMinCase = m_blockInsertionSet.insertBefore(m_block);

        before->replaceLastWithNew<Value>(m_proc, Branch, m_origin, isDenOK);
        before->setSuccessors(
            FrequentedBlock(normalDivCase, FrequencyClass::Normal),
            FrequentedBlock(shadyDenCase, FrequencyClass::Rare));

        UpsilonValue* normalResult = normalDivCase->appendNew<UpsilonValue>(
            m_proc, m_origin,
            normalDivCase->appendNew<Value>(m_proc, nonChillOpcode, m_origin, num, den));
        normalDivCase->appendNew<Value>(m_proc, Jump, m_origin);
        normalDivCase->setSuccessors(FrequentedBlock(m_block));

        shadyDenCase->appendNew<Value>(m_proc, Branch, m_origin, den);
        shadyDenCase->setSuccessors(
            FrequentedBlock(neg1DenCase, FrequencyClass::Normal),
            FrequentedBlock(zeroDenCase, FrequencyClass::Rare));

        UpsilonValue* zeroResult = zeroDenCase->appendNew<UpsilonValue>(
            m_proc, m_origin,
            zeroDenCase->appendIntConstant(m_proc, m_value, 0));
        zeroDenCase->appendNew<Value>(m_proc, Jump, m_origin);
        zeroDenCase->setSuccessors(FrequentedBlock(m_block));

        int64_t badNumeratorConst = 0;
        switch (m_value->type().kind()) {
        case Int32:
            badNumeratorConst = std::numeric_limits<int32_t>::min();
            break;
        case Int64:
            badNumeratorConst = std::numeric_limits<int64_t>::min();
            break;
        default:
            ASSERT_NOT_REACHED();
            badNumeratorConst = 0;
        }

        Value* badNumerator =
            neg1DenCase->appendIntConstant(m_proc, m_value, badNumeratorConst);

        neg1DenCase->appendNew<Value>(
            m_proc, Branch, m_origin,
            neg1DenCase->appendNew<Value>(
                m_proc, Equal, m_origin, num, badNumerator));
        neg1DenCase->setSuccessors(
            FrequentedBlock(intMinCase, FrequencyClass::Rare),
            FrequentedBlock(normalDivCase, FrequencyClass::Normal));

        Value* intMinResult = nonChillOpcode == Div ? badNumerator : intMinCase->appendIntConstant(m_proc, m_value, 0);
        UpsilonValue* intMinResultUpsilon = intMinCase->appendNew<UpsilonValue>(
            m_proc, m_origin, intMinResult);
        intMinCase->appendNew<Value>(m_proc, Jump, m_origin);
        intMinCase->setSuccessors(FrequentedBlock(m_block));

        Value* phi = m_insertionSet.insert<Value>(
            m_index, Phi, m_value->type(), m_origin);
        normalResult->setPhi(phi);
        zeroResult->setPhi(phi);
        intMinResultUpsilon->setPhi(phi);

        m_value->replaceWithIdentity(phi);
        before->updatePredecessorsAfter();
    }

    void recursivelyBuildSwitch(
        const Vector<SwitchCase>& cases, FrequentedBlock fallThrough, unsigned start, bool hardStart,
        unsigned end, BasicBlock* before)
    {
        Value* child = m_value->child(0);
        Type type = child->type();

        // It's a good idea to use a table-based switch in some cases: the number of cases has to be
        // large enough and they have to be dense enough. This could probably be improved a lot. For
        // example, we could still use a jump table in cases where the inputs are sparse so long as we
        // shift off the uninteresting bits. On the other hand, it's not clear that this would
        // actually be any better than what we have done here and it's not clear that it would be
        // better than a binary switch.
        const unsigned minCasesForTable = 7;
        const unsigned densityLimit = 4;
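        // The density test below admits a table only when the value span per case is
        // under densityLimit, i.e. the table would be at least about 25% populated.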
        if (end - start >= minCasesForTable) {
            int64_t firstValue = cases[start].caseValue();
            int64_t lastValue = cases[end - 1].caseValue();
            if ((lastValue - firstValue + 1) / (end - start) < densityLimit) {
                BasicBlock* switchBlock = m_blockInsertionSet.insertAfter(m_block);
                Value* index = before->appendNew<Value>(
                    m_proc, Sub, m_origin, child,
                    before->appendIntConstant(m_proc, m_origin, type, firstValue));
                before->appendNew<Value>(
                    m_proc, Branch, m_origin,
                    before->appendNew<Value>(
                        m_proc, Above, m_origin, index,
                        before->appendIntConstant(m_proc, m_origin, type, lastValue - firstValue)));
                before->setSuccessors(fallThrough, FrequentedBlock(switchBlock));

                size_t tableSize = lastValue - firstValue + 1;

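                // The patchpoint below addresses the table at pointer width, so widen a
                // 32-bit index on 64-bit targets.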
                if (index->type() != pointerType() && index->type() == Int32)
                    index = switchBlock->appendNew<Value>(m_proc, ZExt32, m_origin, index);

                PatchpointValue* patchpoint =
                    switchBlock->appendNew<PatchpointValue>(m_proc, Void, m_origin);

                // Even though this loads from the jump table, the jump table is immutable. For the
                // purpose of alias analysis, reading something immutable is like reading nothing.
                patchpoint->effects = Effects();
                patchpoint->effects.terminal = true;

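                // The generator gets the table index in some register, plus GP scratch
                // registers to materialize the table address and the loaded jump target.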
                patchpoint->appendSomeRegister(index);
                patchpoint->numGPScratchRegisters = 2;
                // Technically, we don't have to clobber macro registers on X86_64. This is probably
                // OK though.
                patchpoint->clobber(RegisterSet::macroScratchRegisters());

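                // Append one successor per case, remembering which table slots are
                // covered; any remaining slots will dispatch to the fall-through block.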
                BitVector handledIndices;
                for (unsigned i = start; i < end; ++i) {
                    FrequentedBlock block = cases[i].target();
                    int64_t value = cases[i].caseValue();
                    switchBlock->appendSuccessor(block);
                    size_t index = value - firstValue;
                    ASSERT(!handledIndices.get(index));
                    handledIndices.set(index);
                }

                bool hasUnhandledIndex = false;
                for (unsigned i = 0; i < tableSize; ++i) {
                    if (!handledIndices.get(i)) {
                        hasUnhandledIndex = true;
                        break;
                    }
                }

                if (hasUnhandledIndex)
                    switchBlock->appendSuccessor(fallThrough);

                patchpoint->setGenerator(
                    [=] (CCallHelpers& jit, const StackmapGenerationParams& params) {
                        AllowMacroScratchRegisterUsage allowScratch(jit);

                        using JumpTableCodePtr = MacroAssemblerCodePtr<JSSwitchPtrTag>;
                        JumpTableCodePtr* jumpTable = static_cast<JumpTableCodePtr*>(
                            params.proc().addDataSection(sizeof(JumpTableCodePtr) * tableSize));

                        GPRReg index = params[0].gpr();
                        GPRReg scratch = params.gpScratch(0);

                        jit.move(CCallHelpers::TrustedImmPtr(jumpTable), scratch);
                        jit.load64(CCallHelpers::BaseIndex(scratch, index, CCallHelpers::timesPtr()), scratch);
                        jit.farJump(scratch, JSSwitchPtrTag);

                        // These labels are guaranteed to be populated before either late paths or
                        // link tasks run.
                        Vector<Box<CCallHelpers::Label>> labels = params.successorLabels();

                        jit.addLinkTask(
                            [=] (LinkBuffer& linkBuffer) {
                                if (hasUnhandledIndex) {
                                    JumpTableCodePtr fallThrough = linkBuffer.locationOf<JSSwitchPtrTag>(*labels.last());
                                    for (unsigned i = tableSize; i--;)
                                        jumpTable[i] = fallThrough;
                                }

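                                // The cases were sorted by value, so the successor labels
                                // appear in the same order as the set bits of
                                // handledIndices.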
                                unsigned labelIndex = 0;
                                for (unsigned tableIndex : handledIndices)
                                    jumpTable[tableIndex] = linkBuffer.locationOf<JSSwitchPtrTag>(*labels[labelIndex++]);
                            });
                    });
                return;
            }
        }

        // See comments in jit/BinarySwitch.cpp for a justification of this algorithm. The only
        // thing we do differently is that we don't use randomness.

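        // Ranges of at most leafThreshold cases are emitted as a linear sequence of
        // equality checks instead of recursing further.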
        const unsigned leafThreshold = 3;

        unsigned size = end - start;

        if (size <= leafThreshold) {
            bool allConsecutive = false;

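            // If the surrounding binary search plus the adjacency of the neighboring
            // case values pins any value reaching this leaf into a gap-free range, then
            // once the first size - 1 equality checks have failed, the value can only
            // be the last case, so that final check can be skipped.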
            if ((hardStart || (start && cases[start - 1].caseValue() == cases[start].caseValue() - 1))
                && end < cases.size()
                && cases[end - 1].caseValue() == cases[end].caseValue() - 1) {
                allConsecutive = true;
                for (unsigned i = 0; i < size - 1; ++i) {
                    if (cases[start + i].caseValue() + 1 != cases[start + i + 1].caseValue()) {
                        allConsecutive = false;
                        break;
                    }
                }
            }

            unsigned limit = allConsecutive ? size - 1 : size;

            for (unsigned i = 0; i < limit; ++i) {
                BasicBlock* nextCheck = m_blockInsertionSet.insertAfter(m_block);
                before->appendNew<Value>(
                    m_proc, Branch, m_origin,
                    before->appendNew<Value>(
                        m_proc, Equal, m_origin, child,
                        before->appendIntConstant(
                            m_proc, m_origin, type,
                            cases[start + i].caseValue())));
                before->setSuccessors(cases[start + i].target(), FrequentedBlock(nextCheck));

                before = nextCheck;
            }

            before->appendNew<Value>(m_proc, Jump, m_origin);
            if (allConsecutive)
                before->setSuccessors(cases[end - 1].target());
            else
                before->setSuccessors(fallThrough);
            return;
        }

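        // Otherwise, split at the median and recurse. Values below the median go left;
        // the right half then has a hard lower bound (hardStart), since the branch
        // proved child >= cases[medianIndex].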
        unsigned medianIndex = (start + end) / 2;

        BasicBlock* left = m_blockInsertionSet.insertAfter(m_block);
        BasicBlock* right = m_blockInsertionSet.insertAfter(m_block);

        before->appendNew<Value>(
            m_proc, Branch, m_origin,
            before->appendNew<Value>(
                m_proc, LessThan, m_origin, child,
                before->appendIntConstant(
                    m_proc, m_origin, type,
                    cases[medianIndex].caseValue())));
        before->setSuccessors(FrequentedBlock(left), FrequentedBlock(right));

        recursivelyBuildSwitch(cases, fallThrough, start, hardStart, medianIndex, left);
        recursivelyBuildSwitch(cases, fallThrough, medianIndex, true, end, right);
    }

    Procedure& m_proc;
    BlockInsertionSet m_blockInsertionSet;
    InsertionSet m_insertionSet;
    UseCounts m_useCounts;
    BasicBlock* m_block;
    unsigned m_index;
    Value* m_value;
    Origin m_origin;
    bool m_changed { false };
};

} // anonymous namespace

bool lowerMacros(Procedure& proc)
{
    PhaseScope phaseScope(proc, "B3::lowerMacros");
    LowerMacros lowerMacros(proc);
    return lowerMacros.run();
}

} } // namespace JSC::B3

#endif // ENABLE(B3_JIT)