1/*
2 * Copyright (C) 2016-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "AirEmitShuffle.h"
28
29#if ENABLE(B3_JIT)
30
31#include "AirCode.h"
32#include "AirInstInlines.h"
33#include <wtf/GraphNodeWorklist.h>
34#include <wtf/ListDump.h>
35
36namespace JSC { namespace B3 { namespace Air {
37
38namespace {
39
// Flip to true to get verbose tracing of the shuffle lowering algorithm on dataLog.
namespace AirEmitShuffleInternal {
static const bool verbose = false;
}
43
44template<typename Functor>
45Tmp findPossibleScratch(Code& code, Bank bank, const Functor& functor) {
46 for (Reg reg : code.regsInPriorityOrder(bank)) {
47 Tmp tmp(reg);
48 if (functor(tmp))
49 return tmp;
50 }
51 return Tmp();
52}
53
54Tmp findPossibleScratch(Code& code, Bank bank, const Arg& arg1, const Arg& arg2) {
55 return findPossibleScratch(
56 code, bank,
57 [&] (Tmp tmp) -> bool {
58 return !arg1.usesTmp(tmp) && !arg2.usesTmp(tmp);
59 });
60}
61
// A rotate is a cyclic dependency among shuffle pairs, possibly with extra acyclic
// shifts ("fringe") hanging off of the cycle.
// Example: (a => b, b => a, a => c, b => d)
struct Rotate {
    Vector<ShufflePair> loop; // in the example, this is the loop: (a => b, b => a)
    Vector<ShufflePair> fringe; // in the example, these are the associated shifts: (a => c, b => d)
};
67
68} // anonymous namespace
69
70Bank ShufflePair::bank() const
71{
72 if (src().isMemory() && dst().isMemory() && width() > pointerWidth()) {
73 // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves.
74 return FP;
75 }
76
77 if (src().isGP() && dst().isGP()) {
78 // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we
79 // can do that more efficiently using GPRs, except in the special case above.
80 return GP;
81 }
82
83 return FP;
84}
85
// Lowers this pair to at most two instructions. Memory-to-memory moves get a fresh
// tmp as an extra operand; immediates that have no direct store form are first
// materialized into a tmp with a Move and then stored.
Vector<Inst, 2> ShufflePair::insts(Code& code, Value* origin) const
{
    // Memory-to-memory needs a scratch tmp for the intermediate value.
    if (UNLIKELY(src().isMemory() && dst().isMemory()))
        return { Inst(moveFor(bank(), width()), origin, src(), dst(), code.newTmp(bank())) };

    // The common case: the move form exists directly for this src/dst kind combination.
    if (isValidForm(moveFor(bank(), width()), src().kind(), dst().kind()))
        return { Inst(moveFor(bank(), width()), origin, src(), dst()) };

    // We must be a store immediate or a move immediate if we reach here. The reason:
    // 1. We're not a mem->mem move, given the above check.
    // 2. It's always valid to do a load from Addr into a tmp using Move/Move32/MoveFloat/MoveDouble.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Addr, Arg::Tmp));
    // 3. It's also always valid to do a Tmp->Tmp move.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, Arg::Tmp));
    // 4. It's always valid to do a Tmp->Addr store.
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, Arg::Addr));

    ASSERT(src().isSomeImm());
    // Materialize the immediate into a tmp, then move the tmp to the destination.
    Tmp tmp = code.newTmp(bank());
    ASSERT(isValidForm(Move, Arg::BigImm, Arg::Tmp));
    ASSERT(isValidForm(moveFor(bank(), width()), Arg::Tmp, dst().kind()));
    return {
        Inst(Move, origin, Arg::bigImm(src().value()), tmp),
        Inst(moveFor(bank(), width()), origin, tmp, dst()),
    };
}
112
113void ShufflePair::dump(PrintStream& out) const
114{
115 out.print(width(), ":", src(), "=>", dst());
116}
117
118Inst createShuffle(Value* origin, const Vector<ShufflePair>& pairs)
119{
120 Inst result(Shuffle, origin);
121 for (const ShufflePair& pair : pairs)
122 result.append(pair.src(), pair.dst(), Arg::widthArg(pair.width()));
123 return result;
124}
125
// Lowers a parallel shuffle within one bank to a sequence of Insts. The pairs are
// decomposed into "shifts" (acyclic move chains, emitted in reverse) and "rotates"
// (cyclic dependencies, emitted as Swaps when the forms exist, otherwise as a
// save/shift/restore sequence). The two scratches are spill locations (or tmps)
// that we may borrow when a memory-to-memory move or a swap-less rotate forces us
// to materialize a value in a register.
Vector<Inst> emitShuffle(
    Code& code, Vector<ShufflePair> pairs, std::array<Arg, 2> scratches, Bank bank,
    Value* origin)
{
    if (AirEmitShuffleInternal::verbose) {
        dataLog(
            "Dealing with pairs: ", listDump(pairs), " and scratches ", scratches[0], ", ",
            scratches[1], "\n");
    }

    // Self-moves are no-ops; dropping them up front simplifies the cycle analysis below.
    pairs.removeAllMatching(
        [&] (const ShufflePair& pair) -> bool {
            return pair.src() == pair.dst();
        });

    // First validate that this is the kind of shuffle that we know how to deal with.
#if !ASSERT_DISABLED
    for (const ShufflePair& pair : pairs) {
        ASSERT(pair.src().isBank(bank));
        ASSERT(pair.dst().isBank(bank));
        ASSERT(pair.dst().isTmp() || pair.dst().isMemory());
    }
#endif // !ASSERT_DISABLED

    // There are two possible kinds of operations that we will do:
    //
    // - Shift. Example: (a => b, b => c). We emit this as "Move b, c; Move a, b". This only requires
    //   scratch registers if there are memory->memory moves. We want to find as many of these as
    //   possible because they are cheaper. Note that shifts can involve the same source mentioned
    //   multiple times. Example: (a => b, a => c, b => d, b => e).
    //
    // - Rotate. Example: (a => b, b => a). We want to emit this as "Swap a, b", but that instruction
    //   may not be available, in which case we may need a scratch register or a scratch memory
    //   location. A gnarlier example is (a => b, b => c, c => a). We can emit this as "Swap b, c;
    //   Swap a, b". Note that swapping has to be careful about differing widths.
    //
    // Note that a rotate can have "fringe". For example, we might have (a => b, b => a, a =>c,
    // b => d). This has a rotate loop (a => b, b => a) and some fringe (a => c, b => d). We treat
    // the whole thing as a single rotate.
    //
    // We will find multiple disjoint such operations. We can execute them in any order.

    // We interpret these as Moves that should be executed backwards. All shifts are keyed by their
    // starting source.
    HashMap<Arg, Vector<ShufflePair>> shifts;

    // We interpret these as Swaps over src()'s that should be executed backwards, i.e. for a list
    // of size 3 we would do "Swap list[1].src(), list[2].src(); Swap list[0].src(), list[1].src()".
    // Note that we actually can't do that if the widths don't match or other bad things happen.
    // But, prior to executing all of that, we need to execute the fringe: the shifts coming off the
    // rotate.
    Vector<Rotate> rotates;

    {
        // Group the pairs by source, then walk the dependency graph from an arbitrary
        // source with a DFS worklist. Each connected component is classified as either
        // a shift (acyclic) or a rotate (we came back around to the original source).
        HashMap<Arg, Vector<ShufflePair>> mapping;
        for (const ShufflePair& pair : pairs)
            mapping.add(pair.src(), Vector<ShufflePair>()).iterator->value.append(pair);

        Vector<ShufflePair> currentPairs;

        while (!mapping.isEmpty()) {
            ASSERT(currentPairs.isEmpty());
            Arg originalSrc = mapping.begin()->key;
            ASSERT(!shifts.contains(originalSrc));
            if (AirEmitShuffleInternal::verbose)
                dataLog("Processing from ", originalSrc, "\n");

            GraphNodeWorklist<Arg> worklist;
            worklist.push(originalSrc);
            while (Arg src = worklist.pop()) {
                HashMap<Arg, Vector<ShufflePair>>::iterator iter = mapping.find(src);
                if (iter == mapping.end()) {
                    // With a shift it's possible that we previously built the tail of this shift.
                    // See if that's the case now.
                    if (AirEmitShuffleInternal::verbose)
                        dataLog("Trying to append shift at ", src, "\n");
                    currentPairs.appendVector(shifts.take(src));
                    continue;
                }
                Vector<ShufflePair> pairs = WTFMove(iter->value);
                mapping.remove(iter);

                for (const ShufflePair& pair : pairs) {
                    currentPairs.append(pair);
                    ASSERT(pair.src() == src);
                    worklist.push(pair.dst());
                }
            }

            ASSERT(currentPairs.size());
            ASSERT(currentPairs[0].src() == originalSrc);

            if (AirEmitShuffleInternal::verbose)
                dataLog("currentPairs = ", listDump(currentPairs), "\n");

            // The component is a rotate iff anything writes back into the starting source.
            bool isRotate = false;
            for (const ShufflePair& pair : currentPairs) {
                if (pair.dst() == originalSrc) {
                    isRotate = true;
                    break;
                }
            }

            if (isRotate) {
                if (AirEmitShuffleInternal::verbose)
                    dataLog("It's a rotate.\n");
                Rotate rotate;

                // The common case is that the rotate does not have fringe. The only way to
                // check for this is to examine the whole rotate.
                bool ok;
                if (currentPairs.last().dst() == originalSrc) {
                    ok = true;
                    for (unsigned i = currentPairs.size() - 1; i--;)
                        ok &= currentPairs[i].dst() == currentPairs[i + 1].src();
                } else
                    ok = false;

                if (ok)
                    rotate.loop = WTFMove(currentPairs);
                else {
                    // This is the slow path. The rotate has fringe.

                    // Walk the cycle backwards (dst -> src) starting from the pair that
                    // writes originalSrc; whatever is left over in dstMapping is fringe.
                    HashMap<Arg, ShufflePair> dstMapping;
                    for (const ShufflePair& pair : currentPairs)
                        dstMapping.add(pair.dst(), pair);

                    ShufflePair pair = dstMapping.take(originalSrc);
                    for (;;) {
                        rotate.loop.append(pair);

                        auto iter = dstMapping.find(pair.src());
                        if (iter == dstMapping.end())
                            break;
                        pair = iter->value;
                        dstMapping.remove(iter);
                    }

                    rotate.loop.reverse();

                    // Make sure that the fringe appears in the same order as how it appeared in the
                    // currentPairs, since that's the DFS order.
                    for (const ShufflePair& pair : currentPairs) {
                        // But of course we only include it if it's not in the loop.
                        if (dstMapping.contains(pair.dst()))
                            rotate.fringe.append(pair);
                    }
                }

                // If the graph search terminates because we returned to the first source, then the
                // pair list has to have a very particular shape.
                for (unsigned i = rotate.loop.size() - 1; i--;)
                    ASSERT(rotate.loop[i].dst() == rotate.loop[i + 1].src());
                rotates.append(WTFMove(rotate));
                currentPairs.shrink(0);
            } else {
                if (AirEmitShuffleInternal::verbose)
                    dataLog("It's a shift.\n");
                shifts.add(originalSrc, WTFMove(currentPairs));
            }
        }
    }

    if (AirEmitShuffleInternal::verbose) {
        dataLog("Shifts:\n");
        for (auto& entry : shifts)
            dataLog("    ", entry.key, ": ", listDump(entry.value), "\n");
        dataLog("Rotates:\n");
        for (auto& rotate : rotates)
            dataLog("    loop = ", listDump(rotate.loop), ", fringe = ", listDump(rotate.fringe), "\n");
    }

    // In the worst case, we need two scratch registers. The way we do this is that the client passes
    // us what scratch registers they happen to have lying around. We will need scratch registers in
    // the following cases:
    //
    // - Shuffle pairs where both src and dst refer to memory.
    // - Rotate when no Swap instruction is available.
    //
    // Lucky for us, we are guaranteed to have extra scratch registers anytime we have a Shift that
    // ends with a register. We search for such a register right now.

    auto moveForWidth = [&] (Width width) -> Opcode {
        return moveFor(bank, width);
    };

    // Used for saving/restoring a tmp into a spill-slot scratch; must cover the full width.
    Opcode conservativeMove = moveForWidth(conservativeWidth(bank));

    // We will emit things in reverse. We maintain a list of packs of instructions, and then we emit
    // append them together in reverse (for example the thing at the end of resultPacks is placed
    // first). This is useful because the last thing we emit frees up its destination registers, so
    // it affects how we emit things before it.
    Vector<Vector<Inst>> resultPacks;
    Vector<Inst> result;

    auto commitResult = [&] () {
        resultPacks.append(WTFMove(result));
    };

    // Returns the tmp to use as scratch slot `index`. If the client gave us a tmp, use it
    // directly; otherwise spill `possibleScratch` into the provided memory scratch so we can
    // borrow that register. Returns a null Tmp if no register could be borrowed.
    auto getScratch = [&] (unsigned index, Tmp possibleScratch) -> Tmp {
        if (scratches[index].isTmp())
            return scratches[index].tmp();

        if (!possibleScratch)
            return Tmp();
        result.append(Inst(conservativeMove, origin, possibleScratch, scratches[index]));
        return possibleScratch;
    };

    // Undoes getScratch: reloads the borrowed register from the memory scratch, if we spilled it.
    auto returnScratch = [&] (unsigned index, Tmp tmp) {
        if (Arg(tmp) != scratches[index])
            result.append(Inst(conservativeMove, origin, scratches[index], tmp));
    };

    // Emits one move of a shift, going through a scratch register when no direct move
    // form exists (e.g. memory-to-memory, or an immediate with no store-immediate form).
    auto handleShiftPair = [&] (const ShufflePair& pair, unsigned scratchIndex) {
        Opcode move = moveForWidth(pair.width());

        if (!isValidForm(move, pair.src().kind(), pair.dst().kind())) {
            Tmp scratch =
                getScratch(scratchIndex, findPossibleScratch(code, bank, pair.src(), pair.dst()));
            RELEASE_ASSERT(scratch);
            if (isValidForm(move, pair.src().kind(), Arg::Tmp))
                result.append(Inst(moveForWidth(pair.width()), origin, pair.src(), scratch));
            else {
                ASSERT(pair.src().isSomeImm());
                ASSERT(move == Move32);
                result.append(Inst(Move, origin, Arg::bigImm(pair.src().value()), scratch));
            }
            result.append(Inst(moveForWidth(pair.width()), origin, scratch, pair.dst()));
            returnScratch(scratchIndex, scratch);
            return;
        }

        result.append(Inst(move, origin, pair.src(), pair.dst()));
    };

    // Emits a whole shift in reverse pair order (we emit backwards overall), and if the
    // shift's final destination is a tmp, registers that tmp as a free scratch for later
    // (i.e. earlier-in-final-order) operations.
    auto handleShift = [&] (Vector<ShufflePair>& shift) {
        // FIXME: We could optimize the spill behavior of the shifter by checking if any of the
        // shifts need spills. If they do, then we could try to get a register out here. Note that
        // this may fail where the current strategy succeeds: out here we need a register that does
        // not interfere with any of the shifts, while the current strategy only needs to find a
        // scratch register that does not interfere with a particular shift. So, this optimization
        // will be opportunistic: if it succeeds, then the individual shifts can use that scratch,
        // otherwise they will do what they do now.

        for (unsigned i = shift.size(); i--;)
            handleShiftPair(shift[i], 0);

        Arg lastDst = shift.last().dst();
        if (lastDst.isTmp()) {
            for (Arg& scratch : scratches) {
                ASSERT(scratch != lastDst);
                if (!scratch.isTmp()) {
                    scratch = lastDst;
                    break;
                }
            }
        }
    };

    // First handle shifts whose last destination is a tmp because these free up scratch registers.
    // These end up last in the final sequence, so the final destination of these shifts will be
    // available as a scratch location for anything emitted prior (so, after, since we're emitting in
    // reverse).
    for (auto& entry : shifts) {
        Vector<ShufflePair>& shift = entry.value;
        if (shift.last().dst().isTmp())
            handleShift(shift);
        commitResult();
    }

    // Now handle the rest of the shifts.
    for (auto& entry : shifts) {
        Vector<ShufflePair>& shift = entry.value;
        if (!shift.last().dst().isTmp())
            handleShift(shift);
        commitResult();
    }

    for (Rotate& rotate : rotates) {
        if (!rotate.fringe.isEmpty()) {
            // Make sure we do the fringe first! This won't clobber any of the registers that are
            // part of the rotation.
            handleShift(rotate.fringe);
        }

        bool canSwap = false;
        Opcode swap = Oops;
        Width swapWidth = Width8; // bogus value

        // Currently, the swap instruction is not available for floating point on any architecture we
        // support.
        if (bank == GP) {
            // Figure out whether we will be doing 64-bit swaps or 32-bit swaps. If we have a mix of
            // widths we handle that by fixing up the relevant register with zero-extends.
            swap = Swap32;
            swapWidth = Width32;
            bool hasMemory = false;
            bool hasIndex = false;
            for (ShufflePair& pair : rotate.loop) {
                switch (pair.width()) {
                case Width32:
                    break;
                case Width64:
                    swap = Swap64;
                    swapWidth = Width64;
                    break;
                default:
                    RELEASE_ASSERT_NOT_REACHED();
                    break;
                }

                hasMemory |= pair.src().isMemory() || pair.dst().isMemory();
                hasIndex |= pair.src().isIndex() || pair.dst().isIndex();
            }

            canSwap = isValidForm(swap, Arg::Tmp, Arg::Tmp);

            // We can totally use swaps even if there are shuffles involving memory. But, we play it
            // safe in that case. There are corner cases we don't handle, and our ability to do it is
            // contingent upon swap form availability.

            if (hasMemory) {
                canSwap &= isValidForm(swap, Arg::Tmp, Arg::Addr);

                // We don't take the swapping path if there is a mix of widths and some of the
                // shuffles involve memory. That gets too confusing. We might be able to relax this
                // to only bail if there are subwidth pairs involving memory, but I haven't thought
                // about it very hard. Anyway, this case is not common: rotates involving memory
                // don't arise for function calls, and they will only happen for rotates in user code
                // if some of the variables get spilled. It's hard to imagine a program that rotates
                // data around in variables while also doing a combination of uint32->uint64 and
                // int64->int32 casts.
                for (ShufflePair& pair : rotate.loop)
                    canSwap &= pair.width() == swapWidth;
            }

            if (hasIndex)
                canSwap &= isValidForm(swap, Arg::Tmp, Arg::Index);
        }

        if (canSwap) {
            // Emit the cycle as a chain of swaps over adjacent sources, in reverse.
            for (unsigned i = rotate.loop.size() - 1; i--;) {
                Arg left = rotate.loop[i].src();
                Arg right = rotate.loop[i + 1].src();

                if (left.isMemory() && right.isMemory()) {
                    // Note that this is a super rare outcome. Rotates are rare. Spills are rare.
                    // Moving data between two spills is rare. To get here a lot of rare stuff has to
                    // all happen at once.

                    Tmp scratch = getScratch(0, findPossibleScratch(code, bank, left, right));
                    RELEASE_ASSERT(scratch);
                    result.append(Inst(moveForWidth(swapWidth), origin, left, scratch));
                    result.append(Inst(swap, origin, scratch, right));
                    result.append(Inst(moveForWidth(swapWidth), origin, scratch, left));
                    returnScratch(0, scratch);
                    continue;
                }

                if (left.isMemory())
                    std::swap(left, right);

                result.append(Inst(swap, origin, left, right));
            }

            for (ShufflePair pair : rotate.loop) {
                if (pair.width() == swapWidth)
                    continue;

                RELEASE_ASSERT(pair.width() == Width32);
                RELEASE_ASSERT(swapWidth == Width64);
                RELEASE_ASSERT(pair.dst().isTmp());

                // Need to do an extra zero extension.
                result.append(Inst(Move32, origin, pair.dst(), pair.dst()));
            }
        } else {
            // We can treat this as a shift so long as we take the last destination (i.e. first
            // source) and save it first. Then we handle the first entry in the pair in the rotate
            // specially, after we restore the last destination. This requires some special care to
            // find a scratch register. It's possible that we have a rotate that uses the entire
            // available register file.

            Tmp scratch = findPossibleScratch(
                code, bank,
                [&] (Tmp tmp) -> bool {
                    for (ShufflePair pair : rotate.loop) {
                        if (pair.src().usesTmp(tmp))
                            return false;
                        if (pair.dst().usesTmp(tmp))
                            return false;
                    }
                    return true;
                });

            // NOTE: This is the most likely use of scratch registers.
            scratch = getScratch(0, scratch);

            // We may not have found a scratch register. When this happens, we can just use the spill
            // slot directly.
            Arg rotateSave = scratch ? Arg(scratch) : scratches[0];

            handleShiftPair(
                ShufflePair(rotate.loop.last().dst(), rotateSave, rotate.loop[0].width()), 1);

            for (unsigned i = rotate.loop.size(); i-- > 1;)
                handleShiftPair(rotate.loop[i], 1);

            handleShiftPair(
                ShufflePair(rotateSave, rotate.loop[0].dst(), rotate.loop[0].width()), 1);

            if (scratch)
                returnScratch(0, scratch);
        }

        commitResult();
    }

    ASSERT(result.isEmpty());

    // Assemble the final sequence: packs were built in reverse, so emit them back-to-front.
    for (unsigned i = resultPacks.size(); i--;)
        result.appendVector(resultPacks[i]);

    return result;
}
552
553Vector<Inst> emitShuffle(
554 Code& code, const Vector<ShufflePair>& pairs,
555 const std::array<Arg, 2>& gpScratch, const std::array<Arg, 2>& fpScratch,
556 Value* origin)
557{
558 Vector<ShufflePair> gpPairs;
559 Vector<ShufflePair> fpPairs;
560 for (const ShufflePair& pair : pairs) {
561 switch (pair.bank()) {
562 case GP:
563 gpPairs.append(pair);
564 break;
565 case FP:
566 fpPairs.append(pair);
567 break;
568 }
569 }
570
571 Vector<Inst> result;
572 result.appendVector(emitShuffle(code, gpPairs, gpScratch, GP, origin));
573 result.appendVector(emitShuffle(code, fpPairs, fpScratch, FP, origin));
574 return result;
575}
576
577} } } // namespace JSC::B3::Air
578
579#endif // ENABLE(B3_JIT)
580
581