1/*
2 * Copyright (C) 2015-2017 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#pragma once
27
28#if ENABLE(B3_JIT)
29
30#include "B3Type.h"
31#include "B3Width.h"
32#include <wtf/Optional.h>
33#include <wtf/StdLibExtras.h>
34
35namespace JSC { namespace B3 {
36
37// Warning: In B3, an Opcode is just one part of a Kind. Kind is used the way that an opcode
38// would be used in simple IRs. See B3Kind.h.
39
40enum Opcode : uint8_t {
41 // A no-op that returns Void, useful for when you want to remove a value.
42 Nop,
43
44 // Polymorphic identity, usable with any value type.
45 Identity,
46
47 // This is an identity, but we prohibit the compiler from realizing this until the bitter end. This can
48 // be used to block reassociation and other compiler reasoning, if we find that it's wrong or
49 // unprofitable and we need an escape hatch.
50 Opaque,
51
52 // Constants. Use the ConstValue* classes. Constants exist in the control flow, so that we can
53 // reason about where we would construct them. Large constants are expensive to create.
54 Const32,
55 Const64,
56 ConstDouble,
57 ConstFloat,
58
59 // B3 supports non-SSA variables. These are accessed using Get and Set opcodes. Use the
60 // VariableValue class. It's a good idea to run fixSSA() to turn these into SSA. The
61 // optimizer will do that eventually, but if your input tends to use these opcodes, you
62 // should run fixSSA() directly before launching the optimizer.
63 Set,
64 Get,
65
66 // Gets the base address of a StackSlot.
67 SlotBase,
68
69 // The magical argument register. This is viewed as executing at the top of the program
70 // regardless of where in control flow you put it, and the compiler takes care to ensure that we
71 // don't clobber the value by register allocation or calls (either by saving the argument to the
72 // stack or preserving it in a callee-save register). Use the ArgumentRegValue class. The return
73 // type is either pointer() (for GPRs) or Double (for FPRs).
74 ArgumentReg,
75
76 // The frame pointer. You can put this anywhere in control flow but it will always yield the
77 // frame pointer, with a caveat: if our compiler changes the frame pointer temporarily for some
78 // silly reason, the FramePointer intrinsic will return where the frame pointer *should* be not
79 // where it happens to be right now.
80 FramePointer,
81
82 // Polymorphic math, usable with any value type.
83 Add,
84 Sub,
85 Mul,
86 Div, // All bets are off as to what will happen when you execute this for -2^31/-1 and x/0.
87 UDiv,
88 Mod, // All bets are off as to what will happen when you execute this for -2^31%-1 and x%0.
89 UMod,
90
91 // Polymorphic negation. Note that we only need this for floating point, since integer negation
92 // is exactly like Sub(0, x). But that's not true for floating point. Sub(0, 0) is 0, while
93 // Neg(0) is -0. Also, we canonicalize Sub(0, x) into Neg(x) in case of integers.
94 Neg,
95
96 // Integer math.
97 BitAnd,
98 BitOr,
99 BitXor,
100 Shl,
101 SShr, // Arithmetic Shift.
102 ZShr, // Logical Shift.
103 RotR, // Rotate Right.
104 RotL, // Rotate Left.
105 Clz, // Count leading zeros.
106
107 // Floating point math.
108 Abs,
109 Ceil,
110 Floor,
111 Sqrt,
112
113 // Casts and such.
114 // Bitwise Cast of Double->Int64 or Int64->Double
115 BitwiseCast,
116 // Takes and returns Int32:
117 SExt8,
118 SExt16,
119 // Takes Int32 and returns Int64:
120 SExt32,
121 ZExt32,
122 // Does a bitwise truncation of Int64->Int32 and Double->Float:
123 Trunc,
124 // Takes ints and returns floating point value. Note that we don't currently provide the opposite operation,
125 // because double-to-int conversions have weirdly different semantics on different platforms. Use
126 // a patchpoint if you need to do that.
127 IToD,
128 IToF,
129 // Convert between double and float.
130 FloatToDouble,
131 DoubleToFloat,
132
133 // Polymorphic comparisons, usable with any value type. Returns int32 0 or 1. Note that "Not"
134 // is just Equal(x, 0), and "ToBoolean" is just NotEqual(x, 0).
135 Equal,
136 NotEqual,
137 LessThan,
138 GreaterThan,
139 LessEqual,
140 GreaterEqual,
141
142 // Integer comparisons. Returns int32 0 or 1.
143 Above,
144 Below,
145 AboveEqual,
146 BelowEqual,
147
148 // Unordered floating point compare: values are equal or either one is NaN.
149 EqualOrUnordered,
150
151 // SSA form of conditional move. The first child is evaluated for truthiness. If true, the second child
152 // is returned. Otherwise, the third child is returned.
153 Select,
154
155 // Memory loads. Opcode indicates how we load and the loaded type. These use MemoryValue.
156 // These return Int32:
157 Load8Z,
158 Load8S,
159 Load16Z,
160 Load16S,
161 // This returns whatever the return type is:
162 Load,
163
164 // Memory stores. Opcode indicates how the value is stored. These use MemoryValue.
165 // These take an Int32 value:
166 Store8,
167 Store16,
168 // This is a polymorphic store for Int32, Int64, Float, and Double.
169 Store,
170
171 // Atomic compare and swap that returns a boolean. May choose to do nothing and return false. You can
172 // usually assume that this is faster and results in less code than AtomicStrongCAS, though that's
173 // not necessarily true on Intel, if instruction selection does its job. Imagine that this opcode is
174 // as if you did this atomically:
175 //
176 // template<typename T>
177 // bool AtomicWeakCAS(T expectedValue, T newValue, T* ptr)
178 // {
179 // if (!rand())
180 // return false; // Real world example of this: context switch on ARM while doing CAS.
181 // if (*ptr != expectedValue)
182 // return false;
183 // *ptr = newValue;
184 // return true;
185 // }
186 //
187 // Note that all atomics put the pointer last to be consistent with how loads and stores work. This
188 // is a goofy tradition, but it's harmless, and better than being inconsistent.
189 //
190 // Note that weak CAS has no fencing guarantees when it fails. This means that the following
191 // transformation is always valid:
192 //
193 // Before:
194 //
195 // Branch(AtomicWeakCAS(expected, new, ptr))
196 // Successors: Then:#success, Else:#fail
197 //
198 // After:
199 //
200 // Branch(Equal(Load(ptr), expected))
201 // Successors: Then:#attempt, Else:#fail
202 // BB#attempt:
203 // Branch(AtomicWeakCAS(expected, new, ptr))
204 // Successors: Then:#success, Else:#fail
205 //
206 // Both kinds of CAS for non-canonical widths (Width8 and Width16) ignore the irrelevant bits of the
207 // input.
208 AtomicWeakCAS,
209
210 // Atomic compare and swap that returns the old value. Does not have the nondeterminism of WeakCAS.
211 // This is a bit more code and a bit slower in some cases, though not by a lot. Imagine that this
212 // opcode is as if you did this atomically:
213 //
214 // template<typename T>
215 // T AtomicStrongCAS(T expectedValue, T newValue, T* ptr)
216 // {
217 // T oldValue = *ptr;
218 // if (oldValue == expectedValue)
219 // *ptr = newValue;
220 // return oldValue
221 // }
222 //
223 // AtomicStrongCAS sign-extends its result for subwidth operations.
224 //
225 // Note that AtomicWeakCAS and AtomicStrongCAS sort of have this kind of equivalence:
226 //
227 // AtomicWeakCAS(@exp, @new, @ptr) == Equal(AtomicStrongCAS(@exp, @new, @ptr), @exp)
228 //
229 // Assuming that the WeakCAS does not spuriously fail, of course.
230 AtomicStrongCAS,
231
232 // Atomically ___ a memory location and return the old value. Syntax:
233 //
234 // @oldValue = AtomicXchg___(@operand, @ptr)
235 //
236 // For non-canonical widths (Width8 and Width16), these return sign-extended results and ignore the
237 // irrelevant bits of their inputs.
238 AtomicXchgAdd,
239 AtomicXchgAnd,
240 AtomicXchgOr,
241 AtomicXchgSub,
242 AtomicXchgXor,
243
244 // FIXME: Maybe we should have AtomicXchgNeg.
245 // https://bugs.webkit.org/show_bug.cgi?id=169252
246
247 // Atomically exchange a value with a memory location. Syntax:
248 //
249 // @oldValue = AtomicXchg(@newValue, @ptr)
250 AtomicXchg,
251
252 // Introduce an invisible dependency for blocking motion of loads with respect to each other. Syntax:
253 //
254 // @result = Depend(@phantom)
255 //
256 // This is eventually codegenerated to have local semantics as if we did:
257 //
258 // @result = $0
259 //
260 // But it ensures that the users of @result cannot execute until @phantom is computed.
261 //
262 // The compiler is not allowed to reason about the fact that Depend codegenerates this way. Any kind
263 // of transformation or analysis that relies on the insight that Depend is really zero is unsound,
264 // because it unlocks reordering of users of @result and @phantom.
265 //
266 // On X86, this is lowered to a load-load fence and @result folds to zero.
267 //
268 // On ARM, this is lowered as if like:
269 //
270 // @result = BitXor(@phantom, @phantom)
271 //
272 // Except that the compiler never gets an opportunity to simplify out the BitXor.
273 Depend,
274
275 // This is used to compute the actual address of a Wasm memory operation. It takes an IntPtr
276 // and a pinned register then computes the appropriate IntPtr address. For the use-case of
277 // Wasm it is important that the first child initially be a ZExt32 so the top bits are cleared.
278 // We do WasmAddress(ZExt32(ptr), ...) so that we can avoid generating extraneous moves in Air.
279 WasmAddress,
280
281 // This is used to represent standalone fences - i.e. fences that are not part of other
282 // instructions. It's expressive enough to expose mfence on x86 and dmb ish/ishst on ARM. On
283 // x86, it also acts as a compiler store-store fence in those cases where it would have been a
284 // dmb ishst on ARM.
285 Fence,
286
287 // This is a regular ordinary C function call, using the system C calling convention. Make sure
288 // that the arguments are passed using the right types. The first argument is the callee.
289 CCall,
290
291 // This is a patchpoint. Use the PatchpointValue class. This is viewed as behaving like a call,
292 // but only emits code via a code generation callback. That callback gets to emit code inline.
293 // You can pass a stackmap along with constraints on how each stackmap argument must be passed.
294 // It's legal to request that a stackmap argument is in some register and it's legal to request
295 // that a stackmap argument is at some offset from the top of the argument passing area on the
296 // stack.
297 Patchpoint,
298
299 // This is a projection out of a tuple. Currently only Patchpoints, Get, and Phi can produce tuples.
300 // It's assumumed that each entry in a tuple has a fixed Numeric B3 Type (i.e. not Void or Tuple).
301 Extract,
302
303 // Checked math. Use the CheckValue class. Like a Patchpoint, this takes a code generation
304 // callback. That callback gets to emit some code after the epilogue, and gets to link the jump
305 // from the check, and the choice of registers. You also get to supply a stackmap. Note that you
306 // are not allowed to jump back into the mainline code from your slow path, since the compiler
307 // will assume that the execution of these instructions proves that overflow didn't happen. For
308 // example, if you have two CheckAdd's:
309 //
310 // a = CheckAdd(x, y)
311 // b = CheckAdd(x, y)
312 //
313 // Then it's valid to change this to:
314 //
315 // a = CheckAdd(x, y)
316 // b = Identity(a)
317 //
318 // This is valid regardless of the callbacks used by the two CheckAdds. They may have different
319 // callbacks. Yet, this transformation is valid even if they are different because we know that
320 // after the first CheckAdd executes, the second CheckAdd could not have possibly taken slow
321 // path. Therefore, the second CheckAdd's callback is irrelevant.
322 //
323 // Note that the first two children of these operations have ValueRep's as input constraints but do
324 // not have output constraints.
325 CheckAdd,
326 CheckSub,
327 CheckMul,
328
329 // Check that side-exits. Use the CheckValue class. Like CheckAdd and friends, this has a
330 // stackmap with a generation callback. This takes an int argument that this branches on, with
331 // full branch fusion in the instruction selector. A true value jumps to the generator's slow
332 // path. Note that the predicate child is has both an input ValueRep. The input constraint must be
333 // WarmAny. It will not have an output constraint.
334 Check,
335
336 // Special Wasm opcode that takes a Int32, a special pinned gpr and an offset. This node exists
337 // to allow us to CSE WasmBoundsChecks if both use the same pointer and one dominates the other.
338 // Without some such node B3 would not have enough information about the inner workings of wasm
339 // to be able to perform such optimizations.
340 WasmBoundsCheck,
341
342 // SSA support, in the style of DFG SSA.
343 Upsilon, // This uses the UpsilonValue class.
344 Phi,
345
346 // Jump.
347 Jump,
348
349 // Polymorphic branch, usable with any integer type. Branches if not equal to zero. The 0-index
350 // successor is the true successor.
351 Branch,
352
353 // Switch. Switches over either Int32 or Int64. Uses the SwitchValue class.
354 Switch,
355
356 // Multiple entrypoints are supported via the EntrySwitch operation. Place this in the root
357 // block and list the entrypoints as the successors. All blocks backwards-reachable from
358 // EntrySwitch are duplicated for each entrypoint.
359 EntrySwitch,
360
361 // Return. Note that B3 procedures don't know their return type, so this can just return any
362 // type.
363 Return,
364
365 // This is a terminal that indicates that we will never get here.
366 Oops
367};
368
369inline bool isCheckMath(Opcode opcode)
370{
371 switch (opcode) {
372 case CheckAdd:
373 case CheckSub:
374 case CheckMul:
375 return true;
376 default:
377 return false;
378 }
379}
380
381Optional<Opcode> invertedCompare(Opcode, Type);
382
383inline Opcode constPtrOpcode()
384{
385 if (is64Bit())
386 return Const64;
387 return Const32;
388}
389
390inline bool isConstant(Opcode opcode)
391{
392 switch (opcode) {
393 case Const32:
394 case Const64:
395 case ConstDouble:
396 case ConstFloat:
397 return true;
398 default:
399 return false;
400 }
401}
402
403inline Opcode opcodeForConstant(Type type)
404{
405 switch (type.kind()) {
406 case Int32: return Const32;
407 case Int64: return Const64;
408 case Float: return ConstFloat;
409 case Double: return ConstDouble;
410 default:
411 RELEASE_ASSERT_NOT_REACHED();
412 }
413}
414
415inline bool isDefinitelyTerminal(Opcode opcode)
416{
417 switch (opcode) {
418 case Jump:
419 case Branch:
420 case Switch:
421 case Oops:
422 case Return:
423 return true;
424 default:
425 return false;
426 }
427}
428
429inline bool isLoad(Opcode opcode)
430{
431 switch (opcode) {
432 case Load8Z:
433 case Load8S:
434 case Load16Z:
435 case Load16S:
436 case Load:
437 return true;
438 default:
439 return false;
440 }
441}
442
443inline bool isStore(Opcode opcode)
444{
445 switch (opcode) {
446 case Store8:
447 case Store16:
448 case Store:
449 return true;
450 default:
451 return false;
452 }
453}
454
455inline bool isLoadStore(Opcode opcode)
456{
457 switch (opcode) {
458 case Load8Z:
459 case Load8S:
460 case Load16Z:
461 case Load16S:
462 case Load:
463 case Store8:
464 case Store16:
465 case Store:
466 return true;
467 default:
468 return false;
469 }
470}
471
472inline bool isAtom(Opcode opcode)
473{
474 switch (opcode) {
475 case AtomicWeakCAS:
476 case AtomicStrongCAS:
477 case AtomicXchgAdd:
478 case AtomicXchgAnd:
479 case AtomicXchgOr:
480 case AtomicXchgSub:
481 case AtomicXchgXor:
482 case AtomicXchg:
483 return true;
484 default:
485 return false;
486 }
487}
488
489inline bool isAtomicCAS(Opcode opcode)
490{
491 switch (opcode) {
492 case AtomicWeakCAS:
493 case AtomicStrongCAS:
494 return true;
495 default:
496 return false;
497 }
498}
499
500inline bool isAtomicXchg(Opcode opcode)
501{
502 switch (opcode) {
503 case AtomicXchgAdd:
504 case AtomicXchgAnd:
505 case AtomicXchgOr:
506 case AtomicXchgSub:
507 case AtomicXchgXor:
508 case AtomicXchg:
509 return true;
510 default:
511 return false;
512 }
513}
514
515inline bool isMemoryAccess(Opcode opcode)
516{
517 return isAtom(opcode) || isLoadStore(opcode);
518}
519
520inline Opcode signExtendOpcode(Width width)
521{
522 switch (width) {
523 case Width8:
524 return SExt8;
525 case Width16:
526 return SExt16;
527 default:
528 RELEASE_ASSERT_NOT_REACHED();
529 return Oops;
530 }
531}
532
533JS_EXPORT_PRIVATE Opcode storeOpcode(Bank bank, Width width);
534
535} } // namespace JSC::B3
536
537namespace WTF {
538
539class PrintStream;
540
541JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Opcode);
542
543} // namespace WTF
544
545#endif // ENABLE(B3_JIT)
546