BinarySwitch.cpp source code [jsc/Source/JavaScriptCore/jit/BinarySwitch.cpp]

1	/*
2	* Copyright (C) 2013-2019 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "BinarySwitch.h"
28
29	#if ENABLE(JIT)
30
31	#include "JSCInlines.h"
32	#include <wtf/ListDump.h>
33
34	namespace JSC {
35
namespace BinarySwitchInternal {
// Compile-time switch for the dataLog() tracing in this file. When true, the constructor logs
// the original and sorted cases and build() logs every decision and emitted BranchCode.
static constexpr bool verbose = false;
}

// Seed source for each BinarySwitch's WeakRandom: read and then incremented by every
// BinarySwitch constructor, so that we use a different seed every time we are invoked.
static unsigned globalCounter;
41
42	BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type)
43	: m_type(type)
44	, m_value(value)
45	, m_weakRandom (globalCounter++)
46	, m_index(`0`)
47	, m_caseIndex(UINT_MAX)
48	{
49	if (cases.isEmpty())
50	return;
51
52	if (BinarySwitchInternal::verbose)
53	dataLog("Original cases: ", listDump(cases), "\n");
54
55	for (unsigned i = `0`; i < cases.size(); ++i)
56	m_cases.append(Case (cases [i], i));
57
58	std::sort(m_cases.begin(), m_cases.end());
59
60	if (BinarySwitchInternal::verbose)
61	dataLog("Sorted cases: ", listDump(m_cases), "\n");
62
63	#if !ASSERT_DISABLED
64	for (unsigned i = `1`; i < m_cases.size(); ++i)
65	ASSERT(m_cases[i - `1`] < m_cases[i], i, m_cases.size(), m_cases[i].value, m_cases[i].index);
66	#endif
67
68	build(`0`, false, m_cases.size());
69	}
70
71	BinarySwitch::~BinarySwitch()
72	{
73	}
74
75	bool BinarySwitch::advance(MacroAssembler& jit)
76	{
77	if (m_cases.isEmpty()) {
78	m_fallThrough.append(jit.jump());
79	return false;
80	}
81
82	if (m_index == m_branches.size()) {
83	RELEASE_ASSERT(m_jumpStack.isEmpty());
84	return false;
85	}
86
87	for (;;) {
88	const BranchCode& code = m_branches [m_index++];
89	switch (code.kind) {
90	case NotEqualToFallThrough:
91	switch (m_type) {
92	case Int32:
93	m_fallThrough.append(jit.branch32(
94	MacroAssembler::NotEqual, m_value,
95	MacroAssembler::Imm32 (static_cast<int32_t>(m_cases [code.index].value))));
96	break;
97	case IntPtr:
98	m_fallThrough.append(jit.branchPtr(
99	MacroAssembler::NotEqual, m_value,
100	MacroAssembler::ImmPtr (bitwise_cast<const void>(static_cast*<intptr_t>(m_cases [code.index].value)))));
101	break;
102	}
103	break;
104	case NotEqualToPush:
105	switch (m_type) {
106	case Int32:
107	m_jumpStack.append(jit.branch32(
108	MacroAssembler::NotEqual, m_value,
109	MacroAssembler::Imm32 (static_cast<int32_t>(m_cases [code.index].value))));
110	break;
111	case IntPtr:
112	m_jumpStack.append(jit.branchPtr(
113	MacroAssembler::NotEqual, m_value,
114	MacroAssembler::ImmPtr (bitwise_cast<const void>(static_cast*<intptr_t>(m_cases [code.index].value)))));
115	break;
116	}
117	break;
118	case LessThanToPush:
119	switch (m_type) {
120	case Int32:
121	m_jumpStack.append(jit.branch32(
122	MacroAssembler::LessThan, m_value,
123	MacroAssembler::Imm32 (static_cast<int32_t>(m_cases [code.index].value))));
124	break;
125	case IntPtr:
126	m_jumpStack.append(jit.branchPtr(
127	MacroAssembler::LessThan, m_value,
128	MacroAssembler::ImmPtr (bitwise_cast<const void>(static_cast*<intptr_t>(m_cases [code.index].value)))));
129	break;
130	}
131	break;
132	case Pop:
133	m_jumpStack.takeLast().link(&jit);
134	break;
135	case ExecuteCase:
136	m_caseIndex = code.index;
137	return true;
138	}
139	}
140	}
141
142	class RandomNumberGenerator {
143	public:
144	using result_type = uint32_t;
145
146	RandomNumberGenerator(WeakRandom& weakRandom)
147	: m_weakRandom(weakRandom)
148	{
149	}
150
151	uint32_t operator()()
152	{
153	return m_weakRandom.getUint32();
154	}
155
156	static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
157	static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
158
159	private:
160	WeakRandom& m_weakRandom;
161	};
162
163	void BinarySwitch::build(unsigned start, bool hardStart, unsigned end)
164	{
165	if (BinarySwitchInternal::verbose)
166	dataLog("Building with start = ", start, ", hardStart = ", hardStart, ", end = ", end, "\n");
167
168	auto append = [&] (const BranchCode& code) {
169	if (BinarySwitchInternal::verbose)
170	dataLog("==> ", code, "\n");
171	m_branches.append(code);
172	};
173
174	unsigned size = end - start;
175
176	RELEASE_ASSERT(size);
177
178	// This code uses some random numbers to keep things balanced. It's important to keep in mind
179	// that this does not improve average-case throughput under the assumption that all cases fire
180	// with equal probability. It just ensures that there will not be some switch structure that
181	// when combined with some input will always produce pathologically good or pathologically bad
182	// performance.
183
184	const unsigned leafThreshold = `3`;
185
186	if (size <= leafThreshold) {
187	if (BinarySwitchInternal::verbose)
188	dataLog("It's a leaf.\n");
189
190	// It turns out that for exactly three cases or less, it's better to just compare each
191	// case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in
192	// extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches.
193	//
194	// This assumes that we care about the cost of hitting some case more than we care about
195	// bottoming out in a default case. I believe that in most places where we use switch
196	// statements, we are more likely to hit one of the cases than we are to fall through to
197	// default. Intuitively, if we wanted to improve the performance of default, we would
198	// reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion.
199
200	bool allConsecutive = false;
201
202	if ((hardStart \|\| (start && m_cases [start - `1`].value == m_cases [start].value - `1`))
203	&& start + size < m_cases.size()
204	&& m_cases [start + size - `1`].value == m_cases [start + size].value - `1`) {
205	allConsecutive = true;
206	for (unsigned i = `0`; i < size - `1`; ++i) {
207	if (m_cases [start + i].value + `1` != m_cases [start + i + `1`].value) {
208	allConsecutive = false;
209	break;
210	}
211	}
212	}
213
214	if (BinarySwitchInternal::verbose)
215	dataLog("allConsecutive = ", allConsecutive, "\n");
216
217	Vector<unsigned, `3`> localCaseIndices;
218	for (unsigned i = `0`; i < size; ++i)
219	localCaseIndices.append(start + i);
220
221	std::shuffle(
222	localCaseIndices.begin(), localCaseIndices.end(),
223	RandomNumberGenerator (m_weakRandom));
224
225	for (unsigned i = `0`; i < size - `1`; ++i) {
226	append (BranchCode (NotEqualToPush, localCaseIndices [i]));
227	append (BranchCode (ExecuteCase, localCaseIndices [i]));
228	append (BranchCode (Pop));
229	}
230
231	if (!allConsecutive)
232	append (BranchCode (NotEqualToFallThrough, localCaseIndices.last()));
233
234	append (BranchCode (ExecuteCase, localCaseIndices.last()));
235	return;
236	}
237
238	if (BinarySwitchInternal::verbose)
239	dataLog("It's not a leaf.\n");
240
241	// There are two different strategies we could consider here:
242	//
243	// Isolate median and split: pick a median and check if the comparison value is equal to it;
244	// if so, execute the median case. Otherwise check if the value is less than the median, and
245	// recurse left or right based on this. This has two subvariants: we could either first test
246	// equality for the median and then do the less-than, or we could first do the less-than and
247	// then check equality on the not-less-than path.
248	//
249	// Ignore median and split: do a less-than comparison on a value that splits the cases in two
250	// equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
251	// against the median (or anything else); let the recursion handle those equality comparisons
252	// once we bottom out in a list that case 3 cases or less (see above).
253	//
254	// I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
255	// would be faster since it leads to less branching for some lucky cases. It turns out that
256	// Isolate is almost a total fail in the average, assuming all cases are equally likely. How
257	// bad Isolate is depends on whether you believe that doing two consecutive branches based on
258	// the same comparison is cheaper than doing the compare/branches separately. This is
259	// difficult to evaluate. For small immediates that aren't blinded, we just care about
260	// avoiding a second compare instruction. For large immediates or when blinding is in play, we
261	// also care about the instructions used to materialize the immediate a second time. Isolate
262	// can help with both costs since it involves first doing a < compare+branch on some value,
263	// followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
264	// compare+branch on some value, and then the == compare+branch on that same value will happen
265	// much later.
266	//
267	// To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
268	// that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
269	// of a compare+branch on some value that you've just done another compare+branch for. These
270	// recurrence relations compute the total cost incurred if you executed the switch statement
271	// on each matching value. So the average cost of hitting some case can be computed as
272	// Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
273	//
274	// Isolate[1] = ComparisonCost
275	// Isolate[2] = (2 + 1) ComparisonCost*
276	// Isolate[3] = (3 + 2 + 1) ComparisonCost*
277	// Isolate[n_] := With[
278	// {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
279	// ComparisonCost + ChainedComparisonCost +
280	// (ComparisonCost (medianIndex - 1) + Isolate[medianIndex - 1]) +*
281	// (2 ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]*
282	//
283	// Ignore[1] = ComparisonCost
284	// Ignore[2] = (2 + 1) ComparisonCost*
285	// Ignore[3] = (3 + 2 + 1) ComparisonCost*
286	// Ignore[n_] := With[
287	// {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
288	// (medianIndex ComparisonCost + Ignore[medianIndex]) +*
289	// ((n - medianIndex) ComparisonCost + Ignore[n - medianIndex])]*
290	//
291	// This does not account for the average cost of hitting the default case. See further below
292	// for a discussion of that.
293	//
294	// It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
295	// better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
296	// switch statements that have 20 cases or fewer, though the margin of victory is never large
297	// - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
298	// we see divergence between the two with Ignore winning. This is of course rather
299	// unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
300	// see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
301	// divergence for large switches with Ignore winning, for example if a switch statement has
302	// 100 cases then Ignore saves one branch on average.
303	//
304	// Our current JIT backends don't provide for optimization for chained comparisons, except for
305	// reducing the code for materializing the immediate if the immediates are large or blinding
306	// comes into play. Probably our JIT backends live somewhere north of
307	// ChainedComparisonCost = 0.5.
308	//
309	// This implies that using the Ignore strategy is likely better. If we wanted to incorporate
310	// the Isolate strategy, we'd want to determine the switch size threshold at which the two
311	// cross over and then use Isolate for switches that are smaller than that size.
312	//
313	// The average cost of hitting the default case is similar, but involves a different cost for
314	// the base cases: you have to assume that you will always fail each branch. For the Ignore
315	// strategy we would get this recurrence relation; the same kind of thing happens to the
316	// Isolate strategy:
317	//
318	// Ignore[1] = ComparisonCost
319	// Ignore[2] = (2 + 2) ComparisonCost*
320	// Ignore[3] = (3 + 3 + 3) ComparisonCost*
321	// Ignore[n_] := With[
322	// {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
323	// (medianIndex ComparisonCost + Ignore[medianIndex]) +*
324	// ((n - medianIndex) ComparisonCost + Ignore[n - medianIndex])]*
325	//
326	// This means that if we cared about the default case more, we would likely reduce
327	// leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
328	// in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
329	// increase the average cost of taking one of the non-default cases by 1/3. Typically the
330	// difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
331	// that the default case is more important then we would want leafThreshold to be 2, and the
332	// default case would become 1/6 faster on average. But we believe that most switch statements
333	// are more likely to take one of the cases than the default, so we use leafThreshold = 3
334	// and get a 1/6 speed-up on average for taking an explicit case.
335
336	unsigned medianIndex = (start + end) / `2`;
337
338	if (BinarySwitchInternal::verbose)
339	dataLog("medianIndex = ", medianIndex, "\n");
340
341	// We want medianIndex to point to the thing we will do a less-than compare against. We want
342	// this less-than compare to split the current sublist into equal-sized sublists, or
343	// nearly-equal-sized with some randomness if we're in the odd case. With the above
344	// calculation, in the odd case we will have medianIndex pointing at either the element we
345	// want or the element to the left of the one we want. Consider the case of five elements:
346	//
347	// 0 1 2 3 4
348	//
349	// start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
350	// value < 2, then we will split the list into 2 elements on the left and three on the right.
351	// That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
352	// that we don't become unbalanced on the right. This does not improve throughput since one
353	// side will always get shafted, and that side might still be odd, in which case it will also
354	// have two sides and one of them will get shafted - and so on. We just want to avoid
355	// deterministic pathologies.
356	//
357	// In the even case, we will always end up pointing at the element we want:
358	//
359	// 0 1 2 3
360	//
361	// start will be 0, end will be 4. So, the average is 2, which is what we'd like.
362	if (size & `1`) {
363	RELEASE_ASSERT(medianIndex - start + `1` == end - medianIndex);
364	medianIndex += m_weakRandom.getUint32() & `1`;
365	} else
366	RELEASE_ASSERT(medianIndex - start == end - medianIndex);
367
368	RELEASE_ASSERT(medianIndex > start);
369	RELEASE_ASSERT(medianIndex + `1` < end);
370
371	if (BinarySwitchInternal::verbose)
372	dataLog("fixed medianIndex = ", medianIndex, "\n");
373
374	append (BranchCode (LessThanToPush, medianIndex));
375	build(medianIndex, true, end);
376	append (BranchCode (Pop));
377	build(start, hardStart, medianIndex);
378	}
379
380	void BinarySwitch::Case::dump(PrintStream& out) const
381	{
382	out.print("<value: " , value, ", index: ", index, ">");
383	}
384
385	void BinarySwitch::BranchCode::dump(PrintStream& out) const
386	{
387	switch (kind) {
388	case NotEqualToFallThrough:
389	out.print("NotEqualToFallThrough");
390	break;
391	case NotEqualToPush:
392	out.print("NotEqualToPush");
393	break;
394	case LessThanToPush:
395	out.print("LessThanToPush");
396	break;
397	case Pop:
398	out.print("Pop");
399	break;
400	case ExecuteCase:
401	out.print("ExecuteCase");
402	break;
403	}
404
405	if (index != UINT_MAX)
406	out.print("(", index, ")");
407	}
408
409	} // namespace JSC
410
411	#endif // ENABLE(JIT)
412
413

Browse the source code of jsc/Source/JavaScriptCore/jit/BinarySwitch.cpp