1/*
2 * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
#include "config.h"
#include "YarrJIT.h"

#include "LinkBuffer.h"
#include "Options.h"
#include "VM.h"
#include "Yarr.h"
#include "YarrCanonicalize.h"
#include "YarrDisassembler.h"
#include <utility>
#include <wtf/ASCIICType.h>
36
37#if ENABLE(YARR_JIT)
38
39namespace JSC { namespace Yarr {
40
41template<YarrJITCompileMode compileMode>
42class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44#if CPU(ARM_THUMB2)
45 static const RegisterID input = ARMRegisters::r0;
46 static const RegisterID index = ARMRegisters::r1;
47 static const RegisterID length = ARMRegisters::r2;
48 static const RegisterID output = ARMRegisters::r3;
49
50 static const RegisterID regT0 = ARMRegisters::r4;
51 static const RegisterID regT1 = ARMRegisters::r5;
52 static const RegisterID initialStart = ARMRegisters::r8;
53
54 static const RegisterID returnRegister = ARMRegisters::r0;
55 static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57#elif CPU(ARM64)
58 // Argument registers
59 static const RegisterID input = ARM64Registers::x0;
60 static const RegisterID index = ARM64Registers::x1;
61 static const RegisterID length = ARM64Registers::x2;
62 static const RegisterID output = ARM64Registers::x3;
63 static const RegisterID freelistRegister = ARM64Registers::x4;
64 static const RegisterID freelistSizeRegister = ARM64Registers::x5; // Only used during initialization.
65
66 // Scratch registers
67 static const RegisterID regT0 = ARM64Registers::x6;
68 static const RegisterID regT1 = ARM64Registers::x7;
69 static const RegisterID regT2 = ARM64Registers::x8;
70 static const RegisterID remainingMatchCount = ARM64Registers::x9;
71 static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
72 static const RegisterID unicodeTemp = ARM64Registers::x5;
73 static const RegisterID initialStart = ARM64Registers::x11;
74 static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75 static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76 static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77 static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79 static const RegisterID returnRegister = ARM64Registers::x0;
80 static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83#define JIT_UNICODE_EXPRESSIONS
84#elif CPU(MIPS)
85 static const RegisterID input = MIPSRegisters::a0;
86 static const RegisterID index = MIPSRegisters::a1;
87 static const RegisterID length = MIPSRegisters::a2;
88 static const RegisterID output = MIPSRegisters::a3;
89
90 static const RegisterID regT0 = MIPSRegisters::t4;
91 static const RegisterID regT1 = MIPSRegisters::t5;
92 static const RegisterID initialStart = MIPSRegisters::t6;
93
94 static const RegisterID returnRegister = MIPSRegisters::v0;
95 static const RegisterID returnRegister2 = MIPSRegisters::v1;
96
97#elif CPU(X86_64)
98#if !OS(WINDOWS)
99 // Argument registers
100 static const RegisterID input = X86Registers::edi;
101 static const RegisterID index = X86Registers::esi;
102 static const RegisterID length = X86Registers::edx;
103 static const RegisterID output = X86Registers::ecx;
104 static const RegisterID freelistRegister = X86Registers::r8;
105 static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
106#else
107 // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
108 // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
109 COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
110 static const RegisterID input = X86Registers::edx;
111 static const RegisterID index = X86Registers::r8;
112 static const RegisterID length = X86Registers::r9;
113 static const RegisterID output = X86Registers::r10;
114#endif
115
116 // Scratch registers
117 static const RegisterID regT0 = X86Registers::eax;
118#if !OS(WINDOWS)
119 static const RegisterID regT1 = X86Registers::r9;
120 static const RegisterID regT2 = X86Registers::r10;
121#else
122 static const RegisterID regT1 = X86Registers::ecx;
123 static const RegisterID regT2 = X86Registers::edi;
124#endif
125
126 static const RegisterID initialStart = X86Registers::ebx;
127#if !OS(WINDOWS)
128 static const RegisterID remainingMatchCount = X86Registers::r12;
129#else
130 static const RegisterID remainingMatchCount = X86Registers::esi;
131#endif
132 static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
133 static const RegisterID unicodeTemp = X86Registers::r14;
134 static const RegisterID endOfStringAddress = X86Registers::r15;
135
136 static const RegisterID returnRegister = X86Registers::eax;
137 static const RegisterID returnRegister2 = X86Registers::edx;
138
139 const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
140 const TrustedImm32 leadingSurrogateTag = TrustedImm32(0xd800);
141 const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
142 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
143#define JIT_UNICODE_EXPRESSIONS
144#endif
145
146#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
147 struct ParenContextSizes {
148 size_t m_numSubpatterns;
149 size_t m_frameSlots;
150
151 ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
152 : m_numSubpatterns(numSubpatterns)
153 , m_frameSlots(frameSlots)
154 {
155 }
156
157 size_t numSubpatterns() { return m_numSubpatterns; }
158
159 size_t frameSlots() { return m_frameSlots; }
160 };
161
162 struct ParenContext {
163 struct ParenContext* next;
164 uint32_t begin;
165 uint32_t matchAmount;
166 uintptr_t returnAddress;
167 struct Subpatterns {
168 unsigned start;
169 unsigned end;
170 } subpatterns[0];
171 uintptr_t frameSlots[0];
172
173 static size_t sizeFor(ParenContextSizes& parenContextSizes)
174 {
175 return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
176 }
177
178 static ptrdiff_t nextOffset()
179 {
180 return offsetof(ParenContext, next);
181 }
182
183 static ptrdiff_t beginOffset()
184 {
185 return offsetof(ParenContext, begin);
186 }
187
188 static ptrdiff_t matchAmountOffset()
189 {
190 return offsetof(ParenContext, matchAmount);
191 }
192
193 static ptrdiff_t returnAddressOffset()
194 {
195 return offsetof(ParenContext, returnAddress);
196 }
197
198 static ptrdiff_t subpatternOffset(size_t subpattern)
199 {
200 return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
201 }
202
203 static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
204 {
205 return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
206 }
207 };
208
209 void initParenContextFreeList()
210 {
211 RegisterID parenContextPointer = regT0;
212 RegisterID nextParenContextPointer = regT2;
213
214 size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
215
216 parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
217
218 if (parenContextSize > VM::patternContextBufferSize) {
219 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
220 return;
221 }
222
223 Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
224 move(freelistRegister, parenContextPointer);
225 addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
226 addPtr(freelistRegister, freelistSizeRegister);
227 subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
228
229 Label loopTop(this);
230 Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
231 storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
232 move(nextParenContextPointer, parenContextPointer);
233 addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
234 jump(loopTop);
235
236 initDone.link(this);
237 storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
238 emptyFreeList.link(this);
239 }
240
241 void allocateParenContext(RegisterID result)
242 {
243 m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
244 sub32(TrustedImm32(1), remainingMatchCount);
245 m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
246 move(freelistRegister, result);
247 loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
248 }
249
250 void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
251 {
252 loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
253 storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
254 move(headPtrRegister, freelistRegister);
255 }
256
257 void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
258 {
259 store32(index, Address(parenContextReg, ParenContext::beginOffset()));
260 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
261 store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
262 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
263 storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
264 if (compileMode == IncludeSubpatterns) {
265 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
266 loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
267 storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
268 clearSubpatternStart(subpattern);
269 }
270 }
271 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
272 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
273 loadFromFrame(frameLocation, tempReg);
274 storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
275 }
276 }
277
278 void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
279 {
280 load32(Address(parenContextReg, ParenContext::beginOffset()), index);
281 storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
282 load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
283 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
284 loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
285 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
286 if (compileMode == IncludeSubpatterns) {
287 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
288 loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
289 storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
290 }
291 }
292 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
293 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
294 loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
295 storeToFrame(tempReg, frameLocation);
296 }
297 }
298#endif
299
300 void optimizeAlternative(PatternAlternative* alternative)
301 {
302 if (!alternative->m_terms.size())
303 return;
304
305 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
306 PatternTerm& term = alternative->m_terms[i];
307 PatternTerm& nextTerm = alternative->m_terms[i + 1];
308
309 // We can move BMP only character classes after fixed character terms.
310 if ((term.type == PatternTerm::TypeCharacterClass)
311 && (term.quantityType == QuantifierFixedCount)
312 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
313 && (nextTerm.type == PatternTerm::TypePatternCharacter)
314 && (nextTerm.quantityType == QuantifierFixedCount)) {
315 PatternTerm termCopy = term;
316 alternative->m_terms[i] = nextTerm;
317 alternative->m_terms[i + 1] = termCopy;
318 }
319 }
320 }
321
322 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
323 {
324 do {
325 // pick which range we're going to generate
326 int which = count >> 1;
327 char lo = ranges[which].begin;
328 char hi = ranges[which].end;
329
330 // check if there are any ranges or matches below lo. If not, just jl to failure -
331 // if there is anything else to check, check that first, if it falls through jmp to failure.
332 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
333 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
334
335 // generate code for all ranges before this one
336 if (which)
337 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
338
339 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
340 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
341 ++*matchIndex;
342 }
343 failures.append(jump());
344
345 loOrAbove.link(this);
346 } else if (which) {
347 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
348
349 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
350 failures.append(jump());
351
352 loOrAbove.link(this);
353 } else
354 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
355
356 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
357 ++*matchIndex;
358
359 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
360 // fall through to here, the value is above hi.
361
362 // shuffle along & loop around if there are any more matches to handle.
363 unsigned next = which + 1;
364 ranges += next;
365 count -= next;
366 } while (count);
367 }
368
369 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
370 {
371 if (charClass->m_table && !m_decodeSurrogatePairs) {
372 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
373 matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
374 return;
375 }
376
377 JumpList unicodeFail;
378 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
379 JumpList isAscii;
380 if (charClass->m_matches.size() || charClass->m_ranges.size())
381 isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));
382
383 if (charClass->m_matchesUnicode.size()) {
384 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
385 UChar32 ch = charClass->m_matchesUnicode[i];
386 matchDest.append(branch32(Equal, character, Imm32(ch)));
387 }
388 }
389
390 if (charClass->m_rangesUnicode.size()) {
391 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
392 UChar32 lo = charClass->m_rangesUnicode[i].begin;
393 UChar32 hi = charClass->m_rangesUnicode[i].end;
394
395 Jump below = branch32(LessThan, character, Imm32(lo));
396 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
397 below.link(this);
398 }
399 }
400
401 if (charClass->m_matches.size() || charClass->m_ranges.size())
402 unicodeFail = jump();
403 isAscii.link(this);
404 }
405
406 if (charClass->m_ranges.size()) {
407 unsigned matchIndex = 0;
408 JumpList failures;
409 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
410 while (matchIndex < charClass->m_matches.size())
411 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
412
413 failures.link(this);
414 } else if (charClass->m_matches.size()) {
415 // optimization: gather 'a','A' etc back together, can mask & test once.
416 Vector<char> matchesAZaz;
417
418 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
419 char ch = charClass->m_matches[i];
420 if (m_pattern.ignoreCase()) {
421 if (isASCIILower(ch)) {
422 matchesAZaz.append(ch);
423 continue;
424 }
425 if (isASCIIUpper(ch))
426 continue;
427 }
428 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
429 }
430
431 if (unsigned countAZaz = matchesAZaz.size()) {
432 or32(TrustedImm32(32), character);
433 for (unsigned i = 0; i < countAZaz; ++i)
434 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
435 }
436 }
437
438 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
439 unicodeFail.link(this);
440 }
441
442#ifdef JIT_UNICODE_EXPRESSIONS
443 void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failuresAfterIncrementingIndex, const RegisterID character)
444 {
445 ASSERT(term->type == PatternTerm::TypeCharacterClass);
446
447 if (term->isFixedWidthCharacterClass())
448 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
449 else {
450 add32(TrustedImm32(1), index);
451 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
452 failuresAfterIncrementingIndex.append(atEndOfInput());
453 add32(TrustedImm32(1), index);
454 isBMPChar.link(this);
455 }
456 }
457#endif
458
459 // Jumps if input not available; will have (incorrectly) incremented already!
460 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
461 {
462 if (countToCheck)
463 add32(Imm32(countToCheck), index);
464 return branch32(Above, index, length);
465 }
466
467 Jump jumpIfAvailableInput(unsigned countToCheck)
468 {
469 add32(Imm32(countToCheck), index);
470 return branch32(BelowOrEqual, index, length);
471 }
472
473 Jump checkNotEnoughInput(RegisterID additionalAmount)
474 {
475 add32(index, additionalAmount);
476 return branch32(Above, additionalAmount, length);
477 }
478
479 Jump checkInput()
480 {
481 return branch32(BelowOrEqual, index, length);
482 }
483
484 Jump atEndOfInput()
485 {
486 return branch32(Equal, index, length);
487 }
488
489 Jump notAtEndOfInput()
490 {
491 return branch32(NotEqual, index, length);
492 }
493
494 BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
495 {
496 RegisterID base = input;
497
498 // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
499 // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
500 // for addressing in extreme cases where we might underflow. Therefore we check to see if
501 // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
502 // If so, we do our own address calculating by adjusting the base, using the result register
503 // as a temp address register.
504 unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
505 unsigned offsetAdjustAmount = 0x40000000;
506 if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
507 base = tempReg;
508 move(input, base);
509 while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
510 subPtr(TrustedImm32(offsetAdjustAmount), base);
511 if (m_charSize != Char8)
512 subPtr(TrustedImm32(offsetAdjustAmount), base);
513 negativeCharacterOffset -= offsetAdjustAmount;
514 }
515 }
516
517 Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
518
519 if (m_charSize == Char8)
520 return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
521
522 return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
523 }
524
525#ifdef JIT_UNICODE_EXPRESSIONS
526 void tryReadUnicodeCharImpl(RegisterID resultReg)
527 {
528 ASSERT(m_charSize == Char16);
529
530 JumpList notUnicode;
531
532 load16Unaligned(regUnicodeInputAndTrail, resultReg);
533
534 // Is the character a leading surrogate?
535 and32(surrogateTagMask, resultReg, unicodeTemp);
536 notUnicode.append(branch32(NotEqual, unicodeTemp, leadingSurrogateTag));
537
538 // Is the input long enough to read a trailing surrogate?
539 addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
540 notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));
541
542 // Is the character a trailing surrogate?
543 load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
544 and32(surrogateTagMask, regUnicodeInputAndTrail, unicodeTemp);
545 notUnicode.append(branch32(NotEqual, unicodeTemp, trailingSurrogateTag));
546
547 // Combine leading and trailing surrogates to produce a code point.
548 lshift32(TrustedImm32(10), resultReg);
549 getEffectiveAddress(BaseIndex(resultReg, regUnicodeInputAndTrail, TimesOne, -U16_SURROGATE_OFFSET), resultReg);
550 notUnicode.link(this);
551 }
552
553 void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
554 {
555 ASSERT(m_charSize == Char16);
556
557 getEffectiveAddress(address, regUnicodeInputAndTrail);
558
559 if (resultReg == regT0)
560 m_tryReadUnicodeCharacterCalls.append(nearCall());
561 else
562 tryReadUnicodeCharImpl(resultReg);
563 }
564#endif
565
566 void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
567 {
568 BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
569
570 if (m_charSize == Char8)
571 load8(address, resultReg);
572#ifdef JIT_UNICODE_EXPRESSIONS
573 else if (m_decodeSurrogatePairs)
574 tryReadUnicodeChar(address, resultReg);
575#endif
576 else
577 load16Unaligned(address, resultReg);
578 }
579
580 Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
581 {
582 readCharacter(negativeCharacterOffset, character);
583
584 // For case-insesitive compares, non-ascii characters that have different
585 // upper & lower case representations are converted to a character class.
586 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
587 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
588 or32(TrustedImm32(0x20), character);
589 ch |= 0x20;
590 }
591
592 return branch32(NotEqual, character, Imm32(ch));
593 }
594
595 void storeToFrame(RegisterID reg, unsigned frameLocation)
596 {
597 poke(reg, frameLocation);
598 }
599
600 void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
601 {
602 poke(imm, frameLocation);
603 }
604
605#if CPU(ARM64) || CPU(X86_64)
606 void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
607 {
608 poke(imm, frameLocation);
609 }
610#endif
611
612 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
613 {
614 return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
615 }
616
617 void loadFromFrame(unsigned frameLocation, RegisterID reg)
618 {
619 peek(reg, frameLocation);
620 }
621
622 void loadFromFrameAndJump(unsigned frameLocation)
623 {
624 farJump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
625 }
626
627 unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
628 {
629 if (!callFrameSize)
630 return 0;
631
632 callFrameSize *= sizeof(void*);
633 if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
634 CRASH();
635 callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
636 return callFrameSize;
637 }
638 void initCallFrame()
639 {
640 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
641 if (callFrameSizeInBytes) {
642#if CPU(X86_64) || CPU(ARM64)
643 if (Options::zeroStackFrame()) {
644 // We need to start from the stack pointer, because we could have spilled callee saves
645 move(stackPointerRegister, regT0);
646 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
647 if (callFrameSizeInBytes <= 128) {
648 for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
649 storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
650 } else {
651 Label zeroLoop = label();
652 subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
653#if CPU(ARM64)
654 storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
655#else
656 storePtr(TrustedImm32(0), Address(regT0));
657 storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
658#endif
659 branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
660 }
661 } else
662#endif
663 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
664
665 }
666 }
667 void removeCallFrame()
668 {
669 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
670 if (callFrameSizeInBytes)
671 addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
672 }
673
674 void generateFailReturn()
675 {
676 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
677 move(TrustedImm32(0), returnRegister2);
678 generateReturn();
679 }
680
681 void generateJITFailReturn()
682 {
683 if (m_abortExecution.empty() && m_hitMatchLimit.empty())
684 return;
685
686 JumpList finishExiting;
687 if (!m_abortExecution.empty()) {
688 m_abortExecution.link(this);
689 move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
690 finishExiting.append(jump());
691 }
692
693 if (!m_hitMatchLimit.empty()) {
694 m_hitMatchLimit.link(this);
695 move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
696 }
697
698 finishExiting.link(this);
699 removeCallFrame();
700 move(TrustedImm32(0), returnRegister2);
701 generateReturn();
702 }
703
704 // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
705 void setSubpatternStart(RegisterID reg, unsigned subpattern)
706 {
707 ASSERT(subpattern);
708 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
709 store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
710 }
711 void setSubpatternEnd(RegisterID reg, unsigned subpattern)
712 {
713 ASSERT(subpattern);
714 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
715 store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
716 }
717 void clearSubpatternStart(unsigned subpattern)
718 {
719 ASSERT(subpattern);
720 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
721 store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
722 }
723
724 void clearMatches(unsigned subpattern, unsigned lastSubpattern)
725 {
726 for (; subpattern <= lastSubpattern; subpattern++)
727 clearSubpatternStart(subpattern);
728 }
729
730 // We use one of three different strategies to track the start of the current match,
731 // while matching.
732 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
733 // at the end of matching. This is irrespective of compileMode, and in this case
734 // these methods should never be called.
735 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
736 // vector, store the match start in the output vector.
737 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
738 // in this register.
739 void setMatchStart(RegisterID reg)
740 {
741 ASSERT(!m_pattern.m_body->m_hasFixedSize);
742 if (compileMode == IncludeSubpatterns)
743 store32(reg, output);
744 else
745 move(reg, output);
746 }
747 void getMatchStart(RegisterID reg)
748 {
749 ASSERT(!m_pattern.m_body->m_hasFixedSize);
750 if (compileMode == IncludeSubpatterns)
751 load32(output, reg);
752 else
753 move(output, reg);
754 }
755
756 enum YarrOpCode : uint8_t {
757 // These nodes wrap body alternatives - those in the main disjunction,
758 // rather than subpatterns or assertions. These are chained together in
759 // a doubly linked list, with a 'begin' node for the first alternative,
760 // a 'next' node for each subsequent alternative, and an 'end' node at
761 // the end. In the case of repeating alternatives, the 'end' node also
762 // has a reference back to 'begin'.
763 OpBodyAlternativeBegin,
764 OpBodyAlternativeNext,
765 OpBodyAlternativeEnd,
766 // Similar to the body alternatives, but used for subpatterns with two
767 // or more alternatives.
768 OpNestedAlternativeBegin,
769 OpNestedAlternativeNext,
770 OpNestedAlternativeEnd,
771 // Used for alternatives in subpatterns where there is only a single
772 // alternative (backtracking is easier in these cases), or for alternatives
773 // which never need to be backtracked (those in parenthetical assertions,
774 // terminal subpatterns).
775 OpSimpleNestedAlternativeBegin,
776 OpSimpleNestedAlternativeNext,
777 OpSimpleNestedAlternativeEnd,
778 // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
779 OpParenthesesSubpatternOnceBegin,
780 OpParenthesesSubpatternOnceEnd,
781 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
782 OpParenthesesSubpatternTerminalBegin,
783 OpParenthesesSubpatternTerminalEnd,
784 // Used to wrap generic captured matches
785 OpParenthesesSubpatternBegin,
786 OpParenthesesSubpatternEnd,
787 // Used to wrap parenthetical assertions.
788 OpParentheticalAssertionBegin,
789 OpParentheticalAssertionEnd,
790 // Wraps all simple terms (pattern characters, character classes).
791 OpTerm,
792 // Where an expression contains only 'once through' body alternatives
793 // and no repeating ones, this op is used to return match failure.
794 OpMatchFailed
795 };
796
    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        // Constructs an OpTerm op wrapping a simple pattern term.
        explicit YarrOp(PatternTerm* term)
            : m_term(term)
            , m_op(OpTerm)
            , m_isDeadCode(false)
        {
        }

        // Constructs a structural (non-term) op of the given opcode.
        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        // Note: neither constructor initializes these three members; they are
        // only meaningful for alternative ops, which are expected to fill
        // them in after construction.
        PatternAlternative* m_alternative;
        size_t m_previousOp;
        size_t m_nextOp;

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        PatternTerm* m_term;
        YarrOpCode m_op;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
        // recalculating it.
        Checked<unsigned> m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };
852
853 // BacktrackingState
854 // This class encapsulates information about the state of code generation
855 // whilst generating the code for backtracking, when a term fails to match.
856 // Upon entry to code generation of the backtracking code for a given node,
857 // the Backtracking state will hold references to all control flow sources
858 // that are outputs in need of further backtracking from the prior node
859 // generated (which is the subsequent operation in the regular expression,
860 // and in the m_ops Vector, since we generated backtracking backwards).
861 // These references to control flow take the form of:
862 // - A jump list of jumps, to be linked to code that will backtrack them
863 // further.
864 // - A set of DataLabelPtr values, to be populated with values to be
865 // treated effectively as return addresses backtracking into complex
866 // subpatterns.
867 // - A flag indicating that the current sequence of generated code up to
868 // this point requires backtracking.
869 class BacktrackingState {
870 public:
871 BacktrackingState()
872 : m_pendingFallthrough(false)
873 {
874 }
875
876 // Add a jump or jumps, a return address, or set the flag indicating
877 // that the current 'fallthrough' control flow requires backtracking.
878 void append(const Jump& jump)
879 {
880 m_laterFailures.append(jump);
881 }
882 void append(JumpList& jumpList)
883 {
884 m_laterFailures.append(jumpList);
885 }
886 void append(const DataLabelPtr& returnAddress)
887 {
888 m_pendingReturns.append(returnAddress);
889 }
890 void fallthrough()
891 {
892 ASSERT(!m_pendingFallthrough);
893 m_pendingFallthrough = true;
894 }
895
896 // These methods clear the backtracking state, either linking to the
897 // current location, a provided label, or copying the backtracking out
898 // to a JumpList. All actions may require code generation to take place,
899 // and as such are passed a pointer to the assembler.
900 void link(MacroAssembler* assembler)
901 {
902 if (m_pendingReturns.size()) {
903 Label here(assembler);
904 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
905 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
906 m_pendingReturns.clear();
907 }
908 m_laterFailures.link(assembler);
909 m_laterFailures.clear();
910 m_pendingFallthrough = false;
911 }
912 void linkTo(Label label, MacroAssembler* assembler)
913 {
914 if (m_pendingReturns.size()) {
915 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
916 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
917 m_pendingReturns.clear();
918 }
919 if (m_pendingFallthrough)
920 assembler->jump(label);
921 m_laterFailures.linkTo(label, assembler);
922 m_laterFailures.clear();
923 m_pendingFallthrough = false;
924 }
925 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
926 {
927 if (m_pendingReturns.size()) {
928 Label here(assembler);
929 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
930 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
931 m_pendingReturns.clear();
932 m_pendingFallthrough = true;
933 }
934 if (m_pendingFallthrough)
935 jumpList.append(assembler->jump());
936 jumpList.append(m_laterFailures);
937 m_laterFailures.clear();
938 m_pendingFallthrough = false;
939 }
940
941 bool isEmpty()
942 {
943 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
944 }
945
946 // Called at the end of code generation to link all return addresses.
947 void linkDataLabels(LinkBuffer& linkBuffer)
948 {
949 ASSERT(isEmpty());
950 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
951 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
952 }
953
954 private:
955 struct ReturnAddressRecord {
956 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
957 : m_dataLabel(dataLabel)
958 , m_backtrackLocation(backtrackLocation)
959 {
960 }
961
962 DataLabelPtr m_dataLabel;
963 Label m_backtrackLocation;
964 };
965
966 JumpList m_laterFailures;
967 bool m_pendingFallthrough;
968 Vector<DataLabelPtr, 4> m_pendingReturns;
969 Vector<ReturnAddressRecord, 4> m_backtrackRecords;
970 };
971
972 // Generation methods:
973 // ===================
974
975 // This method provides a default implementation of backtracking common
976 // to many terms; terms commonly jump out of the forwards matching path
977 // on any failed conditions, and add these jumps to the m_jumps list. If
978 // no special handling is required we can often just backtrack to m_jumps.
979 void backtrackTermDefault(size_t opIndex)
980 {
981 YarrOp& op = m_ops[opIndex];
982 m_backtrackingState.append(op.m_jumps);
983 }
984
985 void generateAssertionBOL(size_t opIndex)
986 {
987 YarrOp& op = m_ops[opIndex];
988 PatternTerm* term = op.m_term;
989
990 if (m_pattern.multiline()) {
991 const RegisterID character = regT0;
992
993 JumpList matchDest;
994 if (!term->inputPosition)
995 matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
996
997 readCharacter(m_checkedOffset - term->inputPosition + 1, character);
998 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
999 op.m_jumps.append(jump());
1000
1001 matchDest.link(this);
1002 } else {
1003 // Erk, really should poison out these alternatives early. :-/
1004 if (term->inputPosition)
1005 op.m_jumps.append(jump());
1006 else
1007 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1008 }
1009 }
1010 void backtrackAssertionBOL(size_t opIndex)
1011 {
1012 backtrackTermDefault(opIndex);
1013 }
1014
1015 void generateAssertionEOL(size_t opIndex)
1016 {
1017 YarrOp& op = m_ops[opIndex];
1018 PatternTerm* term = op.m_term;
1019
1020 if (m_pattern.multiline()) {
1021 const RegisterID character = regT0;
1022
1023 JumpList matchDest;
1024 if (term->inputPosition == m_checkedOffset.unsafeGet())
1025 matchDest.append(atEndOfInput());
1026
1027 readCharacter(m_checkedOffset - term->inputPosition, character);
1028 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1029 op.m_jumps.append(jump());
1030
1031 matchDest.link(this);
1032 } else {
1033 if (term->inputPosition == m_checkedOffset.unsafeGet())
1034 op.m_jumps.append(notAtEndOfInput());
1035 // Erk, really should poison out these alternatives early. :-/
1036 else
1037 op.m_jumps.append(jump());
1038 }
1039 }
1040 void backtrackAssertionEOL(size_t opIndex)
1041 {
1042 backtrackTermDefault(opIndex);
1043 }
1044
1045 // Also falls though on nextIsNotWordChar.
1046 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1047 {
1048 YarrOp& op = m_ops[opIndex];
1049 PatternTerm* term = op.m_term;
1050
1051 const RegisterID character = regT0;
1052
1053 if (term->inputPosition == m_checkedOffset.unsafeGet())
1054 nextIsNotWordChar.append(atEndOfInput());
1055
1056 readCharacter(m_checkedOffset - term->inputPosition, character);
1057
1058 CharacterClass* wordcharCharacterClass;
1059
1060 if (m_unicodeIgnoreCase)
1061 wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1062 else
1063 wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1064
1065 matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1066 }
1067
    // Emit a word-boundary assertion ('\b', or '\B' when term->invert()).
    // The previous character is classified first; for each outcome the
    // current character is then classified via matchAssertionWordchar(),
    // and the four combinations are routed to match or failure.
    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        // Classify the character preceding the current position. At the very
        // start of input there is no preceding character, so it is treated
        // as not being a word character (the atBegin jump skips the class
        // test below).
        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
        readCharacter(m_checkedOffset - term->inputPosition + 1, character);

        CharacterClass* wordcharCharacterClass;

        // Unicode ignore-case matching uses an extended word-character class.
        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, matchDest, wordcharCharacterClass);
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        // For '\B' the word/non-word destination lists are swapped, so the
        // same routing below implements both assertions.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        // non-word followed by non-word: no boundary, so a failure for '\b'.
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-though!
        }

        // word followed by word: no boundary, so a failure for '\b'.
        op.m_jumps.append(wordCharThenWordChar);

        // The two mixed combinations are boundaries: the assertion matches.
        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
1121 void backtrackAssertionWordBoundary(size_t opIndex)
1122 {
1123 backtrackTermDefault(opIndex);
1124 }
1125
1126#if ENABLE(YARR_JIT_BACKREFERENCES)
    // Emit a loop comparing the captured group's text against the input at
    // the current match position, character by character. On a mismatch,
    // jumps to characterMatchFails; falls through once patternIndex reaches
    // the capture's end index. index and patternIndex are advanced as
    // characters match.
    void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        unsigned subpatternId = term->backReferenceSubpatternId;

        Label loop(this);

        // Load the next character of the capture and of the input.
        readCharacter(0, patternCharacter, patternIndex);
        readCharacter(m_checkedOffset - term->inputPosition, character);

        if (!m_pattern.ignoreCase())
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
        else {
            // Case-insensitive: try an exact match first, then compare the
            // canonicalized forms looked up in the Latin-1 canonical table.
            // (generateBackReference() refuses ignore-case matching for
            // non-8-bit strings, so only Latin-1 input reaches this path.)
            Jump charactersMatch = branch32(Equal, character, patternCharacter);
            ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(characterTableEntry, character);
            ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(patternTableEntry, patternCharacter);
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
            charactersMatch.link(this);
        }

        add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), patternIndex);

        if (m_decodeSurrogatePairs) {
            // Non-BMP characters occupy two code units: step both positions
            // by an extra unit.
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            add32(TrustedImm32(1), patternIndex);
            isBMPChar.link(this);
        }

        // Loop until patternIndex reaches the capture's end index, stored at
        // output[subpatternId * 2 + 1].
        branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
    }
1162
    // Emit matching code for a back reference (e.g. \1), dispatching on the
    // term's quantifier. The capture's start/end indices live in the output
    // vector at slots (subpatternId * 2) and (subpatternId * 2 + 1); a start
    // index of -1 (group did not participate) or an empty capture matches
    // without consuming input.
    void generateBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        // Case-insensitive comparison relies on the Latin-1 canonical table
        // (see matchBackreference()), so give up on JIT compilation for
        // ignore-case patterns over non-8-bit strings.
        if (m_pattern.ignoreCase() && m_charSize != Char8) {
            m_failureReason = JITFailureReason::BackReference;
            return;
        }

        unsigned subpatternId = term->backReferenceSubpatternId;
        unsigned parenthesesFrameLocation = term->frameLocation;

        const RegisterID characterOrTemp = regT0;
        const RegisterID patternIndex = regT1;
        const RegisterID patternTemp = regT2;

        // Save the input position on entry, and zero the match counter for
        // quantified back references.
        storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
        if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
            storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());

        JumpList matches;

        if (term->quantityType != QuantifierNonGreedy) {
            // Load the capture's start and end indices.
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
                matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
                matches.append(branch32(Equal, patternIndex, patternTemp));
            } else {
                // Plain {1} back reference: record a match amount of 1 for
                // the empty case before succeeding.
                Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
                Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
                zeroLengthMatch.link(this);
                storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(jump());
                tryNonZeroMatch.link(this);
            }
        }

        switch (term->quantityType) {
        case QuantifierFixedCount: {
            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            op.m_jumps.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);

            // Count the completed repetition; loop until quantityMaxCount.
            if (term->quantityMaxCount != 1) {
                loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
                add32(TrustedImm32(1), characterOrTemp);
                storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
                load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
                load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
                jump(outerLoop);
            }
            matches.link(this);
            break;
        }

        case QuantifierGreedy: {
            JumpList incompleteMatches;

            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            // Running out of input simply stops taking more repetitions.
            matches.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
            add32(TrustedImm32(1), characterOrTemp);
            storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            if (term->quantityMaxCount != quantifyInfinite)
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // Store current index in frame for restoring after a partial match
            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
            jump(outerLoop);

            // A repetition failed part-way: rewind to its start and accept
            // the repetitions matched so far.
            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            op.m_reentry = label();
            break;
        }

        case QuantifierNonGreedy: {
            JumpList incompleteMatches;

            // Initially consume nothing; backtracking re-enters below to try
            // taking one more repetition.
            matches.append(jump());

            op.m_reentry = label();

            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
            Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
            zeroLengthMatch.link(this);
            storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            matches.append(jump());
            tryNonZeroMatch.link(this);

            // Check if we have input remaining to match
            sub32(patternIndex, patternTemp);
            matches.append(checkNotEnoughInput(patternTemp));

            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            matches.append(jump());

            // The repetition failed part-way: restore the input position.
            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            break;
        }
        }
    }
    // Emit backtracking for a back reference. Fixed-count restores the input
    // position; greedy gives back one repetition at a time; non-greedy tries
    // to consume one more repetition. Exhausted cases fall through to
    // further backtracking.
    void backtrackBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        unsigned subpatternId = term->backReferenceSubpatternId;

        m_backtrackingState.link(this);
        op.m_jumps.link(this);

        JumpList failures;

        unsigned parenthesesFrameLocation = term->frameLocation;
        switch (term->quantityType) {
        case QuantifierFixedCount:
            // Nothing to give back - just restore where the term started.
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
            break;

        case QuantifierGreedy: {
            const RegisterID matchAmount = regT0;
            const RegisterID patternStartIndex = regT1;
            const RegisterID patternEndIndexOrLen = regT2;

            // With no repetitions left to surrender, keep backtracking.
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            failures.append(branchTest32(Zero, matchAmount));

            // Rewind the input by one repetition, i.e. the capture's length.
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
            sub32(patternStartIndex, patternEndIndexOrLen);
            sub32(patternEndIndexOrLen, index);

            sub32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }

        case QuantifierNonGreedy: {
            const RegisterID matchAmount = regT0;

            // Try to match one more repetition, unless at the end of input
            // or already at the quantifier's maximum.
            failures.append(atEndOfInput());
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            // NOTE(review): with the immediate as the left operand this
            // comparison reads maxCount >= matchAmount; the intended guard is
            // presumably matchAmount >= maxCount - verify against the
            // MacroAssembler branch32(cond, Imm32, RegisterID) commuting
            // overload.
            if (term->quantityMaxCount != quantifyInfinite)
                failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
            add32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }
        }
        failures.link(this);
        m_backtrackingState.fallthrough();
    }
1347#endif
1348
    // Emit matching code for a single fixed pattern character (quantity
    // 'Once', i.e. {1}). Consecutive ops that are also single fixed pattern
    // characters at adjacent input positions are fused into one wide
    // load-and-compare (up to 8 characters on 64-bit targets for 8-bit
    // strings), and the fused ops are marked as dead code.
    void generatePatternCharacterOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];

        // An earlier op may already have consumed this one by fusing it.
        if (op.m_isDeadCode)
            return;

        // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
        // node, so there must always be at least one more node.
        ASSERT(opIndex + 1 < m_ops.size());
        YarrOp* nextOp = &m_ops[opIndex + 1];

        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        if (!isLatin1(ch) && (m_charSize == Char8)) {
            // Have a 16 bit pattern character and an 8 bit string - short circuit
            op.m_jumps.append(jump());
            return;
        }

        const RegisterID character = regT0;
        // How many pattern characters can be folded into one compare.
#if CPU(X86_64) || CPU(ARM64)
        unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
#else
        unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
#endif
        // allCharacters accumulates the expected character values packed per
        // code unit; ignoreCaseMask accumulates 0x20 bits OR-ed into the
        // input (and the expectation) to fold ASCII letter case.
        uint64_t ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
        uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
        uint64_t allCharacters = ch;
#endif
        unsigned numberCharacters;
        unsigned startTermPosition = term->inputPosition;

        // For case-insensitive compares, non-ASCII characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));

        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
#if CPU(BIG_ENDIAN)
            ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
            ignoreCaseMask |= 32;
#endif
        }

        // Fuse following fixed single-character terms at consecutive input
        // positions into this compare.
        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
            PatternTerm* nextTerm = nextOp->m_term;

            // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
            // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
            if (nextTerm->type != PatternTerm::TypePatternCharacter
                || nextTerm->quantityType != QuantifierFixedCount
                || nextTerm->quantityMaxCount != 1
                || nextTerm->inputPosition != (startTermPosition + numberCharacters)
                || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
                break;

            // The fused term no longer needs code of its own.
            nextOp->m_isDeadCode = true;

#if CPU(BIG_ENDIAN)
            int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
#else
            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
#endif

            UChar32 currentCharacter = nextTerm->patternCharacter;

            if (!isLatin1(currentCharacter) && (m_charSize == Char8)) {
                // Have a 16 bit pattern character and an 8 bit string - short circuit
                op.m_jumps.append(jump());
                return;
            }

            // For case-insensitive compares, non-ASCII characters that have different
            // upper & lower case representations are converted to a character class.
            ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));

            allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);

            if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
                ignoreCaseMask |= 32ULL << shiftAmount;
        }

        if (m_decodeSurrogatePairs)
            op.m_jumps.append(jumpIfNoAvailableInput());

        if (m_charSize == Char8) {
            // checkN compares N bytes of input at the given (negative)
            // offset against the expected characters, OR-ing in the
            // case-folding mask first when one is present.
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };

            auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
                load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or32(Imm32(mask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
            };

            auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    if (mask)
                        or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };

#if CPU(X86_64) || CPU(ARM64)
            auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
#endif

            // Dispatch on the number of fused characters, splitting odd
            // counts into power-of-two sized compares.
            switch (numberCharacters) {
            case 1:
                // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                return;
            case 2: {
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
                return;
            }
            case 3: {
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
                return;
            }
            case 4: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                return;
            }
#if CPU(X86_64) || CPU(ARM64)
            case 5: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
                return;
            }
            case 6: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
                return;
            }
            case 7: {
                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
                check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
                return;
            }
            case 8: {
                check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                return;
            }
#endif
            }
        } else {
            // 16 bit subject string: same scheme, two bytes per character.
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };

            auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    if (mask)
                        or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };

#if CPU(X86_64) || CPU(ARM64)
            auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
#endif

            switch (numberCharacters) {
            case 1:
                // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                return;
            case 2:
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                return;
#if CPU(X86_64) || CPU(ARM64)
            case 3:
                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
                return;
            case 4:
                check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                return;
#endif
            }
        }
    }
1556 void backtrackPatternCharacterOnce(size_t opIndex)
1557 {
1558 backtrackTermDefault(opIndex);
1559 }
1560
1561 void generatePatternCharacterFixed(size_t opIndex)
1562 {
1563 YarrOp& op = m_ops[opIndex];
1564 PatternTerm* term = op.m_term;
1565 UChar32 ch = term->patternCharacter;
1566
1567 const RegisterID character = regT0;
1568 const RegisterID countRegister = regT1;
1569
1570 if (m_decodeSurrogatePairs)
1571 op.m_jumps.append(jumpIfNoAvailableInput());
1572
1573 move(index, countRegister);
1574 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1575 scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
1576 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1577
1578 Label loop(this);
1579 readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1580 // For case-insesitive compares, non-ascii characters that have different
1581 // upper & lower case representations are converted to a character class.
1582 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1583 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1584 or32(TrustedImm32(0x20), character);
1585 ch |= 0x20;
1586 }
1587
1588 op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1589#ifdef JIT_UNICODE_EXPRESSIONS
1590 if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1591 add32(TrustedImm32(2), countRegister);
1592 else
1593#endif
1594 add32(TrustedImm32(1), countRegister);
1595 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1596 }
1597 void backtrackPatternCharacterFixed(size_t opIndex)
1598 {
1599 backtrackTermDefault(opIndex);
1600 }
1601
1602 void generatePatternCharacterGreedy(size_t opIndex)
1603 {
1604 YarrOp& op = m_ops[opIndex];
1605 PatternTerm* term = op.m_term;
1606 UChar32 ch = term->patternCharacter;
1607
1608 const RegisterID character = regT0;
1609 const RegisterID countRegister = regT1;
1610
1611 move(TrustedImm32(0), countRegister);
1612
1613 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1614 if (!(!isLatin1(ch) && (m_charSize == Char8))) {
1615 JumpList failures;
1616 Label loop(this);
1617 failures.append(atEndOfInput());
1618 failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1619
1620 add32(TrustedImm32(1), index);
1621#ifdef JIT_UNICODE_EXPRESSIONS
1622 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1623 Jump surrogatePairOk = notAtEndOfInput();
1624 sub32(TrustedImm32(1), index);
1625 failures.append(jump());
1626 surrogatePairOk.link(this);
1627 add32(TrustedImm32(1), index);
1628 }
1629#endif
1630 add32(TrustedImm32(1), countRegister);
1631
1632 if (term->quantityMaxCount == quantifyInfinite)
1633 jump(loop);
1634 else
1635 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1636
1637 failures.link(this);
1638 }
1639 op.m_reentry = label();
1640
1641 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1642 }
    // Backtracks a greedy-quantified pattern character by giving back one match:
    // if no matches remain to give back, propagate the failure; otherwise
    // decrement the match count, rewind index past one instance of the character,
    // and jump back to the generate reentry point to re-store the reduced count.
    void backtrackPatternCharacterGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
        // With a count of zero there is nothing left to give back - keep backtracking.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        // In unicode mode a non-BMP character was matched as a surrogate pair and
        // consumed two code units; rewind index accordingly.
        if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
            sub32(TrustedImm32(1), index);
        else
            sub32(TrustedImm32(2), index);
        jump(op.m_reentry);
    }
1661
1662 void generatePatternCharacterNonGreedy(size_t opIndex)
1663 {
1664 YarrOp& op = m_ops[opIndex];
1665 PatternTerm* term = op.m_term;
1666
1667 const RegisterID countRegister = regT1;
1668
1669 move(TrustedImm32(0), countRegister);
1670 op.m_reentry = label();
1671 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1672 }
    // Backtracks a non-greedy pattern character by attempting to match one MORE
    // instance of the character. On success, index and the stored count advance
    // and control jumps back to the generate reentry point; once no further match
    // is possible (end of input, count at maximum, or character mismatch), index
    // is rewound past everything this term consumed and failure propagates.
    void backtrackPatternCharacterNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        UChar32 ch = term->patternCharacter;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);

        // Unless have a 16 bit pattern character and an 8 bit string - short circuit
        if (!(!isLatin1(ch) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityMaxCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));

            add32(TrustedImm32(1), index);
#ifdef JIT_UNICODE_EXPRESSIONS
            if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
                // Non-BMP characters occupy two code units; advance index a second
                // time, or fail (restoring index) if that runs off the input.
                Jump surrogatePairOk = notAtEndOfInput();
                sub32(TrustedImm32(1), index);
                nonGreedyFailures.append(jump());
                surrogatePairOk.link(this);
                add32(TrustedImm32(1), index);
            }
#endif
            add32(TrustedImm32(1), countRegister);

            jump(op.m_reentry);
            nonGreedyFailures.link(this);
        }

        if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
            // subtract countRegister*2 for non-BMP characters
            lshift32(TrustedImm32(1), countRegister);
        }

        // Rewind index past all code units this term matched, then fall through
        // to the enclosing backtracking.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
1718
    // Emits matching code for a single ({1}-quantified) character class term.
    // Reads one character, tests it against the class (unless the class is the
    // non-inverted "any character" builtin, which always succeeds), and in
    // unicode mode handles the extra index advance required for non-BMP matches.
    void generateCharacterClassOnce(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (m_decodeSurrogatePairs) {
            op.m_jumps.append(jumpIfNoAvailableInput());
            // Save the pre-match index so backtrackCharacterClassOnce() can restore
            // it; the amount consumed varies (1 or 2 code units) in unicode mode.
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        }

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            // For inverted classes a class match IS the failure; otherwise failure
            // is falling through without matching.
            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
            // The matched character's width was not statically known: if it was a
            // non-BMP character (>= the supplementary planes base), consume the
            // second code unit of its surrogate pair.
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            op.m_jumps.append(atEndOfInput());
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
        }
#endif
    }
    // Backtracks a single character class match. In unicode mode the number of
    // code units consumed was variable, so index is restored from the begin index
    // saved by generateCharacterClassOnce(); then default backtracking applies.
    void backtrackCharacterClassOnce(size_t opIndex)
    {
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            YarrOp& op = m_ops[opIndex];
            PatternTerm* term = op.m_term;

            m_backtrackingState.link(this);
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
            m_backtrackingState.fallthrough();
        }
#endif
        backtrackTermDefault(opIndex);
    }
1768
    // Emits matching code for a character class with a fixed quantity count > 1
    // (e.g. /[a-z]{3}/). countRegister starts at index - scaledMaxCount and is
    // walked up to index, matching one instance of the class per iteration.
    void generateCharacterClassFixed(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        if (m_decodeSurrogatePairs)
            op.m_jumps.append(jumpIfNoAvailableInput());

        move(index, countRegister);

        Checked<unsigned> scaledMaxCount = term->quantityMaxCount;

#ifdef JIT_UNICODE_EXPRESSIONS
        // When every character the class can match is non-BMP, each match
        // consumes exactly two code units, so the span to walk is doubled.
        if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
            scaledMaxCount *= 2;
#endif
        sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);

        Label loop(this);
        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            if (term->isFixedWidthCharacterClass())
                // Every match has the same, statically-known width: 2 code units
                // if the class contains non-BMP characters, otherwise 1.
                add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
            else {
                // Variable width: step by 1, then by one more for a non-BMP match
                // (advancing index past the low surrogate as well).
                add32(TrustedImm32(1), countRegister);
                Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
                op.m_jumps.append(atEndOfInput());
                add32(TrustedImm32(1), countRegister);
                add32(TrustedImm32(1), index);
                isBMPChar.link(this);
            }
        } else
#endif
            add32(TrustedImm32(1), countRegister);
        // Loop until the walker catches up with index.
        branch32(NotEqual, countRegister, index).linkTo(loop, this);
    }
    // A fixed-count character class records no per-term backtracking state, so
    // failure simply propagates to the preceding term.
    void backtrackCharacterClassFixed(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
1827
    // Emits matching code for a greedy-quantified character class (e.g. /[a-z]*/).
    // Loops consuming matching characters up to quantityMaxCount, counting them in
    // countRegister, and stores the final count to the backtracking frame for
    // backtrackCharacterClassGreedy() to unwind.
    void generateCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        // When match widths vary (or the class is inverted), backtracking cannot
        // compute positions arithmetically, so save the starting index.
        if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
            storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        move(TrustedImm32(0), countRegister);

        JumpList failures;
        JumpList failuresDecrementIndex;
        Label loop(this);
#ifdef JIT_UNICODE_EXPRESSIONS
        if (term->isFixedWidthCharacterClass() && term->characterClass->hasNonBMPCharacters()) {
            // Each match needs two code units; check two characters of input remain.
            move(TrustedImm32(1), character);
            failures.append(checkNotEnoughInput(character));
        } else
#endif
            failures.append(atEndOfInput());

        if (term->invert()) {
            // Inverted class: a class match is a failure.
            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(m_checkedOffset - term->inputPosition, character);
            // If we are matching the "any character" builtin class for non-unicode patterns,
            // we only need to read the character and don't need to match as it will always succeed.
            if (!term->characterClass->m_anyCharacter) {
                matchCharacterClass(character, matchDest, term->characterClass);
                failures.append(jump());
            }
            matchDest.link(this);
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs)
            advanceIndexAfterCharacterClassTermMatch(term, failuresDecrementIndex, character);
        else
#endif
            add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), countRegister);

        if (term->quantityMaxCount != quantifyInfinite) {
            branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
            failures.append(jump());
        } else
            jump(loop);

        // Failures flagged by advanceIndexAfterCharacterClassTermMatch() occurred
        // after index had already been advanced; undo that advance first.
        if (!failuresDecrementIndex.empty()) {
            failuresDecrementIndex.link(this);
            sub32(TrustedImm32(1), index);
        }

        failures.link(this);
        // Backtracking jumps back here after reducing the count, to re-store it.
        op.m_reentry = label();

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
    // Backtracks a greedy character class by giving back one match. For classes
    // whose match width is statically known, index is rewound arithmetically;
    // otherwise positions cannot be computed, so the input is re-walked from the
    // saved begin index, re-reading characters to find the position after
    // count - 1 matches.
    void backtrackCharacterClassGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        m_backtrackingState.link(this);

        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
        // Nothing left to give back - keep backtracking.
        m_backtrackingState.append(branchTest32(Zero, countRegister));
        sub32(TrustedImm32(1), countRegister);
        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());

        if (!m_decodeSurrogatePairs)
            sub32(TrustedImm32(1), index);
        else if (term->isFixedWidthCharacterClass())
            // Fixed-width class: every match consumed the same number of code units.
            sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
        else {
            // Rematch one less
            const RegisterID character = regT0;

            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);

            Label rematchLoop(this);
            Jump doneRematching = branchTest32(Zero, countRegister);

            readCharacter(m_checkedOffset - term->inputPosition, character);

            sub32(TrustedImm32(1), countRegister);
            add32(TrustedImm32(1), index);

#ifdef JIT_UNICODE_EXPRESSIONS
            // Non-BMP characters consumed two code units on the way forward.
            Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
            add32(TrustedImm32(1), index);
            isBMPChar.link(this);
#endif

            jump(rematchLoop);
            doneRematching.link(this);

            // countRegister was consumed by the rematch walk; reload the (already
            // decremented and stored) match amount before reentering.
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
        }
        jump(op.m_reentry);
    }
1935
    // A non-greedy character class initially matches zero instances; the
    // backtracking code extends the match one instance at a time on failure of
    // subsequent terms.
    void generateCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID countRegister = regT1;

        move(TrustedImm32(0), countRegister);
        op.m_reentry = label();

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            // Variable-width (or inverted) classes need the starting index saved so
            // backtracking can restore it.
            if (!term->characterClass->hasOneCharacterSize() || term->invert())
                storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
        }
#endif

        storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
    }
1955
    // Backtracks a non-greedy character class by attempting to match one MORE
    // instance. On success, index and count advance and control jumps back to the
    // generate reentry point; once no further match is possible (end of input,
    // count at maximum, or class mismatch), index is rewound and failure
    // propagates to the enclosing backtracking.
    void backtrackCharacterClassNonGreedy(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID countRegister = regT1;

        JumpList nonGreedyFailures;
        JumpList nonGreedyFailuresDecrementIndex;

        m_backtrackingState.link(this);

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            // Restore the starting index saved by generateCharacterClassNonGreedy()
            // for variable-width (or inverted) classes.
            if (!term->characterClass->hasOneCharacterSize() || term->invert())
                loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
        }
#endif

        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);

        nonGreedyFailures.append(atEndOfInput());
        nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));

        JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class for non-unicode patterns,
        // we only need to read the character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                nonGreedyFailures.append(matchDest);
            else {
                nonGreedyFailures.append(jump());
                matchDest.link(this);
            }
        }

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs)
            advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailuresDecrementIndex, character);
        else
#endif
            add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), countRegister);

        jump(op.m_reentry);

        // NOTE(review): unlike the greedy case (which does sub32(1, index) here),
        // reaching this label traps via breakpoint() - it appears to assert that
        // advanceIndexAfterCharacterClassTermMatch() never flags a decrement on
        // this path; confirm against the helper's implementation.
        if (!nonGreedyFailuresDecrementIndex.empty()) {
            nonGreedyFailuresDecrementIndex.link(this);
            breakpoint();
        }
        nonGreedyFailures.link(this);
        // Rewind index past everything this term consumed before falling through.
        sub32(countRegister, index);
        m_backtrackingState.fallthrough();
    }
2014
    // Emits matching code for a dot-star enclosure term (patterns of the shape
    // /.*foo.*/). Rather than matching the leading/trailing .* directly, this
    // scans backwards from the match start to the previous newline (or the start
    // of the input) and forwards from index to the next newline (or the end),
    // setting the reported match boundaries accordingly.
    void generateDotStarEnclosure(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;
        const RegisterID matchPos = regT1;
        JumpList foundBeginningNewLine;
        JumpList saveStartIndex;
        JumpList foundEndingNewLine;

        if (m_pattern.dotAll()) {
            // With /s, '.' matches newlines too, so the match spans the whole input.
            move(TrustedImm32(0), matchPos);
            setMatchStart(matchPos);
            move(length, index);
            return;
        }

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);

        // Scan backwards from the match start looking for a newline (which would
        // bound the leading .*), stopping at the initial start position.
        saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
        Label findBOLLoop(this);
        sub32(TrustedImm32(1), matchPos);
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());

        branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
        saveStartIndex.append(jump());

        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);

        // Without /m, a ^ anchor can only be satisfied at position 0.
        if (!m_pattern.multiline() && term->anchors.bolAnchor)
            op.m_jumps.append(branchTest32(NonZero, matchPos));

        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        setMatchStart(matchPos);

        move(index, matchPos);

        // Scan forwards from index for a newline or the end of the input, which
        // bounds the trailing .*.
        Label findEOLLoop(this);
        foundEndingNewLine.append(branch32(Equal, matchPos, length));
        if (m_charSize == Char8)
            load8(BaseIndex(input, matchPos, TimesOne, 0), character);
        else
            load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
        add32(TrustedImm32(1), matchPos);
        jump(findEOLLoop);

        foundEndingNewLine.link(this);

        // Without /m, a $ anchor can only be satisfied at the end of the input.
        if (!m_pattern.multiline() && term->anchors.eolAnchor)
            op.m_jumps.append(branch32(NotEqual, matchPos, length));

        move(matchPos, index);
    }
2077
    // Dot-star enclosures record no per-term backtracking state; failure simply
    // propagates to the preceding term.
    void backtrackDotStarEnclosure(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }
2082
2083 // Code generation/backtracking for simple terms
2084 // (pattern characters, character classes, and assertions).
2085 // These methods farm out work to the set of functions above.
2086 void generateTerm(size_t opIndex)
2087 {
2088 YarrOp& op = m_ops[opIndex];
2089 PatternTerm* term = op.m_term;
2090
2091 switch (term->type) {
2092 case PatternTerm::TypePatternCharacter:
2093 switch (term->quantityType) {
2094 case QuantifierFixedCount:
2095 if (term->quantityMaxCount == 1)
2096 generatePatternCharacterOnce(opIndex);
2097 else
2098 generatePatternCharacterFixed(opIndex);
2099 break;
2100 case QuantifierGreedy:
2101 generatePatternCharacterGreedy(opIndex);
2102 break;
2103 case QuantifierNonGreedy:
2104 generatePatternCharacterNonGreedy(opIndex);
2105 break;
2106 }
2107 break;
2108
2109 case PatternTerm::TypeCharacterClass:
2110 switch (term->quantityType) {
2111 case QuantifierFixedCount:
2112 if (term->quantityMaxCount == 1)
2113 generateCharacterClassOnce(opIndex);
2114 else
2115 generateCharacterClassFixed(opIndex);
2116 break;
2117 case QuantifierGreedy:
2118 generateCharacterClassGreedy(opIndex);
2119 break;
2120 case QuantifierNonGreedy:
2121 generateCharacterClassNonGreedy(opIndex);
2122 break;
2123 }
2124 break;
2125
2126 case PatternTerm::TypeAssertionBOL:
2127 generateAssertionBOL(opIndex);
2128 break;
2129
2130 case PatternTerm::TypeAssertionEOL:
2131 generateAssertionEOL(opIndex);
2132 break;
2133
2134 case PatternTerm::TypeAssertionWordBoundary:
2135 generateAssertionWordBoundary(opIndex);
2136 break;
2137
2138 case PatternTerm::TypeForwardReference:
2139 m_failureReason = JITFailureReason::ForwardReference;
2140 break;
2141
2142 case PatternTerm::TypeParenthesesSubpattern:
2143 case PatternTerm::TypeParentheticalAssertion:
2144 RELEASE_ASSERT_NOT_REACHED();
2145
2146 case PatternTerm::TypeBackReference:
2147#if ENABLE(YARR_JIT_BACKREFERENCES)
2148 generateBackReference(opIndex);
2149#else
2150 m_failureReason = JITFailureReason::BackReference;
2151#endif
2152 break;
2153 case PatternTerm::TypeDotStarEnclosure:
2154 generateDotStarEnclosure(opIndex);
2155 break;
2156 }
2157 }
2158 void backtrackTerm(size_t opIndex)
2159 {
2160 YarrOp& op = m_ops[opIndex];
2161 PatternTerm* term = op.m_term;
2162
2163 switch (term->type) {
2164 case PatternTerm::TypePatternCharacter:
2165 switch (term->quantityType) {
2166 case QuantifierFixedCount:
2167 if (term->quantityMaxCount == 1)
2168 backtrackPatternCharacterOnce(opIndex);
2169 else
2170 backtrackPatternCharacterFixed(opIndex);
2171 break;
2172 case QuantifierGreedy:
2173 backtrackPatternCharacterGreedy(opIndex);
2174 break;
2175 case QuantifierNonGreedy:
2176 backtrackPatternCharacterNonGreedy(opIndex);
2177 break;
2178 }
2179 break;
2180
2181 case PatternTerm::TypeCharacterClass:
2182 switch (term->quantityType) {
2183 case QuantifierFixedCount:
2184 if (term->quantityMaxCount == 1)
2185 backtrackCharacterClassOnce(opIndex);
2186 else
2187 backtrackCharacterClassFixed(opIndex);
2188 break;
2189 case QuantifierGreedy:
2190 backtrackCharacterClassGreedy(opIndex);
2191 break;
2192 case QuantifierNonGreedy:
2193 backtrackCharacterClassNonGreedy(opIndex);
2194 break;
2195 }
2196 break;
2197
2198 case PatternTerm::TypeAssertionBOL:
2199 backtrackAssertionBOL(opIndex);
2200 break;
2201
2202 case PatternTerm::TypeAssertionEOL:
2203 backtrackAssertionEOL(opIndex);
2204 break;
2205
2206 case PatternTerm::TypeAssertionWordBoundary:
2207 backtrackAssertionWordBoundary(opIndex);
2208 break;
2209
2210 case PatternTerm::TypeForwardReference:
2211 m_failureReason = JITFailureReason::ForwardReference;
2212 break;
2213
2214 case PatternTerm::TypeParenthesesSubpattern:
2215 case PatternTerm::TypeParentheticalAssertion:
2216 RELEASE_ASSERT_NOT_REACHED();
2217
2218 case PatternTerm::TypeBackReference:
2219#if ENABLE(YARR_JIT_BACKREFERENCES)
2220 backtrackBackReference(opIndex);
2221#else
2222 m_failureReason = JITFailureReason::BackReference;
2223#endif
2224 break;
2225
2226 case PatternTerm::TypeDotStarEnclosure:
2227 backtrackDotStarEnclosure(opIndex);
2228 break;
2229 }
2230 }
2231
2232 void generate()
2233 {
2234 // Forwards generate the matching code.
2235 ASSERT(m_ops.size());
2236 size_t opIndex = 0;
2237
2238 do {
2239 if (m_disassembler)
2240 m_disassembler->setForGenerate(opIndex, label());
2241
2242 YarrOp& op = m_ops[opIndex];
2243 switch (op.m_op) {
2244
2245 case OpTerm:
2246 generateTerm(opIndex);
2247 break;
2248
2249 // OpBodyAlternativeBegin/Next/End
2250 //
2251 // These nodes wrap the set of alternatives in the body of the regular expression.
2252 // There may be either one or two chains of OpBodyAlternative nodes, one representing
2253 // the 'once through' sequence of alternatives (if any exist), and one representing
2254 // the repeating alternatives (again, if any exist).
2255 //
2256 // Upon normal entry to the Begin alternative, we will check that input is available.
2257 // Reentry to the Begin alternative will take place after the check has taken place,
2258 // and will assume that the input position has already been progressed as appropriate.
2259 //
2260 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2261 // successfully completed a match - return a success state from JIT code.
2262 //
2263 // Next alternatives allow for reentry optimized to suit backtracking from its
2264 // preceding alternative. It expects the input position to still be set to a position
2265 // appropriate to its predecessor, and it will only perform an input check if the
2266 // predecessor had a minimum size less than its own.
2267 //
2268 // In the case 'once through' expressions, the End node will also have a reentry
2269 // point to jump to when the last alternative fails. Again, this expects the input
2270 // position to still reflect that expected by the prior alternative.
2271 case OpBodyAlternativeBegin: {
2272 PatternAlternative* alternative = op.m_alternative;
2273
2274 // Upon entry at the head of the set of alternatives, check if input is available
2275 // to run the first alternative. (This progresses the input position).
2276 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2277 // We will reenter after the check, and assume the input position to have been
2278 // set as appropriate to this alternative.
2279 op.m_reentry = label();
2280
2281 m_checkedOffset += alternative->m_minimumSize;
2282 break;
2283 }
2284 case OpBodyAlternativeNext:
2285 case OpBodyAlternativeEnd: {
2286 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2287 PatternAlternative* alternative = op.m_alternative;
2288
2289 // If we get here, the prior alternative matched - return success.
2290
2291 // Adjust the stack pointer to remove the pattern's frame.
2292 removeCallFrame();
2293
2294 // Load appropriate values into the return register and the first output
2295 // slot, and return. In the case of pattern with a fixed size, we will
2296 // not have yet set the value in the first
2297 ASSERT(index != returnRegister);
2298 if (m_pattern.m_body->m_hasFixedSize) {
2299 move(index, returnRegister);
2300 if (priorAlternative->m_minimumSize)
2301 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2302 if (compileMode == IncludeSubpatterns)
2303 store32(returnRegister, output);
2304 } else
2305 getMatchStart(returnRegister);
2306 if (compileMode == IncludeSubpatterns)
2307 store32(index, Address(output, 4));
2308 move(index, returnRegister2);
2309
2310 generateReturn();
2311
2312 // This is the divide between the tail of the prior alternative, above, and
2313 // the head of the subsequent alternative, below.
2314
2315 if (op.m_op == OpBodyAlternativeNext) {
2316 // This is the reentry point for the Next alternative. We expect any code
2317 // that jumps here to do so with the input position matching that of the
2318 // PRIOR alteranative, and we will only check input availability if we
2319 // need to progress it forwards.
2320 op.m_reentry = label();
2321 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2322 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2323 op.m_jumps.append(jumpIfNoAvailableInput());
2324 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2325 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2326 } else if (op.m_nextOp == notFound) {
2327 // This is the reentry point for the End of 'once through' alternatives,
2328 // jumped to when the last alternative fails to match.
2329 op.m_reentry = label();
2330 sub32(Imm32(priorAlternative->m_minimumSize), index);
2331 }
2332
2333 if (op.m_op == OpBodyAlternativeNext)
2334 m_checkedOffset += alternative->m_minimumSize;
2335 m_checkedOffset -= priorAlternative->m_minimumSize;
2336 break;
2337 }
2338
2339 // OpSimpleNestedAlternativeBegin/Next/End
2340 // OpNestedAlternativeBegin/Next/End
2341 //
2342 // These nodes are used to handle sets of alternatives that are nested within
2343 // subpatterns and parenthetical assertions. The 'simple' forms are used where
2344 // we do not need to be able to backtrack back into any alternative other than
2345 // the last, the normal forms allow backtracking into any alternative.
2346 //
2347 // Each Begin/Next node is responsible for planting an input check to ensure
2348 // sufficient input is available on entry. Next nodes additionally need to
2349 // jump to the end - Next nodes use the End node's m_jumps list to hold this
2350 // set of jumps.
2351 //
2352 // In the non-simple forms, successful alternative matches must store a
2353 // 'return address' using a DataLabelPtr, used to store the address to jump
2354 // to when backtracking, to get to the code for the appropriate alternative.
2355 case OpSimpleNestedAlternativeBegin:
2356 case OpNestedAlternativeBegin: {
2357 PatternTerm* term = op.m_term;
2358 PatternAlternative* alternative = op.m_alternative;
2359 PatternDisjunction* disjunction = term->parentheses.disjunction;
2360
2361 // Calculate how much input we need to check for, and if non-zero check.
2362 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2363 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2364 op.m_checkAdjust -= disjunction->m_minimumSize;
2365 if (op.m_checkAdjust)
2366 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2367
2368 m_checkedOffset += op.m_checkAdjust;
2369 break;
2370 }
2371 case OpSimpleNestedAlternativeNext:
2372 case OpNestedAlternativeNext: {
2373 PatternTerm* term = op.m_term;
2374 PatternAlternative* alternative = op.m_alternative;
2375 PatternDisjunction* disjunction = term->parentheses.disjunction;
2376
2377 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2378 if (op.m_op == OpNestedAlternativeNext) {
2379 unsigned parenthesesFrameLocation = term->frameLocation;
2380 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2381 }
2382
2383 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2384 // If the previous alternative matched without consuming characters then
2385 // backtrack to try to match while consumming some input.
2386 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2387 }
2388
2389 // If we reach here then the last alternative has matched - jump to the
2390 // End node, to skip over any further alternatives.
2391 //
2392 // FIXME: this is logically O(N^2) (though N can be expected to be very
2393 // small). We could avoid this either by adding an extra jump to the JIT
2394 // data structures, or by making backtracking code that jumps to Next
2395 // alternatives are responsible for checking that input is available (if
2396 // we didn't need to plant the input checks, then m_jumps would be free).
2397 YarrOp* endOp = &m_ops[op.m_nextOp];
2398 while (endOp->m_nextOp != notFound) {
2399 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2400 endOp = &m_ops[endOp->m_nextOp];
2401 }
2402 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2403 endOp->m_jumps.append(jump());
2404
2405 // This is the entry point for the next alternative.
2406 op.m_reentry = label();
2407
2408 // Calculate how much input we need to check for, and if non-zero check.
2409 op.m_checkAdjust = alternative->m_minimumSize;
2410 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2411 op.m_checkAdjust -= disjunction->m_minimumSize;
2412 if (op.m_checkAdjust)
2413 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2414
2415 YarrOp& lastOp = m_ops[op.m_previousOp];
2416 m_checkedOffset -= lastOp.m_checkAdjust;
2417 m_checkedOffset += op.m_checkAdjust;
2418 break;
2419 }
2420 case OpSimpleNestedAlternativeEnd:
2421 case OpNestedAlternativeEnd: {
2422 PatternTerm* term = op.m_term;
2423
2424 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2425 if (op.m_op == OpNestedAlternativeEnd) {
2426 unsigned parenthesesFrameLocation = term->frameLocation;
2427 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2428 }
2429
2430 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2431 // If the previous alternative matched without consuming characters then
2432 // backtrack to try to match while consumming some input.
2433 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2434 }
2435
2436 // If this set of alternatives contains more than one alternative,
2437 // then the Next nodes will have planted jumps to the End, and added
2438 // them to this node's m_jumps list.
2439 op.m_jumps.link(this);
2440 op.m_jumps.clear();
2441
2442 YarrOp& lastOp = m_ops[op.m_previousOp];
2443 m_checkedOffset -= lastOp.m_checkAdjust;
2444 break;
2445 }
2446
2447 // OpParenthesesSubpatternOnceBegin/End
2448 //
2449 // These nodes support (optionally) capturing subpatterns, that have a
2450 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
2451 case OpParenthesesSubpatternOnceBegin: {
2452 PatternTerm* term = op.m_term;
2453 unsigned parenthesesFrameLocation = term->frameLocation;
2454 const RegisterID indexTemporary = regT0;
2455 ASSERT(term->quantityMaxCount == 1);
2456
2457 // Upon entry to a Greedy quantified set of parenthese store the index.
2458 // We'll use this for two purposes:
2459 // - To indicate which iteration we are on of mathing the remainder of
2460 // the expression after the parentheses - the first, including the
2461 // match within the parentheses, or the second having skipped over them.
2462 // - To check for empty matches, which must be rejected.
2463 //
2464 // At the head of a NonGreedy set of parentheses we'll immediately set the
2465 // value on the stack to -1 (indicating a match skipping the subpattern),
2466 // and plant a jump to the end. We'll also plant a label to backtrack to
2467 // to reenter the subpattern later, with a store to set up index on the
2468 // second iteration.
2469 //
2470 // FIXME: for capturing parens, could use the index in the capture array?
2471 if (term->quantityType == QuantifierGreedy)
2472 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2473 else if (term->quantityType == QuantifierNonGreedy) {
2474 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2475 op.m_jumps.append(jump());
2476 op.m_reentry = label();
2477 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2478 }
2479
2480 // If the parenthese are capturing, store the starting index value to the
2481 // captures array, offsetting as necessary.
2482 //
2483 // FIXME: could avoid offsetting this value in JIT code, apply
2484 // offsets only afterwards, at the point the results array is
2485 // being accessed.
2486 if (term->capture() && compileMode == IncludeSubpatterns) {
2487 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2488 if (term->quantityType == QuantifierFixedCount)
2489 inputOffset += term->parentheses.disjunction->m_minimumSize;
2490 if (inputOffset) {
2491 move(index, indexTemporary);
2492 sub32(Imm32(inputOffset), indexTemporary);
2493 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2494 } else
2495 setSubpatternStart(index, term->parentheses.subpatternId);
2496 }
2497 break;
2498 }
2499 case OpParenthesesSubpatternOnceEnd: {
2500 PatternTerm* term = op.m_term;
2501 const RegisterID indexTemporary = regT0;
2502 ASSERT(term->quantityMaxCount == 1);
2503
2504 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2505 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2506 // handle nested expressions that can match without consuming characters
2507 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2508 m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))));
2509
2510                // If the parentheses are capturing, store the ending index value to the
2511 // captures array, offsetting as necessary.
2512 //
2513 // FIXME: could avoid offsetting this value in JIT code, apply
2514 // offsets only afterwards, at the point the results array is
2515 // being accessed.
2516 if (term->capture() && compileMode == IncludeSubpatterns) {
2517 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2518 if (inputOffset) {
2519 move(index, indexTemporary);
2520 sub32(Imm32(inputOffset), indexTemporary);
2521 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2522 } else
2523 setSubpatternEnd(index, term->parentheses.subpatternId);
2524 }
2525
2526 // If the parentheses are quantified Greedy then add a label to jump back
2527 // to if we get a failed match from after the parentheses. For NonGreedy
2528 // parentheses, link the jump from before the subpattern to here.
2529 if (term->quantityType == QuantifierGreedy)
2530 op.m_reentry = label();
2531 else if (term->quantityType == QuantifierNonGreedy) {
2532 YarrOp& beginOp = m_ops[op.m_previousOp];
2533 beginOp.m_jumps.link(this);
2534 }
2535 break;
2536 }
2537
2538 // OpParenthesesSubpatternTerminalBegin/End
2539 case OpParenthesesSubpatternTerminalBegin: {
2540 PatternTerm* term = op.m_term;
2541 ASSERT(term->quantityType == QuantifierGreedy);
2542 ASSERT(term->quantityMaxCount == quantifyInfinite);
2543 ASSERT(!term->capture());
2544
2545 // Upon entry set a label to loop back to.
2546 op.m_reentry = label();
2547
2548 // Store the start index of the current match; we need to reject zero
2549 // length matches.
2550 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2551 break;
2552 }
2553 case OpParenthesesSubpatternTerminalEnd: {
2554 YarrOp& beginOp = m_ops[op.m_previousOp];
2555 PatternTerm* term = op.m_term;
2556
2557 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2558 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2559 // handle nested expressions that can match without consuming characters
2560 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2561 m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))));
2562
2563 // We know that the match is non-zero, we can accept it and
2564 // loop back up to the head of the subpattern.
2565 jump(beginOp.m_reentry);
2566
2567 // This is the entry point to jump to when we stop matching - we will
2568 // do so once the subpattern cannot match any more.
2569 op.m_reentry = label();
2570 break;
2571 }
2572
2573 // OpParenthesesSubpatternBegin/End
2574 //
2575 // These nodes support generic subpatterns.
2576 case OpParenthesesSubpatternBegin: {
2577#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2578 PatternTerm* term = op.m_term;
2579 unsigned parenthesesFrameLocation = term->frameLocation;
2580
2581                // Upon entry to a Greedy quantified set of parentheses, store the index.
2582                // We'll use this for two purposes:
2583                // - To indicate which iteration we are on of matching the remainder of
2584                //   the expression after the parentheses - the first, including the
2585                //   match within the parentheses, or the second having skipped over them.
2586                // - To check for empty matches, which must be rejected.
2587 //
2588 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2589 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2590 // and plant a jump to the end. We'll also plant a label to backtrack to
2591 // to reenter the subpattern later, with a store to set 'begin' to current index
2592 // on the second iteration.
2593 //
2594 // FIXME: for capturing parens, could use the index in the capture array?
2595 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2596 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2597 storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2598
2599 if (term->quantityType == QuantifierNonGreedy) {
2600 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2601 op.m_jumps.append(jump());
2602 }
2603
2604 op.m_reentry = label();
2605 RegisterID currParenContextReg = regT0;
2606 RegisterID newParenContextReg = regT1;
2607
2608 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2609 allocateParenContext(newParenContextReg);
2610 storePtr(currParenContextReg, newParenContextReg);
2611 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2612 saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2613 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2614 }
2615
2616                // If the parentheses are capturing, store the starting index value to the
2617 // captures array, offsetting as necessary.
2618 //
2619 // FIXME: could avoid offsetting this value in JIT code, apply
2620 // offsets only afterwards, at the point the results array is
2621 // being accessed.
2622 if (term->capture() && compileMode == IncludeSubpatterns) {
2623 const RegisterID indexTemporary = regT0;
2624 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2625 if (term->quantityType == QuantifierFixedCount)
2626 inputOffset += term->parentheses.disjunction->m_minimumSize;
2627 if (inputOffset) {
2628 move(index, indexTemporary);
2629 sub32(Imm32(inputOffset), indexTemporary);
2630 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2631 } else
2632 setSubpatternStart(index, term->parentheses.subpatternId);
2633 }
2634#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2635 RELEASE_ASSERT_NOT_REACHED();
2636#endif
2637 break;
2638 }
2639 case OpParenthesesSubpatternEnd: {
2640#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2641 PatternTerm* term = op.m_term;
2642 unsigned parenthesesFrameLocation = term->frameLocation;
2643
2644 // If the nested alternative matched without consuming any characters, punt this back to the interpreter.
2645 // FIXME: <https://bugs.webkit.org/show_bug.cgi?id=200786> Add ability for the YARR JIT to properly
2646 // handle nested expressions that can match without consuming characters
2647 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
2648 m_abortExecution.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
2649
2650 const RegisterID countTemporary = regT1;
2651
2652 YarrOp& beginOp = m_ops[op.m_previousOp];
2653 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2654 add32(TrustedImm32(1), countTemporary);
2655 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2656
2657                // If the parentheses are capturing, store the ending index value to the
2658 // captures array, offsetting as necessary.
2659 //
2660 // FIXME: could avoid offsetting this value in JIT code, apply
2661 // offsets only afterwards, at the point the results array is
2662 // being accessed.
2663 if (term->capture() && compileMode == IncludeSubpatterns) {
2664 const RegisterID indexTemporary = regT0;
2665
2666 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2667 if (inputOffset) {
2668 move(index, indexTemporary);
2669 sub32(Imm32(inputOffset), indexTemporary);
2670 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2671 } else
2672 setSubpatternEnd(index, term->parentheses.subpatternId);
2673 }
2674
2675 // If the parentheses are quantified Greedy then add a label to jump back
2676 // to if we get a failed match from after the parentheses. For NonGreedy
2677 // parentheses, link the jump from before the subpattern to here.
2678 if (term->quantityType == QuantifierGreedy) {
2679 if (term->quantityMaxCount != quantifyInfinite)
2680 branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2681 else
2682 jump(beginOp.m_reentry);
2683
2684 op.m_reentry = label();
2685 } else if (term->quantityType == QuantifierNonGreedy) {
2686 YarrOp& beginOp = m_ops[op.m_previousOp];
2687 beginOp.m_jumps.link(this);
2688 op.m_reentry = label();
2689 }
2690#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2691 RELEASE_ASSERT_NOT_REACHED();
2692#endif
2693 break;
2694 }
2695
2696 // OpParentheticalAssertionBegin/End
2697 case OpParentheticalAssertionBegin: {
2698 PatternTerm* term = op.m_term;
2699
2700 // Store the current index - assertions should not update index, so
2701 // we will need to restore it upon a successful match.
2702 unsigned parenthesesFrameLocation = term->frameLocation;
2703 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2704
2705 // Check
2706 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2707 if (op.m_checkAdjust)
2708 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2709
2710 m_checkedOffset -= op.m_checkAdjust;
2711 break;
2712 }
2713 case OpParentheticalAssertionEnd: {
2714 PatternTerm* term = op.m_term;
2715
2716 // Restore the input index value.
2717 unsigned parenthesesFrameLocation = term->frameLocation;
2718 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2719
2720 // If inverted, a successful match of the assertion must be treated
2721 // as a failure, so jump to backtracking.
2722 if (term->invert()) {
2723 op.m_jumps.append(jump());
2724 op.m_reentry = label();
2725 }
2726
2727 YarrOp& lastOp = m_ops[op.m_previousOp];
2728 m_checkedOffset += lastOp.m_checkAdjust;
2729 break;
2730 }
2731
2732 case OpMatchFailed:
2733 removeCallFrame();
2734 generateFailReturn();
2735 break;
2736 }
2737
2738 ++opIndex;
2739 } while (opIndex < m_ops.size());
2740 }
2741
2742 void backtrack()
2743 {
2744 // Backwards generate the backtracking code.
2745 size_t opIndex = m_ops.size();
2746 ASSERT(opIndex);
2747
2748 do {
2749 --opIndex;
2750
2751 if (m_disassembler)
2752 m_disassembler->setForBacktrack(opIndex, label());
2753
2754 YarrOp& op = m_ops[opIndex];
2755 switch (op.m_op) {
2756
2757 case OpTerm:
2758 backtrackTerm(opIndex);
2759 break;
2760
2761 // OpBodyAlternativeBegin/Next/End
2762 //
2763 // For each Begin/Next node representing an alternative, we need to decide what to do
2764 // in two circumstances:
2765 // - If we backtrack back into this node, from within the alternative.
2766 // - If the input check at the head of the alternative fails (if this exists).
2767 //
2768 // We treat these two cases differently since in the former case we have slightly
2769 // more information - since we are backtracking out of a prior alternative we know
2770 // that at least enough input was available to run it. For example, given the regular
2771 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2772 // character match of 'a'), then we need not perform an additional input availability
2773 // check before running the second alternative.
2774 //
2775 // Backtracking required differs for the last alternative, which in the case of the
2776 // repeating set of alternatives must loop. The code generated for the last alternative
2777 // will also be used to handle all input check failures from any prior alternatives -
2778 // these require similar functionality, in seeking the next available alternative for
2779 // which there is sufficient input.
2780 //
2781 // Since backtracking of all other alternatives simply requires us to link backtracks
2782 // to the reentry point for the subsequent alternative, we will only be generating any
2783 // code when backtracking the last alternative.
2784 case OpBodyAlternativeBegin:
2785 case OpBodyAlternativeNext: {
2786 PatternAlternative* alternative = op.m_alternative;
2787
2788 if (op.m_op == OpBodyAlternativeNext) {
2789 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2790 m_checkedOffset += priorAlternative->m_minimumSize;
2791 }
2792 m_checkedOffset -= alternative->m_minimumSize;
2793
2794 // Is this the last alternative? If not, then if we backtrack to this point we just
2795 // need to jump to try to match the next alternative.
2796 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2797 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2798 break;
2799 }
2800 YarrOp& endOp = m_ops[op.m_nextOp];
2801
2802 YarrOp* beginOp = &op;
2803 while (beginOp->m_op != OpBodyAlternativeBegin) {
2804 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2805 beginOp = &m_ops[beginOp->m_previousOp];
2806 }
2807
2808 bool onceThrough = endOp.m_nextOp == notFound;
2809
2810 JumpList lastStickyAlternativeFailures;
2811
2812 // First, generate code to handle cases where we backtrack out of an attempted match
2813 // of the last alternative. If this is a 'once through' set of alternatives then we
2814 // have nothing to do - link this straight through to the End.
2815 if (onceThrough)
2816 m_backtrackingState.linkTo(endOp.m_reentry, this);
2817 else {
2818                    // If we don't need to move the input position, and the pattern has a fixed size
2819 // (in which case we omit the store of the start index until the pattern has matched)
2820 // then we can just link the backtrack out of the last alternative straight to the
2821 // head of the first alternative.
2822 if (m_pattern.m_body->m_hasFixedSize
2823 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2824 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2825 m_backtrackingState.linkTo(beginOp->m_reentry, this);
2826 else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2827 // It is a sticky pattern and the last alternative failed, jump to the end.
2828 m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2829 } else {
2830 // We need to generate a trampoline of code to execute before looping back
2831 // around to the first alternative.
2832 m_backtrackingState.link(this);
2833
2834 // No need to advance and retry for a sticky pattern.
2835 if (!m_pattern.sticky()) {
2836 // If the pattern size is not fixed, then store the start index for use if we match.
2837 if (!m_pattern.m_body->m_hasFixedSize) {
2838 if (alternative->m_minimumSize == 1)
2839 setMatchStart(index);
2840 else {
2841 move(index, regT0);
2842 if (alternative->m_minimumSize)
2843 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2844 else
2845 add32(TrustedImm32(1), regT0);
2846 setMatchStart(regT0);
2847 }
2848 }
2849
2850 // Generate code to loop. Check whether the last alternative is longer than the
2851 // first (e.g. /a|xy/ or /a|xyz/).
2852 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2853 // We want to loop, and increment input position. If the delta is 1, it is
2854 // already correctly incremented, if more than one then decrement as appropriate.
2855 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2856 ASSERT(delta);
2857 if (delta != 1)
2858 sub32(Imm32(delta - 1), index);
2859 jump(beginOp->m_reentry);
2860 } else {
2861                                // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2862                                // be sufficient input available to handle this, so just fall through.
2863 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2864 if (delta != 0xFFFFFFFFu) {
2865 // We need to check input because we are incrementing the input.
2866 add32(Imm32(delta + 1), index);
2867 checkInput().linkTo(beginOp->m_reentry, this);
2868 }
2869 }
2870 }
2871 }
2872 }
2873
2874                // We can reach this point in the code in two ways:
2875                // - Fallthrough from the code above (a repeating alternative backtracked out of its
2876                //   last alternative, and did not have sufficient input to run the first).
2877 // - We will loop back up to the following label when a repeating alternative loops,
2878 // following a failed input check.
2879 //
2880 // Either way, we have just failed the input check for the first alternative.
2881 Label firstInputCheckFailed(this);
2882
2883 // Generate code to handle input check failures from alternatives except the last.
2884 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2885 // nextOp is the alternative we will be attempting to reenter into.
2886 //
2887 // We will link input check failures from the forwards matching path back to the code
2888 // that can handle them.
2889 YarrOp* prevOp = beginOp;
2890 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2891 while (nextOp->m_op != OpBodyAlternativeEnd) {
2892 prevOp->m_jumps.link(this);
2893
2894 // We only get here if an input check fails, it is only worth checking again
2895 // if the next alternative has a minimum size less than the last.
2896 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2897 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2898 // subtract delta back out, and reduce this code. Should performance test
2899 // the benefit of this.
2900 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2901 sub32(Imm32(delta), index);
2902 Jump fail = jumpIfNoAvailableInput();
2903 add32(Imm32(delta), index);
2904 jump(nextOp->m_reentry);
2905 fail.link(this);
2906 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2907 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2908 prevOp = nextOp;
2909 nextOp = &m_ops[nextOp->m_nextOp];
2910 }
2911
2912 // We fall through to here if there is insufficient input to run the last alternative.
2913
2914 // If there is insufficient input to run the last alternative, then for 'once through'
2915 // alternatives we are done - just jump back up into the forwards matching path at the End.
2916 if (onceThrough) {
2917 op.m_jumps.linkTo(endOp.m_reentry, this);
2918 jump(endOp.m_reentry);
2919 break;
2920 }
2921
2922 // For repeating alternatives, link any input check failure from the last alternative to
2923 // this point.
2924 op.m_jumps.link(this);
2925
2926 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2927
2928 // Check for cases where input position is already incremented by 1 for the last
2929 // alternative (this is particularly useful where the minimum size of the body
2930 // disjunction is 0, e.g. /a*|b/).
2931 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2932 // index is already incremented by 1, so just store it now!
2933 setMatchStart(index);
2934 needsToUpdateMatchStart = false;
2935 }
2936
2937 if (!m_pattern.sticky()) {
2938 // Check whether there is sufficient input to loop. Increment the input position by
2939 // one, and check. Also add in the minimum disjunction size before checking - there
2940 // is no point in looping if we're just going to fail all the input checks around
2941 // the next iteration.
2942 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2943 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2944 // If the last alternative had the same minimum size as the disjunction,
2945 // just simply increment input pos by 1, no adjustment based on minimum size.
2946 add32(TrustedImm32(1), index);
2947 } else {
2948                        // If the minimum for the last alternative was one greater than that
2949                        // for the disjunction, we're already progressed by 1, nothing to do!
2950 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2951 if (delta)
2952 sub32(Imm32(delta), index);
2953 }
2954 Jump matchFailed = jumpIfNoAvailableInput();
2955
2956 if (needsToUpdateMatchStart) {
2957 if (!m_pattern.m_body->m_minimumSize)
2958 setMatchStart(index);
2959 else {
2960 move(index, regT0);
2961 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2962 setMatchStart(regT0);
2963 }
2964 }
2965
2966 // Calculate how much more input the first alternative requires than the minimum
2967 // for the body as a whole. If no more is needed then we dont need an additional
2968 // input check here - jump straight back up to the start of the first alternative.
2969 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2970 jump(beginOp->m_reentry);
2971 else {
2972 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2973 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2974 else
2975 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2976 checkInput().linkTo(beginOp->m_reentry, this);
2977 jump(firstInputCheckFailed);
2978 }
2979
2980 // We jump to here if we iterate to the point that there is insufficient input to
2981 // run any matches, and need to return a failure state from JIT code.
2982 matchFailed.link(this);
2983 }
2984
2985 lastStickyAlternativeFailures.link(this);
2986 removeCallFrame();
2987 generateFailReturn();
2988 break;
2989 }
2990 case OpBodyAlternativeEnd: {
2991 // We should never backtrack back into a body disjunction.
2992 ASSERT(m_backtrackingState.isEmpty());
2993
2994 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2995 m_checkedOffset += priorAlternative->m_minimumSize;
2996 break;
2997 }
2998
2999 // OpSimpleNestedAlternativeBegin/Next/End
3000 // OpNestedAlternativeBegin/Next/End
3001 //
3002 // Generate code for when we backtrack back out of an alternative into
3003 // a Begin or Next node, or when the entry input count check fails. If
3004 // there are more alternatives we need to jump to the next alternative,
3005 // if not we backtrack back out of the current set of parentheses.
3006 //
3007 // In the case of non-simple nested assertions we need to also link the
3008 // 'return address' appropriately to backtrack back out into the correct
3009 // alternative.
3010 case OpSimpleNestedAlternativeBegin:
3011 case OpSimpleNestedAlternativeNext:
3012 case OpNestedAlternativeBegin:
3013 case OpNestedAlternativeNext: {
3014 YarrOp& nextOp = m_ops[op.m_nextOp];
3015 bool isBegin = op.m_previousOp == notFound;
3016 bool isLastAlternative = nextOp.m_nextOp == notFound;
3017 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3018 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3019
3020 // Treat an input check failure the same as a failed match.
3021 m_backtrackingState.append(op.m_jumps);
3022
3023 // Set the backtracks to jump to the appropriate place. We may need
3024                // to link the backtracks in one of three different ways depending on
3025 // the type of alternative we are dealing with:
3026                // - A single alternative, with no siblings.
3027 // - The last alternative of a set of two or more.
3028 // - An alternative other than the last of a set of two or more.
3029 //
3030 // In the case of a single alternative on its own, we don't need to
3031 // jump anywhere - if the alternative fails to match we can just
3032 // continue to backtrack out of the parentheses without jumping.
3033 //
3034 // In the case of the last alternative in a set of more than one, we
3035 // need to jump to return back out to the beginning. We'll do so by
3036 // adding a jump to the End node's m_jumps list, and linking this
3037 // when we come to generate the Begin node. For alternatives other
3038 // than the last, we need to jump to the next alternative.
3039 //
3040 // If the alternative had adjusted the input position we must link
3041 // backtracking to here, correct, and then jump on. If not we can
3042 // link the backtracks directly to their destination.
3043 if (op.m_checkAdjust) {
3044 // Handle the cases where we need to link the backtracks here.
3045 m_backtrackingState.link(this);
3046 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3047 if (!isLastAlternative) {
3048 // An alternative that is not the last should jump to its successor.
3049 jump(nextOp.m_reentry);
3050 } else if (!isBegin) {
3051 // The last of more than one alternatives must jump back to the beginning.
3052 nextOp.m_jumps.append(jump());
3053 } else {
3054 // A single alternative on its own can fall through.
3055 m_backtrackingState.fallthrough();
3056 }
3057 } else {
3058 // Handle the cases where we can link the backtracks directly to their destinations.
3059 if (!isLastAlternative) {
3060 // An alternative that is not the last should jump to its successor.
3061 m_backtrackingState.linkTo(nextOp.m_reentry, this);
3062 } else if (!isBegin) {
3063 // The last of more than one alternatives must jump back to the beginning.
3064 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3065 }
3066 // In the case of a single alternative on its own do nothing - it can fall through.
3067 }
3068
3069 // If there is a backtrack jump from a zero length match link it here.
3070 if (op.m_zeroLengthMatch.isSet())
3071 m_backtrackingState.append(op.m_zeroLengthMatch);
3072
3073 // At this point we've handled the backtracking back into this node.
3074 // Now link any backtracks that need to jump to here.
3075
3076 // For non-simple alternatives, link the alternative's 'return address'
3077 // so that we backtrack back out into the previous alternative.
3078 if (op.m_op == OpNestedAlternativeNext)
3079 m_backtrackingState.append(op.m_returnAddress);
3080
3081 // If there is more than one alternative, then the last alternative will
3082 // have planted a jump to be linked to the end. This jump was added to the
3083 // End node's m_jumps list. If we are back at the beginning, link it here.
3084 if (isBegin) {
3085 YarrOp* endOp = &m_ops[op.m_nextOp];
3086 while (endOp->m_nextOp != notFound) {
3087 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
3088 endOp = &m_ops[endOp->m_nextOp];
3089 }
3090 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
3091 m_backtrackingState.append(endOp->m_jumps);
3092 }
3093
3094 if (!isBegin) {
3095 YarrOp& lastOp = m_ops[op.m_previousOp];
3096 m_checkedOffset += lastOp.m_checkAdjust;
3097 }
3098 m_checkedOffset -= op.m_checkAdjust;
3099 break;
3100 }
3101 case OpSimpleNestedAlternativeEnd:
3102 case OpNestedAlternativeEnd: {
3103 PatternTerm* term = op.m_term;
3104
3105 // If there is a backtrack jump from a zero length match link it here.
3106 if (op.m_zeroLengthMatch.isSet())
3107 m_backtrackingState.append(op.m_zeroLengthMatch);
3108
3109 // If we backtrack into the end of a simple subpattern do nothing;
3110                // just continue through into the last alternative. If we backtrack
3111                // into the end of a non-simple set of alternatives we need to jump
3112                // to the backtracking return address set up during generation.
3113 if (op.m_op == OpNestedAlternativeEnd) {
3114 m_backtrackingState.link(this);
3115
3116 // Plant a jump to the return address.
3117 unsigned parenthesesFrameLocation = term->frameLocation;
3118 loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
3119
3120 // Link the DataLabelPtr associated with the end of the last
3121 // alternative to this point.
3122 m_backtrackingState.append(op.m_returnAddress);
3123 }
3124
3125 YarrOp& lastOp = m_ops[op.m_previousOp];
3126 m_checkedOffset += lastOp.m_checkAdjust;
3127 break;
3128 }
3129
3130 // OpParenthesesSubpatternOnceBegin/End
3131 //
3132 // When we are backtracking back out of a capturing subpattern we need
3133 // to clear the start index in the matches output array, to record that
3134 // this subpattern has not been captured.
3135 //
3136 // When backtracking back out of a Greedy quantified subpattern we need
3137 // to catch this, and try running the remainder of the alternative after
3138 // the subpattern again, skipping the parentheses.
3139 //
3140 // Upon backtracking back into a quantified set of parentheses we need to
3141 // check whether we were currently skipping the subpattern. If not, we
3142 // can backtrack into them, if we were we need to either backtrack back
3143 // out of the start of the parentheses, or jump back to the forwards
3144            // matching start, depending on whether the match is Greedy or NonGreedy.
3145 case OpParenthesesSubpatternOnceBegin: {
3146 PatternTerm* term = op.m_term;
3147 ASSERT(term->quantityMaxCount == 1);
3148
3149 // We only need to backtrack to this point if capturing or greedy.
3150 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
3151 m_backtrackingState.link(this);
3152
3153 // If capturing, clear the capture (we only need to reset start).
3154 if (term->capture() && compileMode == IncludeSubpatterns)
3155 clearSubpatternStart(term->parentheses.subpatternId);
3156
3157 // If Greedy, jump to the end.
3158 if (term->quantityType == QuantifierGreedy) {
3159 // Clear the flag in the stackframe indicating we ran through the subpattern.
3160 unsigned parenthesesFrameLocation = term->frameLocation;
3161 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
3162 // Jump to after the parentheses, skipping the subpattern.
3163 jump(m_ops[op.m_nextOp].m_reentry);
3164 // A backtrack from after the parentheses, when skipping the subpattern,
3165 // will jump back to here.
3166 op.m_jumps.link(this);
3167 }
3168
3169 m_backtrackingState.fallthrough();
3170 }
3171 break;
3172 }
3173 case OpParenthesesSubpatternOnceEnd: {
3174 PatternTerm* term = op.m_term;
3175
3176 if (term->quantityType != QuantifierFixedCount) {
3177 m_backtrackingState.link(this);
3178
3179 // Check whether we should backtrack back into the parentheses, or if we
3180 // are currently in a state where we had skipped over the subpattern
3181 // (in which case the flag value on the stack will be -1).
3182 unsigned parenthesesFrameLocation = term->frameLocation;
3183 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3184
3185 if (term->quantityType == QuantifierGreedy) {
3186 // For Greedy parentheses, we skip after having already tried going
3187 // through the subpattern, so if we get here we're done.
3188 YarrOp& beginOp = m_ops[op.m_previousOp];
3189 beginOp.m_jumps.append(hadSkipped);
3190 } else {
3191 // For NonGreedy parentheses, we try skipping the subpattern first,
3192 // so if we get here we need to try running through the subpattern
3193 // next. Jump back to the start of the parentheses in the forwards
3194 // matching path.
3195 ASSERT(term->quantityType == QuantifierNonGreedy);
3196 YarrOp& beginOp = m_ops[op.m_previousOp];
3197 hadSkipped.linkTo(beginOp.m_reentry, this);
3198 }
3199
3200 m_backtrackingState.fallthrough();
3201 }
3202
3203 m_backtrackingState.append(op.m_jumps);
3204 break;
3205 }
3206
3207 // OpParenthesesSubpatternTerminalBegin/End
3208 //
3209 // Terminal subpatterns will always match - there is nothing after them to
3210 // force a backtrack, and they have a minimum count of 0, and as such will
3211 // always produce an acceptable result.
3212 case OpParenthesesSubpatternTerminalBegin: {
3213 // We will backtrack to this point once the subpattern cannot match any
3214 // more. Since no match is accepted as a successful match (we are Greedy
3215 // quantified with a minimum of zero) jump back to the forwards matching
3216 // path at the end.
3217 YarrOp& endOp = m_ops[op.m_nextOp];
3218 m_backtrackingState.linkTo(endOp.m_reentry, this);
3219 break;
3220 }
3221 case OpParenthesesSubpatternTerminalEnd:
3222 // We should never be backtracking to here (hence the 'terminal' in the name).
3223 ASSERT(m_backtrackingState.isEmpty());
3224 m_backtrackingState.append(op.m_jumps);
3225 break;
3226
3227 // OpParenthesesSubpatternBegin/End
3228 //
3229 // When we are backtracking back out of a capturing subpattern we need
3230 // to clear the start index in the matches output array, to record that
3231 // this subpattern has not been captured.
3232 //
3233 // When backtracking back out of a Greedy quantified subpattern we need
3234 // to catch this, and try running the remainder of the alternative after
3235 // the subpattern again, skipping the parentheses.
3236 //
3237 // Upon backtracking back into a quantified set of parentheses we need to
3238 // check whether we were currently skipping the subpattern. If not, we
3239 // can backtrack into them, if we were we need to either backtrack back
3240 // out of the start of the parentheses, or jump back to the forwards
3241 // matching start, depending of whether the match is Greedy or NonGreedy.
3242 case OpParenthesesSubpatternBegin: {
3243#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3244 PatternTerm* term = op.m_term;
3245 unsigned parenthesesFrameLocation = term->frameLocation;
3246
3247 if (term->quantityType != QuantifierFixedCount) {
3248 m_backtrackingState.link(this);
3249
3250 RegisterID currParenContextReg = regT0;
3251 RegisterID newParenContextReg = regT1;
3252
3253 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
3254
3255 restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
3256
3257 freeParenContext(currParenContextReg, newParenContextReg);
3258 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
3259
3260 const RegisterID countTemporary = regT0;
3261 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3262 Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
3263
3264 sub32(TrustedImm32(1), countTemporary);
3265 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
3266
3267 jump(m_ops[op.m_nextOp].m_reentry);
3268
3269 zeroLengthMatch.link(this);
3270
3271 // Clear the flag in the stackframe indicating we didn't run through the subpattern.
3272 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
3273
3274 if (term->quantityType == QuantifierGreedy)
3275 jump(m_ops[op.m_nextOp].m_reentry);
3276
3277 // If Greedy, jump to the end.
3278 if (term->quantityType == QuantifierGreedy) {
3279 // A backtrack from after the parentheses, when skipping the subpattern,
3280 // will jump back to here.
3281 op.m_jumps.link(this);
3282 }
3283
3284 m_backtrackingState.fallthrough();
3285 }
3286#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3287 RELEASE_ASSERT_NOT_REACHED();
3288#endif
3289 break;
3290 }
3291 case OpParenthesesSubpatternEnd: {
3292#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3293 PatternTerm* term = op.m_term;
3294
3295 if (term->quantityType != QuantifierFixedCount) {
3296 m_backtrackingState.link(this);
3297
3298 unsigned parenthesesFrameLocation = term->frameLocation;
3299
3300 if (term->quantityType == QuantifierGreedy) {
3301 // Check whether we should backtrack back into the parentheses, or if we
3302 // are currently in a state where we had skipped over the subpattern
3303 // (in which case the flag value on the stack will be -1).
3304 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3305
3306 // For Greedy parentheses, we skip after having already tried going
3307 // through the subpattern, so if we get here we're done.
3308 YarrOp& beginOp = m_ops[op.m_previousOp];
3309 beginOp.m_jumps.append(hadSkipped);
3310 } else {
3311 // For NonGreedy parentheses, we try skipping the subpattern first,
3312 // so if we get here we need to try running through the subpattern
3313 // next. Jump back to the start of the parentheses in the forwards
3314 // matching path.
3315 ASSERT(term->quantityType == QuantifierNonGreedy);
3316
3317 const RegisterID beginTemporary = regT0;
3318 const RegisterID countTemporary = regT1;
3319
3320 YarrOp& beginOp = m_ops[op.m_previousOp];
3321
3322 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
3323 branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
3324
3325 JumpList exceededMatchLimit;
3326
3327 if (term->quantityMaxCount != quantifyInfinite) {
3328 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3329 exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
3330 }
3331
3332 branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
3333
3334 exceededMatchLimit.link(this);
3335 }
3336
3337 m_backtrackingState.fallthrough();
3338 }
3339
3340 m_backtrackingState.append(op.m_jumps);
3341#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3342 RELEASE_ASSERT_NOT_REACHED();
3343#endif
3344 break;
3345 }
3346
3347 // OpParentheticalAssertionBegin/End
3348 case OpParentheticalAssertionBegin: {
3349 PatternTerm* term = op.m_term;
3350 YarrOp& endOp = m_ops[op.m_nextOp];
3351
3352 // We need to handle the backtracks upon backtracking back out
3353 // of a parenthetical assertion if either we need to correct
3354 // the input index, or the assertion was inverted.
3355 if (op.m_checkAdjust || term->invert()) {
3356 m_backtrackingState.link(this);
3357
3358 if (op.m_checkAdjust)
3359 add32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3360
3361 // In an inverted assertion failure to match the subpattern
3362 // is treated as a successful match - jump to the end of the
3363 // subpattern. We already have adjusted the input position
3364 // back to that before the assertion, which is correct.
3365 if (term->invert())
3366 jump(endOp.m_reentry);
3367
3368 m_backtrackingState.fallthrough();
3369 }
3370
3371 // The End node's jump list will contain any backtracks into
3372 // the end of the assertion. Also, if inverted, we will have
3373 // added the failure caused by a successful match to this.
3374 m_backtrackingState.append(endOp.m_jumps);
3375
3376 m_checkedOffset += op.m_checkAdjust;
3377 break;
3378 }
3379 case OpParentheticalAssertionEnd: {
3380 // FIXME: We should really be clearing any nested subpattern
3381 // matches on bailing out from after the pattern. Firefox has
3382 // this bug too (presumably because they use YARR!)
3383
3384 // Never backtrack into an assertion; later failures bail to before the begin.
3385 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
3386
3387 YarrOp& lastOp = m_ops[op.m_previousOp];
3388 m_checkedOffset -= lastOp.m_checkAdjust;
3389 break;
3390 }
3391
3392 case OpMatchFailed:
3393 break;
3394 }
3395
3396 } while (opIndex);
3397 }
3398
3399 // Compilation methods:
3400 // ====================
3401
3402 // opCompileParenthesesSubpattern
3403 // Emits ops for a subpattern (set of parentheses). These consist
3404 // of a set of alternatives wrapped in an outer set of nodes for
3405 // the parentheses.
3406 // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
3407 // 'Terminal' (non-capturing parentheses quantified as greedy
3408 // and infinite), and 0 based greedy / non-greedy quantified parentheses.
3409 // Alternatives will use the 'Simple' set of ops if either the
3410 // subpattern is terminal (in which case we will never need to
3411 // backtrack), or if the subpattern only contains one alternative.
3412 void opCompileParenthesesSubpattern(PatternTerm* term)
3413 {
3414 YarrOpCode parenthesesBeginOpCode;
3415 YarrOpCode parenthesesEndOpCode;
3416 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
3417 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
3418 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
3419
3420 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3421 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3422 return;
3423 }
3424
3425 // We can currently only compile quantity 1 subpatterns that are
3426 // not copies. We generate a copy in the case of a range quantifier,
3427 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
3428 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
3429 // comes where the subpattern is capturing, in which case we would
3430 // need to restore the capture from the first subpattern upon a
3431 // failure in the second.
3432 if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
3433 m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
3434 return;
3435 }
3436
3437 if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
3438 // Select the 'Once' nodes.
3439 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
3440 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
3441
3442 // If there is more than one alternative we cannot use the 'simple' nodes.
3443 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3444 alternativeBeginOpCode = OpNestedAlternativeBegin;
3445 alternativeNextOpCode = OpNestedAlternativeNext;
3446 alternativeEndOpCode = OpNestedAlternativeEnd;
3447 }
3448 } else if (term->parentheses.isTerminal) {
3449 // Select the 'Terminal' nodes.
3450 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
3451 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
3452 } else {
3453#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3454 // We only handle generic parenthesis with non-fixed counts.
3455 if (term->quantityType == QuantifierFixedCount) {
3456 // This subpattern is not supported by the JIT.
3457 m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
3458 return;
3459 }
3460
3461 m_containsNestedSubpatterns = true;
3462
3463 // Select the 'Generic' nodes.
3464 parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
3465 parenthesesEndOpCode = OpParenthesesSubpatternEnd;
3466
3467 // If there is more than one alternative we cannot use the 'simple' nodes.
3468 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3469 alternativeBeginOpCode = OpNestedAlternativeBegin;
3470 alternativeNextOpCode = OpNestedAlternativeNext;
3471 alternativeEndOpCode = OpNestedAlternativeEnd;
3472 }
3473#else
3474 // This subpattern is not supported by the JIT.
3475 m_failureReason = JITFailureReason::ParenthesizedSubpattern;
3476 return;
3477#endif
3478 }
3479
3480 size_t parenBegin = m_ops.size();
3481 m_ops.append(parenthesesBeginOpCode);
3482
3483 m_ops.append(alternativeBeginOpCode);
3484 m_ops.last().m_previousOp = notFound;
3485 m_ops.last().m_term = term;
3486 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3487 for (unsigned i = 0; i < alternatives.size(); ++i) {
3488 size_t lastOpIndex = m_ops.size() - 1;
3489
3490 PatternAlternative* nestedAlternative = alternatives[i].get();
3491 opCompileAlternative(nestedAlternative);
3492
3493 size_t thisOpIndex = m_ops.size();
3494 m_ops.append(YarrOp(alternativeNextOpCode));
3495
3496 YarrOp& lastOp = m_ops[lastOpIndex];
3497 YarrOp& thisOp = m_ops[thisOpIndex];
3498
3499 lastOp.m_alternative = nestedAlternative;
3500 lastOp.m_nextOp = thisOpIndex;
3501 thisOp.m_previousOp = lastOpIndex;
3502 thisOp.m_term = term;
3503 }
3504 YarrOp& lastOp = m_ops.last();
3505 ASSERT(lastOp.m_op == alternativeNextOpCode);
3506 lastOp.m_op = alternativeEndOpCode;
3507 lastOp.m_alternative = 0;
3508 lastOp.m_nextOp = notFound;
3509
3510 size_t parenEnd = m_ops.size();
3511 m_ops.append(parenthesesEndOpCode);
3512
3513 m_ops[parenBegin].m_term = term;
3514 m_ops[parenBegin].m_previousOp = notFound;
3515 m_ops[parenBegin].m_nextOp = parenEnd;
3516 m_ops[parenEnd].m_term = term;
3517 m_ops[parenEnd].m_previousOp = parenBegin;
3518 m_ops[parenEnd].m_nextOp = notFound;
3519 }
3520
3521 // opCompileParentheticalAssertion
3522 // Emits ops for a parenthetical assertion. These consist of an
3523 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
3524 // the alternatives, with these wrapped by an outer pair of
3525 // OpParentheticalAssertionBegin/End nodes.
3526 // We can always use the OpSimpleNestedAlternative nodes in the
3527 // case of parenthetical assertions since these only ever match
3528 // once, and will never backtrack back into the assertion.
3529 void opCompileParentheticalAssertion(PatternTerm* term)
3530 {
3531 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3532 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3533 return;
3534 }
3535
3536 size_t parenBegin = m_ops.size();
3537 m_ops.append(OpParentheticalAssertionBegin);
3538
3539 m_ops.append(OpSimpleNestedAlternativeBegin);
3540 m_ops.last().m_previousOp = notFound;
3541 m_ops.last().m_term = term;
3542 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3543 for (unsigned i = 0; i < alternatives.size(); ++i) {
3544 size_t lastOpIndex = m_ops.size() - 1;
3545
3546 PatternAlternative* nestedAlternative = alternatives[i].get();
3547 opCompileAlternative(nestedAlternative);
3548
3549 size_t thisOpIndex = m_ops.size();
3550 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
3551
3552 YarrOp& lastOp = m_ops[lastOpIndex];
3553 YarrOp& thisOp = m_ops[thisOpIndex];
3554
3555 lastOp.m_alternative = nestedAlternative;
3556 lastOp.m_nextOp = thisOpIndex;
3557 thisOp.m_previousOp = lastOpIndex;
3558 thisOp.m_term = term;
3559 }
3560 YarrOp& lastOp = m_ops.last();
3561 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
3562 lastOp.m_op = OpSimpleNestedAlternativeEnd;
3563 lastOp.m_alternative = 0;
3564 lastOp.m_nextOp = notFound;
3565
3566 size_t parenEnd = m_ops.size();
3567 m_ops.append(OpParentheticalAssertionEnd);
3568
3569 m_ops[parenBegin].m_term = term;
3570 m_ops[parenBegin].m_previousOp = notFound;
3571 m_ops[parenBegin].m_nextOp = parenEnd;
3572 m_ops[parenEnd].m_term = term;
3573 m_ops[parenEnd].m_previousOp = parenBegin;
3574 m_ops[parenEnd].m_nextOp = notFound;
3575 }
3576
3577 // opCompileAlternative
3578 // Called to emit nodes for all terms in an alternative.
3579 void opCompileAlternative(PatternAlternative* alternative)
3580 {
3581 optimizeAlternative(alternative);
3582
3583 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
3584 PatternTerm* term = &alternative->m_terms[i];
3585
3586 switch (term->type) {
3587 case PatternTerm::TypeParenthesesSubpattern:
3588 opCompileParenthesesSubpattern(term);
3589 break;
3590
3591 case PatternTerm::TypeParentheticalAssertion:
3592 opCompileParentheticalAssertion(term);
3593 break;
3594
3595 default:
3596 m_ops.append(term);
3597 }
3598 }
3599 }
3600
3601 // opCompileBody
3602 // This method compiles the body disjunction of the regular expression.
3603 // The body consists of two sets of alternatives - zero or more 'once
3604 // through' (BOL anchored) alternatives, followed by zero or more
3605 // repeated alternatives.
3606 // For each of these two sets of alteratives, if not empty they will be
3607 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
3608 // 'begin' node referencing the first alternative, and 'next' nodes
3609 // referencing any further alternatives. The begin/next/end nodes are
3610 // linked together in a doubly linked list. In the case of repeating
3611 // alternatives, the end node is also linked back to the beginning.
3612 // If no repeating alternatives exist, then a OpMatchFailed node exists
3613 // to return the failing result.
3614 void opCompileBody(PatternDisjunction* disjunction)
3615 {
3616 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3617 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3618 return;
3619 }
3620
3621 Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
3622 size_t currentAlternativeIndex = 0;
3623
3624 // Emit the 'once through' alternatives.
3625 if (alternatives.size() && alternatives[0]->onceThrough()) {
3626 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3627 m_ops.last().m_previousOp = notFound;
3628
3629 do {
3630 size_t lastOpIndex = m_ops.size() - 1;
3631 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3632 opCompileAlternative(alternative);
3633
3634 size_t thisOpIndex = m_ops.size();
3635 m_ops.append(YarrOp(OpBodyAlternativeNext));
3636
3637 YarrOp& lastOp = m_ops[lastOpIndex];
3638 YarrOp& thisOp = m_ops[thisOpIndex];
3639
3640 lastOp.m_alternative = alternative;
3641 lastOp.m_nextOp = thisOpIndex;
3642 thisOp.m_previousOp = lastOpIndex;
3643
3644 ++currentAlternativeIndex;
3645 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
3646
3647 YarrOp& lastOp = m_ops.last();
3648
3649 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3650 lastOp.m_op = OpBodyAlternativeEnd;
3651 lastOp.m_alternative = 0;
3652 lastOp.m_nextOp = notFound;
3653 }
3654
3655 if (currentAlternativeIndex == alternatives.size()) {
3656 m_ops.append(YarrOp(OpMatchFailed));
3657 return;
3658 }
3659
3660 // Emit the repeated alternatives.
3661 size_t repeatLoop = m_ops.size();
3662 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3663 m_ops.last().m_previousOp = notFound;
3664 do {
3665 size_t lastOpIndex = m_ops.size() - 1;
3666 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3667 ASSERT(!alternative->onceThrough());
3668 opCompileAlternative(alternative);
3669
3670 size_t thisOpIndex = m_ops.size();
3671 m_ops.append(YarrOp(OpBodyAlternativeNext));
3672
3673 YarrOp& lastOp = m_ops[lastOpIndex];
3674 YarrOp& thisOp = m_ops[thisOpIndex];
3675
3676 lastOp.m_alternative = alternative;
3677 lastOp.m_nextOp = thisOpIndex;
3678 thisOp.m_previousOp = lastOpIndex;
3679
3680 ++currentAlternativeIndex;
3681 } while (currentAlternativeIndex < alternatives.size());
3682 YarrOp& lastOp = m_ops.last();
3683 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3684 lastOp.m_op = OpBodyAlternativeEnd;
3685 lastOp.m_alternative = 0;
3686 lastOp.m_nextOp = repeatLoop;
3687 }
3688
3689 void generateTryReadUnicodeCharacterHelper()
3690 {
3691#ifdef JIT_UNICODE_EXPRESSIONS
3692 if (m_tryReadUnicodeCharacterCalls.isEmpty())
3693 return;
3694
3695 ASSERT(m_decodeSurrogatePairs);
3696
3697 m_tryReadUnicodeCharacterEntry = label();
3698
3699 tagReturnAddress();
3700
3701 tryReadUnicodeCharImpl(regT0);
3702
3703 ret();
3704#endif
3705 }
3706
    // Emits the function prologue: saves the callee-saved registers this
    // generator may clobber (the pushes here must mirror, in reverse, the
    // pops in generateReturn()), normalizes the 32-bit index/length
    // arguments, and flags the VM as executing RegExp JIT code.
    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);

        // ebx is only used (and therefore only saved) when the initial start
        // value must be preserved across the match.
        if (m_pattern.m_saveInitialStartValue)
            push(X86Registers::ebx);

#if OS(WINDOWS)
        push(X86Registers::edi);
#endif
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        // Extra registers are needed for the paren-context free list / match
        // count when generic nested subpatterns are present.
        if (m_containsNestedSubpatterns) {
#if OS(WINDOWS)
            push(X86Registers::esi);
#endif
            push(X86Registers::r12);
        }
#endif

        // r13-r15 are used when decoding surrogate pairs.
        if (m_decodeSurrogatePairs) {
            push(X86Registers::r13);
            push(X86Registers::r14);
            push(X86Registers::r15);
        }
        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#if OS(WINDOWS)
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
        // rcx is the pointer to the allocated space for result in x64 Windows.
        push(X86Registers::ecx);
#endif
#elif CPU(ARM64)
        tagReturnAddress();
        if (m_decodeSurrogatePairs) {
            pushPair(framePointerRegister, linkRegister);
            // Preload the constants used to combine/classify surrogate pairs.
            move(TrustedImm32(0x10000), supplementaryPlanesBase);
            move(TrustedImm32(0xd800), leadingSurrogateTag);
            move(TrustedImm32(0xdc00), trailingSurrogateTag);
        }

        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#elif CPU(ARM_THUMB2)
        // Callee-saved registers used unconditionally on ARM Thumb-2.
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
        push(ARMRegisters::r8);
#elif CPU(MIPS)
        // Do nothing.
#endif

        // Mark the VM as currently running RegExp JIT code; cleared again in
        // generateReturn(). (Consumer of this flag is outside this file —
        // presumably crash/sampling diagnostics; confirm before relying on it.)
        store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT);
    }
3765
    // Emits the function epilogue: clears the VM's RegExp-JIT flag and
    // restores callee-saved registers in exactly the reverse order of the
    // pushes in generateEnter(), then returns.
    void generateReturn()
    {
        store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT);

#if CPU(X86_64)
#if OS(WINDOWS)
        // Store the return value in the allocated space pointed by rcx.
        pop(X86Registers::ecx);
        store64(returnRegister, Address(X86Registers::ecx));
        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
        move(X86Registers::ecx, returnRegister);
#endif
        if (m_decodeSurrogatePairs) {
            pop(X86Registers::r15);
            pop(X86Registers::r14);
            pop(X86Registers::r13);
        }

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns) {
            pop(X86Registers::r12);
#if OS(WINDOWS)
            pop(X86Registers::esi);
#endif
        }
#endif
#if OS(WINDOWS)
        pop(X86Registers::edi);
#endif

        // ebx was only pushed when the initial start value had to be saved.
        if (m_pattern.m_saveInitialStartValue)
            pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM64)
        if (m_decodeSurrogatePairs)
            popPair(framePointerRegister, linkRegister);
#elif CPU(ARM_THUMB2)
        pop(ARMRegisters::r8);
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }
3812
3813public:
    // Captures the pattern, its source string, the destination code block and
    // the character width, and derives the flags (surrogate-pair decoding,
    // unicode ignore-case, canonicalization mode) used throughout generation.
    // No code is emitted until compile() is called.
    YarrGenerator(VM* vm, YarrPattern& pattern, String& patternString, YarrCodeBlock& codeBlock, YarrCharSize charSize)
        : m_vm(vm)
        , m_pattern(pattern)
        , m_patternString(patternString)
        , m_codeBlock(codeBlock)
        , m_charSize(charSize)
        , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
        , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
        , m_fixedSizedAlternative(false)
        , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        , m_containsNestedSubpatterns(false)
        , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
#endif
    {
    }
3830
    // Top-level entry point. Lowers the pattern into the linear m_ops list,
    // emits machine code for the matching and backtracking paths, links the
    // code and installs it into the YarrCodeBlock. Any unsupported construct
    // or allocation failure records a fallback reason so callers use the
    // interpreter instead.
    void compile()
    {
        YarrCodeBlock& codeBlock = m_codeBlock;

#ifndef JIT_UNICODE_EXPRESSIONS
        // Without unicode-expression support compiled in, surrogate-pair
        // patterns must be interpreted.
        if (m_decodeSurrogatePairs) {
            codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair);
            return;
        }
#endif

        // Backreferences are only JITed in the configurations excluded by the
        // condition below; everything else falls back.
        if (m_pattern.m_containsBackreferences
#if ENABLE(YARR_JIT_BACKREFERENCES)
            && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
#endif
            ) {
                codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
                return;
        }

        // We need to compile before generating code since we set flags based on compilation that
        // are used during generation.
        opCompileBody(m_pattern.m_body);

        if (m_failureReason) {
            codeBlock.setFallBackWithFailureReason(*m_failureReason);
            return;
        }

        if (UNLIKELY(Options::dumpDisassembly() || Options::dumpRegExpDisassembly()))
            m_disassembler = makeUnique<YarrDisassembler>(this);

        if (m_disassembler)
            m_disassembler->setStartOfCode(label());

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns)
            codeBlock.setUsesPatternContextBuffer();
#endif

        generateEnter();

        // Immediately fail if the start index is already past the input end.
        Jump hasInput = checkInput();
        generateFailReturn();
        hasInput.link(this);

#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs)
            getEffectiveAddress(BaseIndex(input, length, TimesTwo), endOfStringAddress);
#endif

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns)
            move(TrustedImm32(matchLimit), remainingMatchCount);
#endif

        // Initialize all subpattern capture slots (start offsets) to -1 (no match).
        if (compileMode == IncludeSubpatterns) {
            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
        }

        if (!m_pattern.m_body->m_hasFixedSize)
            setMatchStart(index);

        initCallFrame();

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns) {
            initParenContextFreeList();
            // Free-list setup can itself fail (e.g. required size too large).
            if (m_failureReason) {
                codeBlock.setFallBackWithFailureReason(*m_failureReason);
                return;
            }
        }
#endif

        if (m_pattern.m_saveInitialStartValue)
            move(index, initialStart);

        // Emit the forward-matching path, then the backtracking path.
        generate();
        if (m_disassembler)
            m_disassembler->setEndOfGenerate(label());
        backtrack();
        if (m_disassembler)
            m_disassembler->setEndOfBacktrack(label());

        generateTryReadUnicodeCharacterHelper();

        generateJITFailReturn();

        if (m_disassembler)
            m_disassembler->setEndOfCode(label());

        LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
        if (linkBuffer.didFailToAllocate()) {
            codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
            return;
        }

        // Patch every recorded call site to point at the shared helper.
        if (!m_tryReadUnicodeCharacterCalls.isEmpty()) {
            CodeLocationLabel<NoPtrTag> tryReadUnicodeCharacterHelper = linkBuffer.locationOf<NoPtrTag>(m_tryReadUnicodeCharacterEntry);

            for (auto call : m_tryReadUnicodeCharacterCalls)
                linkBuffer.link(call, tryReadUnicodeCharacterHelper);
        }

        m_backtrackingState.linkDataLabels(linkBuffer);

        if (m_disassembler)
            m_disassembler->dump(linkBuffer);

        // Install the finalized code under the tag matching mode and char size.
        if (compileMode == MatchOnly) {
            if (m_charSize == Char8)
                codeBlock.set8BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular expression"));
            else
                codeBlock.set16BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly16BitPtrTag, "Match-only 16-bit regular expression"));
        } else {
            if (m_charSize == Char8)
                codeBlock.set8BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr8BitPtrTag, "8-bit regular expression"));
            else
                codeBlock.set16BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr16BitPtrTag, "16-bit regular expression"));
        }
        // A failure reason may also have been recorded during generation.
        if (m_failureReason)
            codeBlock.setFallBackWithFailureReason(*m_failureReason);
    }
3956
3957 const char* variant() override
3958 {
3959 if (compileMode == MatchOnly) {
3960 if (m_charSize == Char8)
3961 return "Match-only 8-bit regular expression";
3962
3963 return "Match-only 16-bit regular expression";
3964 }
3965
3966 if (m_charSize == Char8)
3967 return "8-bit regular expression";
3968
3969 return "16-bit regular expression";
3970 }
3971
    // Disassembler hook: the number of lowered ops for this pattern.
    unsigned opCount() override
    {
        return m_ops.size();
    }
3976
    // Disassembler hook: prints the source pattern being compiled.
    void dumpPatternString(PrintStream& out) override
    {
        m_pattern.dumpPatternString(out, m_patternString);
    }
3981
3982 int dumpFor(PrintStream& out, unsigned opIndex) override
3983 {
3984 if (opIndex >= opCount())
3985 return 0;
3986
3987 out.printf("%4d:", opIndex);
3988
3989 YarrOp& op = m_ops[opIndex];
3990 PatternTerm* term = op.m_term;
3991 switch (op.m_op) {
3992 case OpTerm: {
3993 out.print("OpTerm ");
3994 switch (term->type) {
3995 case PatternTerm::TypeAssertionBOL:
3996 out.print("Assert BOL");
3997 break;
3998
3999 case PatternTerm::TypeAssertionEOL:
4000 out.print("Assert EOL");
4001 break;
4002
4003 case PatternTerm::TypeBackReference:
4004 out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
4005 term->dumpQuantifier(out);
4006 break;
4007
4008 case PatternTerm::TypePatternCharacter:
4009 out.print("TypePatternCharacter ");
4010 dumpUChar32(out, term->patternCharacter);
4011 if (m_pattern.ignoreCase())
4012 out.print(" ignore case");
4013
4014 term->dumpQuantifier(out);
4015 break;
4016
4017 case PatternTerm::TypeCharacterClass:
4018 out.print("TypePatternCharacterClass ");
4019 if (term->invert())
4020 out.print("not ");
4021 dumpCharacterClass(out, &m_pattern, term->characterClass);
4022 term->dumpQuantifier(out);
4023 break;
4024
4025 case PatternTerm::TypeAssertionWordBoundary:
4026 out.printf("%sword boundary", term->invert() ? "non-" : "");
4027 break;
4028
4029 case PatternTerm::TypeDotStarEnclosure:
4030 out.print(".* enclosure");
4031 break;
4032
4033 case PatternTerm::TypeForwardReference:
4034 out.print("TypeForwardReference <not handled>");
4035 break;
4036
4037 case PatternTerm::TypeParenthesesSubpattern:
4038 case PatternTerm::TypeParentheticalAssertion:
4039 RELEASE_ASSERT_NOT_REACHED();
4040 break;
4041 }
4042
4043 if (op.m_isDeadCode)
4044 out.print(" already handled");
4045 out.print("\n");
4046 return(0);
4047 }
4048
4049 case OpBodyAlternativeBegin:
4050 out.printf("OpBodyAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4051 return(0);
4052
4053 case OpBodyAlternativeNext:
4054 out.printf("OpBodyAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4055 return(0);
4056
4057 case OpBodyAlternativeEnd:
4058 out.print("OpBodyAlternativeEnd\n");
4059 return(0);
4060
4061 case OpSimpleNestedAlternativeBegin:
4062 out.printf("OpSimpleNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4063 return(1);
4064
4065 case OpNestedAlternativeBegin:
4066 out.printf("OpNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4067 return(1);
4068
4069 case OpSimpleNestedAlternativeNext:
4070 out.printf("OpSimpleNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4071 return(0);
4072
4073 case OpNestedAlternativeNext:
4074 out.printf("OpNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4075 return(0);
4076
4077 case OpSimpleNestedAlternativeEnd:
4078 out.print("OpSimpleNestedAlternativeEnd");
4079 term->dumpQuantifier(out);
4080 out.print("\n");
4081 return(-1);
4082
4083 case OpNestedAlternativeEnd:
4084 out.print("OpNestedAlternativeEnd");
4085 term->dumpQuantifier(out);
4086 out.print("\n");
4087 return(-1);
4088
4089 case OpParenthesesSubpatternOnceBegin:
4090 out.print("OpParenthesesSubpatternOnceBegin ");
4091 if (term->capture())
4092 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4093 else
4094 out.print("non-capturing");
4095 term->dumpQuantifier(out);
4096 out.print("\n");
4097 return(0);
4098
4099 case OpParenthesesSubpatternOnceEnd:
4100 out.print("OpParenthesesSubpatternOnceEnd ");
4101 if (term->capture())
4102 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4103 else
4104 out.print("non-capturing");
4105 term->dumpQuantifier(out);
4106 out.print("\n");
4107 return(0);
4108
4109 case OpParenthesesSubpatternTerminalBegin:
4110 out.print("OpParenthesesSubpatternTerminalBegin ");
4111 if (term->capture())
4112 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4113 else
4114 out.print("non-capturing\n");
4115 return(0);
4116
4117 case OpParenthesesSubpatternTerminalEnd:
4118 out.print("OpParenthesesSubpatternTerminalEnd ");
4119 if (term->capture())
4120 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4121 else
4122 out.print("non-capturing\n");
4123 return(0);
4124
4125 case OpParenthesesSubpatternBegin:
4126 out.print("OpParenthesesSubpatternBegin ");
4127 if (term->capture())
4128 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4129 else
4130 out.print("non-capturing");
4131 term->dumpQuantifier(out);
4132 out.print("\n");
4133 return(0);
4134
4135 case OpParenthesesSubpatternEnd:
4136 out.print("OpParenthesesSubpatternEnd ");
4137 if (term->capture())
4138 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4139 else
4140 out.print("non-capturing");
4141 term->dumpQuantifier(out);
4142 out.print("\n");
4143 return(0);
4144
4145 case OpParentheticalAssertionBegin:
4146 out.printf("OpParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
4147 return(0);
4148
4149 case OpParentheticalAssertionEnd:
4150 out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
4151 return(0);
4152
4153 case OpMatchFailed:
4154 out.print("OpMatchFailed\n");
4155 return(0);
4156 }
4157
4158 return(0);
4159 }
4160
4161private:
4162 VM* m_vm;
4163
4164 YarrPattern& m_pattern;
4165 String& m_patternString;
4166
4167 YarrCodeBlock& m_codeBlock;
4168 YarrCharSize m_charSize;
4169
4170 // Used to detect regular expression constructs that are not currently
4171 // supported in the JIT; fall back to the interpreter when this is detected.
4172 Optional<JITFailureReason> m_failureReason;
4173
4174 bool m_decodeSurrogatePairs;
4175 bool m_unicodeIgnoreCase;
4176 bool m_fixedSizedAlternative;
4177 CanonicalMode m_canonicalMode;
4178#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
4179 bool m_containsNestedSubpatterns;
4180 ParenContextSizes m_parenContextSizes;
4181#endif
4182 JumpList m_abortExecution;
4183 JumpList m_hitMatchLimit;
4184 Vector<Call> m_tryReadUnicodeCharacterCalls;
4185 Label m_tryReadUnicodeCharacterEntry;
4186
4187 // The regular expression expressed as a linear sequence of operations.
4188 Vector<YarrOp, 128> m_ops;
4189
4190 // This records the current input offset being applied due to the current
4191 // set of alternatives we are nested within. E.g. when matching the
4192 // character 'b' within the regular expression /abc/, we will know that
4193 // the minimum size for the alternative is 3, checked upon entry to the
4194 // alternative, and that 'b' is at offset 1 from the start, and as such
4195 // when matching 'b' we need to apply an offset of -2 to the load.
4196 //
4197 // FIXME: This should go away. Rather than tracking this value throughout
4198 // code generation, we should gather this information up front & store it
4199 // on the YarrOp structure.
4200 Checked<unsigned> m_checkedOffset;
4201
4202 // This class records state whilst generating the backtracking path of code.
4203 BacktrackingState m_backtrackingState;
4204
4205 std::unique_ptr<YarrDisassembler> m_disassembler;
4206};
4207
4208static void dumpCompileFailure(JITFailureReason failure)
4209{
4210 switch (failure) {
4211 case JITFailureReason::DecodeSurrogatePair:
4212 dataLog("Can't JIT a pattern decoding surrogate pairs\n");
4213 break;
4214 case JITFailureReason::BackReference:
4215 dataLog("Can't JIT some patterns containing back references\n");
4216 break;
4217 case JITFailureReason::ForwardReference:
4218 dataLog("Can't JIT a pattern containing forward references\n");
4219 break;
4220 case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
4221 dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
4222 break;
4223 case JITFailureReason::ParenthesizedSubpattern:
4224 dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
4225 break;
4226 case JITFailureReason::FixedCountParenthesizedSubpattern:
4227 dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
4228 break;
4229 case JITFailureReason::ParenthesisNestedTooDeep:
4230 dataLog("Can't JIT pattern due to parentheses nested too deeply\n");
4231 break;
4232 case JITFailureReason::ExecutableMemoryAllocationFailure:
4233 dataLog("Can't JIT because of failure of allocation of executable memory\n");
4234 break;
4235 }
4236}
4237
4238void jitCompile(YarrPattern& pattern, String& patternString, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
4239{
4240 if (mode == MatchOnly)
4241 YarrGenerator<MatchOnly>(vm, pattern, patternString, codeBlock, charSize).compile();
4242 else
4243 YarrGenerator<IncludeSubpatterns>(vm, pattern, patternString, codeBlock, charSize).compile();
4244
4245 if (auto failureReason = codeBlock.failureReason()) {
4246 if (Options::dumpCompiledRegExpPatterns()) {
4247 pattern.dumpPatternString(WTF::dataFile(), patternString);
4248 dataLog(" : ");
4249 dumpCompileFailure(*failureReason);
4250 }
4251 }
4252}
4253
4254}}
4255
4256#endif
4257