1/*
2 * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "YarrJIT.h"
28
29#include <wtf/ASCIICType.h>
30#include "LinkBuffer.h"
31#include "Options.h"
32#include "VM.h"
33#include "Yarr.h"
34#include "YarrCanonicalize.h"
35#include "YarrDisassembler.h"
36
37#if ENABLE(YARR_JIT)
38
39namespace JSC { namespace Yarr {
40
41template<YarrJITCompileMode compileMode>
42class YarrGenerator : public YarrJITInfo, private MacroAssembler {
43
44#if CPU(ARM_THUMB2)
45 static const RegisterID input = ARMRegisters::r0;
46 static const RegisterID index = ARMRegisters::r1;
47 static const RegisterID length = ARMRegisters::r2;
48 static const RegisterID output = ARMRegisters::r3;
49
50 static const RegisterID regT0 = ARMRegisters::r4;
51 static const RegisterID regT1 = ARMRegisters::r5;
52 static const RegisterID initialStart = ARMRegisters::r8;
53
54 static const RegisterID returnRegister = ARMRegisters::r0;
55 static const RegisterID returnRegister2 = ARMRegisters::r1;
56
57#define HAVE_INITIAL_START_REG
58#elif CPU(ARM64)
59 // Argument registers
60 static const RegisterID input = ARM64Registers::x0;
61 static const RegisterID index = ARM64Registers::x1;
62 static const RegisterID length = ARM64Registers::x2;
63 static const RegisterID output = ARM64Registers::x3;
64 static const RegisterID freelistRegister = ARM64Registers::x4;
65 static const RegisterID freelistSizeRegister = ARM64Registers::x5;
66
67 // Scratch registers
68 static const RegisterID regT0 = ARM64Registers::x6;
69 static const RegisterID regT1 = ARM64Registers::x7;
70 static const RegisterID regT2 = ARM64Registers::x8;
71 static const RegisterID remainingMatchCount = ARM64Registers::x9;
72 static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
73 static const RegisterID initialStart = ARM64Registers::x11;
74 static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
75 static const RegisterID leadingSurrogateTag = ARM64Registers::x13;
76 static const RegisterID trailingSurrogateTag = ARM64Registers::x14;
77 static const RegisterID endOfStringAddress = ARM64Registers::x15;
78
79 static const RegisterID returnRegister = ARM64Registers::x0;
80 static const RegisterID returnRegister2 = ARM64Registers::x1;
81
82 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
83#define HAVE_INITIAL_START_REG
84#define JIT_UNICODE_EXPRESSIONS
85#elif CPU(MIPS)
86 static const RegisterID input = MIPSRegisters::a0;
87 static const RegisterID index = MIPSRegisters::a1;
88 static const RegisterID length = MIPSRegisters::a2;
89 static const RegisterID output = MIPSRegisters::a3;
90
91 static const RegisterID regT0 = MIPSRegisters::t4;
92 static const RegisterID regT1 = MIPSRegisters::t5;
93 static const RegisterID initialStart = MIPSRegisters::t6;
94
95 static const RegisterID returnRegister = MIPSRegisters::v0;
96 static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98#define HAVE_INITIAL_START_REG
99#elif CPU(X86)
100 static const RegisterID input = X86Registers::eax;
101 static const RegisterID index = X86Registers::edx;
102 static const RegisterID length = X86Registers::ecx;
103 static const RegisterID output = X86Registers::edi;
104
105 static const RegisterID regT0 = X86Registers::ebx;
106 static const RegisterID regT1 = X86Registers::esi;
107
108 static const RegisterID returnRegister = X86Registers::eax;
109 static const RegisterID returnRegister2 = X86Registers::edx;
110#elif CPU(X86_64)
111#if !OS(WINDOWS)
112 // Argument registers
113 static const RegisterID input = X86Registers::edi;
114 static const RegisterID index = X86Registers::esi;
115 static const RegisterID length = X86Registers::edx;
116 static const RegisterID output = X86Registers::ecx;
117 static const RegisterID freelistRegister = X86Registers::r8;
118 static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119#else
120 // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121 // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122 COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123 static const RegisterID input = X86Registers::edx;
124 static const RegisterID index = X86Registers::r8;
125 static const RegisterID length = X86Registers::r9;
126 static const RegisterID output = X86Registers::r10;
127#endif
128
129 // Scratch registers
130 static const RegisterID regT0 = X86Registers::eax;
131#if !OS(WINDOWS)
132 static const RegisterID regT1 = X86Registers::r9;
133 static const RegisterID regT2 = X86Registers::r10;
134#else
135 static const RegisterID regT1 = X86Registers::ecx;
136 static const RegisterID regT2 = X86Registers::edi;
137#endif
138
139 static const RegisterID initialStart = X86Registers::ebx;
140#if !OS(WINDOWS)
141 static const RegisterID remainingMatchCount = X86Registers::r12;
142#else
143 static const RegisterID remainingMatchCount = X86Registers::esi;
144#endif
145 static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146 static const RegisterID leadingSurrogateTag = X86Registers::r14;
147 static const RegisterID endOfStringAddress = X86Registers::r15;
148
149 static const RegisterID returnRegister = X86Registers::eax;
150 static const RegisterID returnRegister2 = X86Registers::edx;
151
152 const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
153 const TrustedImm32 trailingSurrogateTag = TrustedImm32(0xdc00);
154 const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
155#define HAVE_INITIAL_START_REG
156#define JIT_UNICODE_EXPRESSIONS
157#endif
158
159#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
160 struct ParenContextSizes {
161 size_t m_numSubpatterns;
162 size_t m_frameSlots;
163
164 ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
165 : m_numSubpatterns(numSubpatterns)
166 , m_frameSlots(frameSlots)
167 {
168 }
169
170 size_t numSubpatterns() { return m_numSubpatterns; }
171
172 size_t frameSlots() { return m_frameSlots; }
173 };
174
175 struct ParenContext {
176 struct ParenContext* next;
177 uint32_t begin;
178 uint32_t matchAmount;
179 uintptr_t returnAddress;
180 struct Subpatterns {
181 unsigned start;
182 unsigned end;
183 } subpatterns[0];
184 uintptr_t frameSlots[0];
185
186 static size_t sizeFor(ParenContextSizes& parenContextSizes)
187 {
188 return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
189 }
190
191 static ptrdiff_t nextOffset()
192 {
193 return offsetof(ParenContext, next);
194 }
195
196 static ptrdiff_t beginOffset()
197 {
198 return offsetof(ParenContext, begin);
199 }
200
201 static ptrdiff_t matchAmountOffset()
202 {
203 return offsetof(ParenContext, matchAmount);
204 }
205
206 static ptrdiff_t returnAddressOffset()
207 {
208 return offsetof(ParenContext, returnAddress);
209 }
210
211 static ptrdiff_t subpatternOffset(size_t subpattern)
212 {
213 return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
214 }
215
216 static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
217 {
218 return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
219 }
220 };
221
222 void initParenContextFreeList()
223 {
224 RegisterID parenContextPointer = regT0;
225 RegisterID nextParenContextPointer = regT2;
226
227 size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
228
229 parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
230
231 if (parenContextSize > VM::patternContextBufferSize) {
232 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
233 return;
234 }
235
236 Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
237 move(freelistRegister, parenContextPointer);
238 addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
239 addPtr(freelistRegister, freelistSizeRegister);
240 subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
241
242 Label loopTop(this);
243 Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
244 storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
245 move(nextParenContextPointer, parenContextPointer);
246 addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
247 jump(loopTop);
248
249 initDone.link(this);
250 storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
251 emptyFreeList.link(this);
252 }
253
254 void allocateParenContext(RegisterID result)
255 {
256 m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
257 sub32(TrustedImm32(1), remainingMatchCount);
258 m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
259 move(freelistRegister, result);
260 loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
261 }
262
263 void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
264 {
265 loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
266 storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
267 move(headPtrRegister, freelistRegister);
268 }
269
270 void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
271 {
272 store32(index, Address(parenContextReg, ParenContext::beginOffset()));
273 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
274 store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
275 loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
276 storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
277 if (compileMode == IncludeSubpatterns) {
278 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
279 loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
280 storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
281 clearSubpatternStart(subpattern);
282 }
283 }
284 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
285 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
286 loadFromFrame(frameLocation, tempReg);
287 storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
288 }
289 }
290
291 void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
292 {
293 load32(Address(parenContextReg, ParenContext::beginOffset()), index);
294 storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
295 load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
296 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
297 loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
298 storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
299 if (compileMode == IncludeSubpatterns) {
300 for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
301 loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
302 storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
303 }
304 }
305 subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
306 for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
307 loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
308 storeToFrame(tempReg, frameLocation);
309 }
310 }
311#endif
312
313 void optimizeAlternative(PatternAlternative* alternative)
314 {
315 if (!alternative->m_terms.size())
316 return;
317
318 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
319 PatternTerm& term = alternative->m_terms[i];
320 PatternTerm& nextTerm = alternative->m_terms[i + 1];
321
322 // We can move BMP only character classes after fixed character terms.
323 if ((term.type == PatternTerm::TypeCharacterClass)
324 && (term.quantityType == QuantifierFixedCount)
325 && (!m_decodeSurrogatePairs || (term.characterClass->hasOneCharacterSize() && !term.m_invert))
326 && (nextTerm.type == PatternTerm::TypePatternCharacter)
327 && (nextTerm.quantityType == QuantifierFixedCount)) {
328 PatternTerm termCopy = term;
329 alternative->m_terms[i] = nextTerm;
330 alternative->m_terms[i + 1] = termCopy;
331 }
332 }
333 }
334
335 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
336 {
337 do {
338 // pick which range we're going to generate
339 int which = count >> 1;
340 char lo = ranges[which].begin;
341 char hi = ranges[which].end;
342
343 // check if there are any ranges or matches below lo. If not, just jl to failure -
344 // if there is anything else to check, check that first, if it falls through jmp to failure.
345 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
346 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
347
348 // generate code for all ranges before this one
349 if (which)
350 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
351
352 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
353 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
354 ++*matchIndex;
355 }
356 failures.append(jump());
357
358 loOrAbove.link(this);
359 } else if (which) {
360 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
361
362 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
363 failures.append(jump());
364
365 loOrAbove.link(this);
366 } else
367 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
368
369 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
370 ++*matchIndex;
371
372 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
373 // fall through to here, the value is above hi.
374
375 // shuffle along & loop around if there are any more matches to handle.
376 unsigned next = which + 1;
377 ranges += next;
378 count -= next;
379 } while (count);
380 }
381
382 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
383 {
384 if (charClass->m_table && !m_decodeSurrogatePairs) {
385 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
386 matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
387 return;
388 }
389
390 JumpList unicodeFail;
391 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
392 JumpList isAscii;
393 if (charClass->m_matches.size() || charClass->m_ranges.size())
394 isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));
395
396 if (charClass->m_matchesUnicode.size()) {
397 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
398 UChar32 ch = charClass->m_matchesUnicode[i];
399 matchDest.append(branch32(Equal, character, Imm32(ch)));
400 }
401 }
402
403 if (charClass->m_rangesUnicode.size()) {
404 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
405 UChar32 lo = charClass->m_rangesUnicode[i].begin;
406 UChar32 hi = charClass->m_rangesUnicode[i].end;
407
408 Jump below = branch32(LessThan, character, Imm32(lo));
409 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
410 below.link(this);
411 }
412 }
413
414 if (charClass->m_matches.size() || charClass->m_ranges.size())
415 unicodeFail = jump();
416 isAscii.link(this);
417 }
418
419 if (charClass->m_ranges.size()) {
420 unsigned matchIndex = 0;
421 JumpList failures;
422 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
423 while (matchIndex < charClass->m_matches.size())
424 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
425
426 failures.link(this);
427 } else if (charClass->m_matches.size()) {
428 // optimization: gather 'a','A' etc back together, can mask & test once.
429 Vector<char> matchesAZaz;
430
431 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
432 char ch = charClass->m_matches[i];
433 if (m_pattern.ignoreCase()) {
434 if (isASCIILower(ch)) {
435 matchesAZaz.append(ch);
436 continue;
437 }
438 if (isASCIIUpper(ch))
439 continue;
440 }
441 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
442 }
443
444 if (unsigned countAZaz = matchesAZaz.size()) {
445 or32(TrustedImm32(32), character);
446 for (unsigned i = 0; i < countAZaz; ++i)
447 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
448 }
449 }
450
451 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
452 unicodeFail.link(this);
453 }
454
455#ifdef JIT_UNICODE_EXPRESSIONS
456 void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
457 {
458 ASSERT(term->type == PatternTerm::TypeCharacterClass);
459
460 if (term->characterClass->hasOneCharacterSize() && !term->invert())
461 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
462 else {
463 add32(TrustedImm32(1), index);
464 failures.append(atEndOfInput());
465 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
466 add32(TrustedImm32(1), index);
467 isBMPChar.link(this);
468 }
469 }
470#endif
471
472 // Jumps if input not available; will have (incorrectly) incremented already!
473 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
474 {
475 if (countToCheck)
476 add32(Imm32(countToCheck), index);
477 return branch32(Above, index, length);
478 }
479
480 Jump jumpIfAvailableInput(unsigned countToCheck)
481 {
482 add32(Imm32(countToCheck), index);
483 return branch32(BelowOrEqual, index, length);
484 }
485
486 Jump checkNotEnoughInput(RegisterID additionalAmount)
487 {
488 add32(index, additionalAmount);
489 return branch32(Above, additionalAmount, length);
490 }
491
492 Jump checkInput()
493 {
494 return branch32(BelowOrEqual, index, length);
495 }
496
497 Jump atEndOfInput()
498 {
499 return branch32(Equal, index, length);
500 }
501
502 Jump notAtEndOfInput()
503 {
504 return branch32(NotEqual, index, length);
505 }
506
507 BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
508 {
509 RegisterID base = input;
510
511 // BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
512 // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
513 // for addressing in extreme cases where we might underflow. Therefore we check to see if
514 // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
515 // If so, we do our own address calculating by adjusting the base, using the result register
516 // as a temp address register.
517 unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
518 unsigned offsetAdjustAmount = 0x40000000;
519 if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
520 base = tempReg;
521 move(input, base);
522 while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
523 subPtr(TrustedImm32(offsetAdjustAmount), base);
524 if (m_charSize != Char8)
525 subPtr(TrustedImm32(offsetAdjustAmount), base);
526 negativeCharacterOffset -= offsetAdjustAmount;
527 }
528 }
529
530 Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
531
532 if (m_charSize == Char8)
533 return BaseIndex(input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
534
535 return BaseIndex(input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
536 }
537
538#ifdef JIT_UNICODE_EXPRESSIONS
539 void tryReadUnicodeCharImpl(RegisterID resultReg)
540 {
541 ASSERT(m_charSize == Char16);
542
543 JumpList notUnicode;
544
545 load16Unaligned(regUnicodeInputAndTrail, resultReg);
546 and32(surrogateTagMask, resultReg, regT2);
547 notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
548 addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
549 notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, endOfStringAddress));
550 load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
551 and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
552 notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
553 sub32(leadingSurrogateTag, resultReg);
554 sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
555 lshift32(TrustedImm32(10), resultReg);
556 or32(regUnicodeInputAndTrail, resultReg);
557 add32(supplementaryPlanesBase, resultReg);
558 notUnicode.link(this);
559 }
560
561 void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
562 {
563 ASSERT(m_charSize == Char16);
564
565 getEffectiveAddress(address, regUnicodeInputAndTrail);
566
567 if (resultReg == regT0)
568 m_tryReadUnicodeCharacterCalls.append(nearCall());
569 else
570 tryReadUnicodeCharImpl(resultReg);
571 }
572#endif
573
574 void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
575 {
576 BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
577
578 if (m_charSize == Char8)
579 load8(address, resultReg);
580 else
581 load16Unaligned(address, resultReg);
582 }
583
584 void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
585 {
586 BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
587
588 if (m_charSize == Char8)
589 load8(address, resultReg);
590#ifdef JIT_UNICODE_EXPRESSIONS
591 else if (m_decodeSurrogatePairs)
592 tryReadUnicodeChar(address, resultReg);
593#endif
594 else
595 load16Unaligned(address, resultReg);
596 }
597
598 Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
599 {
600 readCharacter(negativeCharacterOffset, character);
601
602 // For case-insesitive compares, non-ascii characters that have different
603 // upper & lower case representations are converted to a character class.
604 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
605 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
606 or32(TrustedImm32(0x20), character);
607 ch |= 0x20;
608 }
609
610 return branch32(NotEqual, character, Imm32(ch));
611 }
612
613 void storeToFrame(RegisterID reg, unsigned frameLocation)
614 {
615 poke(reg, frameLocation);
616 }
617
618 void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
619 {
620 poke(imm, frameLocation);
621 }
622
623#if CPU(ARM64) || CPU(X86_64)
624 void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
625 {
626 poke(imm, frameLocation);
627 }
628#endif
629
630 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
631 {
632 return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
633 }
634
635 void loadFromFrame(unsigned frameLocation, RegisterID reg)
636 {
637 peek(reg, frameLocation);
638 }
639
640 void loadFromFrameAndJump(unsigned frameLocation)
641 {
642 jump(Address(stackPointerRegister, frameLocation * sizeof(void*)), YarrBacktrackPtrTag);
643 }
644
645 unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
646 {
647 if (!callFrameSize)
648 return 0;
649
650 callFrameSize *= sizeof(void*);
651 if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
652 CRASH();
653 callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
654 return callFrameSize;
655 }
656 void initCallFrame()
657 {
658 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
659 if (callFrameSizeInBytes) {
660#if CPU(X86_64) || CPU(ARM64)
661 if (Options::zeroStackFrame()) {
662 // We need to start from the stack pointer, because we could have spilled callee saves
663 move(stackPointerRegister, regT0);
664 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
665 if (callFrameSizeInBytes <= 128) {
666 for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
667 storePtr(TrustedImm32(0), Address(regT0, -8 - offset));
668 } else {
669 Label zeroLoop = label();
670 subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
671#if CPU(ARM64)
672 storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
673#else
674 storePtr(TrustedImm32(0), Address(regT0));
675 storePtr(TrustedImm32(0), Address(regT0, sizeof(intptr_t)));
676#endif
677 branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
678 }
679 } else
680#endif
681 subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
682
683 }
684 }
685 void removeCallFrame()
686 {
687 unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
688 if (callFrameSizeInBytes)
689 addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
690 }
691
692 void generateFailReturn()
693 {
694 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
695 move(TrustedImm32(0), returnRegister2);
696 generateReturn();
697 }
698
699 void generateJITFailReturn()
700 {
701 if (m_abortExecution.empty() && m_hitMatchLimit.empty())
702 return;
703
704 JumpList finishExiting;
705 if (!m_abortExecution.empty()) {
706 m_abortExecution.link(this);
707 move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
708 finishExiting.append(jump());
709 }
710
711 if (!m_hitMatchLimit.empty()) {
712 m_hitMatchLimit.link(this);
713 move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
714 }
715
716 finishExiting.link(this);
717 removeCallFrame();
718 move(TrustedImm32(0), returnRegister2);
719 generateReturn();
720 }
721
722 // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
723 void setSubpatternStart(RegisterID reg, unsigned subpattern)
724 {
725 ASSERT(subpattern);
726 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
727 store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
728 }
729 void setSubpatternEnd(RegisterID reg, unsigned subpattern)
730 {
731 ASSERT(subpattern);
732 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
733 store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
734 }
735 void clearSubpatternStart(unsigned subpattern)
736 {
737 ASSERT(subpattern);
738 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
739 store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
740 }
741
742 void clearMatches(unsigned subpattern, unsigned lastSubpattern)
743 {
744 for (; subpattern <= lastSubpattern; subpattern++)
745 clearSubpatternStart(subpattern);
746 }
747
748 // We use one of three different strategies to track the start of the current match,
749 // while matching.
750 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
751 // at the end of matching. This is irrespective of compileMode, and in this case
752 // these methods should never be called.
753 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
754 // vector, store the match start in the output vector.
755 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
756 // in this register.
757 void setMatchStart(RegisterID reg)
758 {
759 ASSERT(!m_pattern.m_body->m_hasFixedSize);
760 if (compileMode == IncludeSubpatterns)
761 store32(reg, output);
762 else
763 move(reg, output);
764 }
765 void getMatchStart(RegisterID reg)
766 {
767 ASSERT(!m_pattern.m_body->m_hasFixedSize);
768 if (compileMode == IncludeSubpatterns)
769 load32(output, reg);
770 else
771 move(output, reg);
772 }
773
774 enum YarrOpCode : uint8_t {
775 // These nodes wrap body alternatives - those in the main disjunction,
776 // rather than subpatterns or assertions. These are chained together in
777 // a doubly linked list, with a 'begin' node for the first alternative,
778 // a 'next' node for each subsequent alternative, and an 'end' node at
779 // the end. In the case of repeating alternatives, the 'end' node also
780 // has a reference back to 'begin'.
781 OpBodyAlternativeBegin,
782 OpBodyAlternativeNext,
783 OpBodyAlternativeEnd,
784 // Similar to the body alternatives, but used for subpatterns with two
785 // or more alternatives.
786 OpNestedAlternativeBegin,
787 OpNestedAlternativeNext,
788 OpNestedAlternativeEnd,
789 // Used for alternatives in subpatterns where there is only a single
790 // alternative (backtracking is easier in these cases), or for alternatives
791 // which never need to be backtracked (those in parenthetical assertions,
792 // terminal subpatterns).
793 OpSimpleNestedAlternativeBegin,
794 OpSimpleNestedAlternativeNext,
795 OpSimpleNestedAlternativeEnd,
796 // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
797 OpParenthesesSubpatternOnceBegin,
798 OpParenthesesSubpatternOnceEnd,
799 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
800 OpParenthesesSubpatternTerminalBegin,
801 OpParenthesesSubpatternTerminalEnd,
802 // Used to wrap generic captured matches
803 OpParenthesesSubpatternBegin,
804 OpParenthesesSubpatternEnd,
805 // Used to wrap parenthetical assertions.
806 OpParentheticalAssertionBegin,
807 OpParentheticalAssertionEnd,
808 // Wraps all simple terms (pattern characters, character classes).
809 OpTerm,
810 // Where an expression contains only 'once through' body alternatives
811 // and no repeating ones, this op is used to return match failure.
812 OpMatchFailed
813 };
814
815 // This structure is used to hold the compiled opcode information,
816 // including reference back to the original PatternTerm/PatternAlternatives,
817 // and JIT compilation data structures.
818 struct YarrOp {
819 explicit YarrOp(PatternTerm* term)
820 : m_term(term)
821 , m_op(OpTerm)
822 , m_isDeadCode(false)
823 {
824 }
825
826 explicit YarrOp(YarrOpCode op)
827 : m_op(op)
828 , m_isDeadCode(false)
829 {
830 }
831
832 // For alternatives, this holds the PatternAlternative and doubly linked
833 // references to this alternative's siblings. In the case of the
834 // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
835 // m_nextOp will reference the OpBodyAlternativeBegin node of the first
836 // repeating alternative.
837 PatternAlternative* m_alternative;
838 size_t m_previousOp;
839 size_t m_nextOp;
840
841 // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
842 PatternTerm* m_term;
843 YarrOpCode m_op;
844
845 // Used to record a set of Jumps out of the generated code, typically
846 // used for jumps out to backtracking code, and a single reentry back
847 // into the code for a node (likely where a backtrack will trigger
848 // rematching).
849 Label m_reentry;
850 JumpList m_jumps;
851
852 // Used for backtracking when the prior alternative did not consume any
853 // characters but matched.
854 Jump m_zeroLengthMatch;
855
856 // This flag is used to null out the second pattern character, when
857 // two are fused to match a pair together.
858 bool m_isDeadCode;
859
860 // Currently used in the case of some of the more complex management of
861 // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
862 // recalculating it.
863 Checked<unsigned> m_checkAdjust;
864
865 // Used by OpNestedAlternativeNext/End to hold the pointer to the
866 // value that will be pushed into the pattern's frame to return to,
867 // upon backtracking back into the disjunction.
868 DataLabelPtr m_returnAddress;
869 };
870
871 // BacktrackingState
872 // This class encapsulates information about the state of code generation
873 // whilst generating the code for backtracking, when a term fails to match.
874 // Upon entry to code generation of the backtracking code for a given node,
875 // the Backtracking state will hold references to all control flow sources
876 // that are outputs in need of further backtracking from the prior node
877 // generated (which is the subsequent operation in the regular expression,
878 // and in the m_ops Vector, since we generated backtracking backwards).
879 // These references to control flow take the form of:
880 // - A jump list of jumps, to be linked to code that will backtrack them
881 // further.
882 // - A set of DataLabelPtr values, to be populated with values to be
883 // treated effectively as return addresses backtracking into complex
884 // subpatterns.
885 // - A flag indicating that the current sequence of generated code up to
886 // this point requires backtracking.
887 class BacktrackingState {
888 public:
889 BacktrackingState()
890 : m_pendingFallthrough(false)
891 {
892 }
893
894 // Add a jump or jumps, a return address, or set the flag indicating
895 // that the current 'fallthrough' control flow requires backtracking.
896 void append(const Jump& jump)
897 {
898 m_laterFailures.append(jump);
899 }
900 void append(JumpList& jumpList)
901 {
902 m_laterFailures.append(jumpList);
903 }
904 void append(const DataLabelPtr& returnAddress)
905 {
906 m_pendingReturns.append(returnAddress);
907 }
908 void fallthrough()
909 {
910 ASSERT(!m_pendingFallthrough);
911 m_pendingFallthrough = true;
912 }
913
914 // These methods clear the backtracking state, either linking to the
915 // current location, a provided label, or copying the backtracking out
916 // to a JumpList. All actions may require code generation to take place,
917 // and as such are passed a pointer to the assembler.
918 void link(MacroAssembler* assembler)
919 {
920 if (m_pendingReturns.size()) {
921 Label here(assembler);
922 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
923 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
924 m_pendingReturns.clear();
925 }
926 m_laterFailures.link(assembler);
927 m_laterFailures.clear();
928 m_pendingFallthrough = false;
929 }
930 void linkTo(Label label, MacroAssembler* assembler)
931 {
932 if (m_pendingReturns.size()) {
933 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
934 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
935 m_pendingReturns.clear();
936 }
937 if (m_pendingFallthrough)
938 assembler->jump(label);
939 m_laterFailures.linkTo(label, assembler);
940 m_laterFailures.clear();
941 m_pendingFallthrough = false;
942 }
943 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
944 {
945 if (m_pendingReturns.size()) {
946 Label here(assembler);
947 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
948 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
949 m_pendingReturns.clear();
950 m_pendingFallthrough = true;
951 }
952 if (m_pendingFallthrough)
953 jumpList.append(assembler->jump());
954 jumpList.append(m_laterFailures);
955 m_laterFailures.clear();
956 m_pendingFallthrough = false;
957 }
958
959 bool isEmpty()
960 {
961 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
962 }
963
964 // Called at the end of code generation to link all return addresses.
965 void linkDataLabels(LinkBuffer& linkBuffer)
966 {
967 ASSERT(isEmpty());
968 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
969 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf<YarrBacktrackPtrTag>(m_backtrackRecords[i].m_backtrackLocation));
970 }
971
972 private:
973 struct ReturnAddressRecord {
974 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
975 : m_dataLabel(dataLabel)
976 , m_backtrackLocation(backtrackLocation)
977 {
978 }
979
980 DataLabelPtr m_dataLabel;
981 Label m_backtrackLocation;
982 };
983
984 JumpList m_laterFailures;
985 bool m_pendingFallthrough;
986 Vector<DataLabelPtr, 4> m_pendingReturns;
987 Vector<ReturnAddressRecord, 4> m_backtrackRecords;
988 };
989
990 // Generation methods:
991 // ===================
992
993 // This method provides a default implementation of backtracking common
994 // to many terms; terms commonly jump out of the forwards matching path
995 // on any failed conditions, and add these jumps to the m_jumps list. If
996 // no special handling is required we can often just backtrack to m_jumps.
997 void backtrackTermDefault(size_t opIndex)
998 {
999 YarrOp& op = m_ops[opIndex];
1000 m_backtrackingState.append(op.m_jumps);
1001 }
1002
1003 void generateAssertionBOL(size_t opIndex)
1004 {
1005 YarrOp& op = m_ops[opIndex];
1006 PatternTerm* term = op.m_term;
1007
1008 if (m_pattern.multiline()) {
1009 const RegisterID character = regT0;
1010
1011 JumpList matchDest;
1012 if (!term->inputPosition)
1013 matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
1014
1015 readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1016 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1017 op.m_jumps.append(jump());
1018
1019 matchDest.link(this);
1020 } else {
1021 // Erk, really should poison out these alternatives early. :-/
1022 if (term->inputPosition)
1023 op.m_jumps.append(jump());
1024 else
1025 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1026 }
1027 }
1028 void backtrackAssertionBOL(size_t opIndex)
1029 {
1030 backtrackTermDefault(opIndex);
1031 }
1032
1033 void generateAssertionEOL(size_t opIndex)
1034 {
1035 YarrOp& op = m_ops[opIndex];
1036 PatternTerm* term = op.m_term;
1037
1038 if (m_pattern.multiline()) {
1039 const RegisterID character = regT0;
1040
1041 JumpList matchDest;
1042 if (term->inputPosition == m_checkedOffset.unsafeGet())
1043 matchDest.append(atEndOfInput());
1044
1045 readCharacter(m_checkedOffset - term->inputPosition, character);
1046 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
1047 op.m_jumps.append(jump());
1048
1049 matchDest.link(this);
1050 } else {
1051 if (term->inputPosition == m_checkedOffset.unsafeGet())
1052 op.m_jumps.append(notAtEndOfInput());
1053 // Erk, really should poison out these alternatives early. :-/
1054 else
1055 op.m_jumps.append(jump());
1056 }
1057 }
1058 void backtrackAssertionEOL(size_t opIndex)
1059 {
1060 backtrackTermDefault(opIndex);
1061 }
1062
1063 // Also falls though on nextIsNotWordChar.
1064 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1065 {
1066 YarrOp& op = m_ops[opIndex];
1067 PatternTerm* term = op.m_term;
1068
1069 const RegisterID character = regT0;
1070
1071 if (term->inputPosition == m_checkedOffset.unsafeGet())
1072 nextIsNotWordChar.append(atEndOfInput());
1073
1074 readCharacter(m_checkedOffset - term->inputPosition, character);
1075
1076 CharacterClass* wordcharCharacterClass;
1077
1078 if (m_unicodeIgnoreCase)
1079 wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1080 else
1081 wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1082
1083 matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
1084 }
1085
1086 void generateAssertionWordBoundary(size_t opIndex)
1087 {
1088 YarrOp& op = m_ops[opIndex];
1089 PatternTerm* term = op.m_term;
1090
1091 const RegisterID character = regT0;
1092
1093 Jump atBegin;
1094 JumpList matchDest;
1095 if (!term->inputPosition)
1096 atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
1097 readCharacter(m_checkedOffset - term->inputPosition + 1, character);
1098
1099 CharacterClass* wordcharCharacterClass;
1100
1101 if (m_unicodeIgnoreCase)
1102 wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1103 else
1104 wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1105
1106 matchCharacterClass(character, matchDest, wordcharCharacterClass);
1107 if (!term->inputPosition)
1108 atBegin.link(this);
1109
1110 // We fall through to here if the last character was not a wordchar.
1111 JumpList nonWordCharThenWordChar;
1112 JumpList nonWordCharThenNonWordChar;
1113 if (term->invert()) {
1114 matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
1115 nonWordCharThenWordChar.append(jump());
1116 } else {
1117 matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
1118 nonWordCharThenNonWordChar.append(jump());
1119 }
1120 op.m_jumps.append(nonWordCharThenNonWordChar);
1121
1122 // We jump here if the last character was a wordchar.
1123 matchDest.link(this);
1124 JumpList wordCharThenWordChar;
1125 JumpList wordCharThenNonWordChar;
1126 if (term->invert()) {
1127 matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
1128 wordCharThenWordChar.append(jump());
1129 } else {
1130 matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
1131 // This can fall-though!
1132 }
1133
1134 op.m_jumps.append(wordCharThenWordChar);
1135
1136 nonWordCharThenWordChar.link(this);
1137 wordCharThenNonWordChar.link(this);
1138 }
1139 void backtrackAssertionWordBoundary(size_t opIndex)
1140 {
1141 backtrackTermDefault(opIndex);
1142 }
1143
1144#if ENABLE(YARR_JIT_BACKREFERENCES)
1145 void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
1146 {
1147 YarrOp& op = m_ops[opIndex];
1148 PatternTerm* term = op.m_term;
1149 unsigned subpatternId = term->backReferenceSubpatternId;
1150
1151 Label loop(this);
1152
1153 readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
1154 readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
1155
1156 if (!m_pattern.ignoreCase())
1157 characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1158 else {
1159 Jump charactersMatch = branch32(Equal, character, patternCharacter);
1160 ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1161 load16(characterTableEntry, character);
1162 ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1163 load16(patternTableEntry, patternCharacter);
1164 characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1165 charactersMatch.link(this);
1166 }
1167
1168
1169 add32(TrustedImm32(1), index);
1170 add32(TrustedImm32(1), patternIndex);
1171
1172 branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
1173 }
1174
1175 void generateBackReference(size_t opIndex)
1176 {
1177 YarrOp& op = m_ops[opIndex];
1178 PatternTerm* term = op.m_term;
1179
1180 if (m_pattern.ignoreCase() && m_charSize != Char8) {
1181 m_failureReason = JITFailureReason::BackReference;
1182 return;
1183 }
1184
1185 unsigned subpatternId = term->backReferenceSubpatternId;
1186 unsigned parenthesesFrameLocation = term->frameLocation;
1187
1188 const RegisterID characterOrTemp = regT0;
1189 const RegisterID patternIndex = regT1;
1190 const RegisterID patternTemp = regT2;
1191
1192 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1193 if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
1194 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1195
1196 JumpList matches;
1197
1198 if (term->quantityType != QuantifierNonGreedy) {
1199 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1200 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1201
1202 // An empty match is successful without consuming characters
1203 if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
1204 matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
1205 matches.append(branch32(Equal, patternIndex, patternTemp));
1206 } else {
1207 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1208 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1209 zeroLengthMatch.link(this);
1210 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1211 matches.append(jump());
1212 tryNonZeroMatch.link(this);
1213 }
1214 }
1215
1216 switch (term->quantityType) {
1217 case QuantifierFixedCount: {
1218 Label outerLoop(this);
1219
1220 // PatternTemp should contain pattern end index at this point
1221 sub32(patternIndex, patternTemp);
1222 op.m_jumps.append(checkNotEnoughInput(patternTemp));
1223
1224 matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
1225
1226 if (term->quantityMaxCount != 1) {
1227 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1228 add32(TrustedImm32(1), characterOrTemp);
1229 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1230 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1231 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1232 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1233 jump(outerLoop);
1234 }
1235 matches.link(this);
1236 break;
1237 }
1238
1239 case QuantifierGreedy: {
1240 JumpList incompleteMatches;
1241
1242 Label outerLoop(this);
1243
1244 // PatternTemp should contain pattern end index at this point
1245 sub32(patternIndex, patternTemp);
1246 matches.append(checkNotEnoughInput(patternTemp));
1247
1248 matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1249
1250 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
1251 add32(TrustedImm32(1), characterOrTemp);
1252 storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1253 if (term->quantityMaxCount != quantifyInfinite)
1254 matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
1255 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1256 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1257
1258 // Store current index in frame for restoring after a partial match
1259 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1260 jump(outerLoop);
1261
1262 incompleteMatches.link(this);
1263 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1264
1265 matches.link(this);
1266 op.m_reentry = label();
1267 break;
1268 }
1269
1270 case QuantifierNonGreedy: {
1271 JumpList incompleteMatches;
1272
1273 matches.append(jump());
1274
1275 op.m_reentry = label();
1276
1277 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
1278 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
1279
1280 // An empty match is successful without consuming characters
1281 Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
1282 Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1283 zeroLengthMatch.link(this);
1284 storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1285 matches.append(jump());
1286 tryNonZeroMatch.link(this);
1287
1288 // Check if we have input remaining to match
1289 sub32(patternIndex, patternTemp);
1290 matches.append(checkNotEnoughInput(patternTemp));
1291
1292 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1293
1294 matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
1295
1296 matches.append(jump());
1297
1298 incompleteMatches.link(this);
1299 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1300
1301 matches.link(this);
1302 break;
1303 }
1304 }
1305 }
1306 void backtrackBackReference(size_t opIndex)
1307 {
1308 YarrOp& op = m_ops[opIndex];
1309 PatternTerm* term = op.m_term;
1310
1311 unsigned subpatternId = term->backReferenceSubpatternId;
1312
1313 m_backtrackingState.link(this);
1314 op.m_jumps.link(this);
1315
1316 JumpList failures;
1317
1318 unsigned parenthesesFrameLocation = term->frameLocation;
1319 switch (term->quantityType) {
1320 case QuantifierFixedCount:
1321 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
1322 break;
1323
1324 case QuantifierGreedy: {
1325 const RegisterID matchAmount = regT0;
1326 const RegisterID patternStartIndex = regT1;
1327 const RegisterID patternEndIndexOrLen = regT2;
1328
1329 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1330 failures.append(branchTest32(Zero, matchAmount));
1331
1332 load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
1333 load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
1334 sub32(patternStartIndex, patternEndIndexOrLen);
1335 sub32(patternEndIndexOrLen, index);
1336
1337 sub32(TrustedImm32(1), matchAmount);
1338 storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1339 jump(op.m_reentry);
1340 break;
1341 }
1342
1343 case QuantifierNonGreedy: {
1344 const RegisterID matchAmount = regT0;
1345
1346 failures.append(atEndOfInput());
1347 loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
1348 if (term->quantityMaxCount != quantifyInfinite)
1349 failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
1350 add32(TrustedImm32(1), matchAmount);
1351 storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1352 jump(op.m_reentry);
1353 break;
1354 }
1355 }
1356 failures.link(this);
1357 m_backtrackingState.fallthrough();
1358 }
1359#endif
1360
1361 void generatePatternCharacterOnce(size_t opIndex)
1362 {
1363 YarrOp& op = m_ops[opIndex];
1364
1365 if (op.m_isDeadCode)
1366 return;
1367
1368 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1369 // node, so there must always be at least one more node.
1370 ASSERT(opIndex + 1 < m_ops.size());
1371 YarrOp* nextOp = &m_ops[opIndex + 1];
1372
1373 PatternTerm* term = op.m_term;
1374 UChar32 ch = term->patternCharacter;
1375
1376 if ((ch > 0xff) && (m_charSize == Char8)) {
1377 // Have a 16 bit pattern character and an 8 bit string - short circuit
1378 op.m_jumps.append(jump());
1379 return;
1380 }
1381
1382 const RegisterID character = regT0;
1383#if CPU(X86_64) || CPU(ARM64)
1384 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
1385#else
1386 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
1387#endif
1388 uint64_t ignoreCaseMask = 0;
1389#if CPU(BIG_ENDIAN)
1390 uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
1391#else
1392 uint64_t allCharacters = ch;
1393#endif
1394 unsigned numberCharacters;
1395 unsigned startTermPosition = term->inputPosition;
1396
1397 // For case-insesitive compares, non-ascii characters that have different
1398 // upper & lower case representations are converted to a character class.
1399 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1400
1401 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1402#if CPU(BIG_ENDIAN)
1403 ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
1404#else
1405 ignoreCaseMask |= 32;
1406#endif
1407 }
1408
1409 for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1410 PatternTerm* nextTerm = nextOp->m_term;
1411
1412 // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1413 // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1414 if (nextTerm->type != PatternTerm::TypePatternCharacter
1415 || nextTerm->quantityType != QuantifierFixedCount
1416 || nextTerm->quantityMaxCount != 1
1417 || nextTerm->inputPosition != (startTermPosition + numberCharacters)
1418 || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
1419 break;
1420
1421 nextOp->m_isDeadCode = true;
1422
1423#if CPU(BIG_ENDIAN)
1424 int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
1425#else
1426 int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
1427#endif
1428
1429 UChar32 currentCharacter = nextTerm->patternCharacter;
1430
1431 if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
1432 // Have a 16 bit pattern character and an 8 bit string - short circuit
1433 op.m_jumps.append(jump());
1434 return;
1435 }
1436
1437 // For case-insesitive compares, non-ascii characters that have different
1438 // upper & lower case representations are converted to a character class.
1439 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
1440
1441 allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1442
1443 if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
1444 ignoreCaseMask |= 32ULL << shiftAmount;
1445 }
1446
1447 if (m_decodeSurrogatePairs)
1448 op.m_jumps.append(jumpIfNoAvailableInput());
1449
1450 if (m_charSize == Char8) {
1451 auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1452 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1453 };
1454
1455 auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
1456 load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
1457 if (mask)
1458 or32(Imm32(mask), character);
1459 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1460 };
1461
1462 auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1463 if (mask) {
1464 load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1465 if (mask)
1466 or32(Imm32(mask), character);
1467 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1468 return;
1469 }
1470 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1471 };
1472
1473#if CPU(X86_64) || CPU(ARM64)
1474 auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1475 load64(negativeOffsetIndexedAddress(offset, character), character);
1476 if (mask)
1477 or64(TrustedImm64(mask), character);
1478 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1479 };
1480#endif
1481
1482 switch (numberCharacters) {
1483 case 1:
1484 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1485 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1486 return;
1487 case 2: {
1488 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1489 return;
1490 }
1491 case 3: {
1492 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1493 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
1494 return;
1495 }
1496 case 4: {
1497 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1498 return;
1499 }
1500#if CPU(X86_64) || CPU(ARM64)
1501 case 5: {
1502 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1503 check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
1504 return;
1505 }
1506 case 6: {
1507 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1508 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1509 return;
1510 }
1511 case 7: {
1512 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1513 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1514 check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
1515 return;
1516 }
1517 case 8: {
1518 check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1519 return;
1520 }
1521#endif
1522 }
1523 } else {
1524 auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1525 op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
1526 };
1527
1528 auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1529 if (mask) {
1530 load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
1531 if (mask)
1532 or32(Imm32(mask), character);
1533 op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
1534 return;
1535 }
1536 op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
1537 };
1538
1539#if CPU(X86_64) || CPU(ARM64)
1540 auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1541 load64(negativeOffsetIndexedAddress(offset, character), character);
1542 if (mask)
1543 or64(TrustedImm64(mask), character);
1544 op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
1545 };
1546#endif
1547
1548 switch (numberCharacters) {
1549 case 1:
1550 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1551 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1552 return;
1553 case 2:
1554 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1555 return;
1556#if CPU(X86_64) || CPU(ARM64)
1557 case 3:
1558 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1559 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
1560 return;
1561 case 4:
1562 check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1563 return;
1564#endif
1565 }
1566 }
1567 }
1568 void backtrackPatternCharacterOnce(size_t opIndex)
1569 {
1570 backtrackTermDefault(opIndex);
1571 }
1572
1573 void generatePatternCharacterFixed(size_t opIndex)
1574 {
1575 YarrOp& op = m_ops[opIndex];
1576 PatternTerm* term = op.m_term;
1577 UChar32 ch = term->patternCharacter;
1578
1579 const RegisterID character = regT0;
1580 const RegisterID countRegister = regT1;
1581
1582 if (m_decodeSurrogatePairs)
1583 op.m_jumps.append(jumpIfNoAvailableInput());
1584
1585 move(index, countRegister);
1586 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1587 scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
1588 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1589
1590 Label loop(this);
1591 readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1592 // For case-insesitive compares, non-ascii characters that have different
1593 // upper & lower case representations are converted to a character class.
1594 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1595 if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1596 or32(TrustedImm32(0x20), character);
1597 ch |= 0x20;
1598 }
1599
1600 op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1601#ifdef JIT_UNICODE_EXPRESSIONS
1602 if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1603 add32(TrustedImm32(2), countRegister);
1604 else
1605#endif
1606 add32(TrustedImm32(1), countRegister);
1607 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1608 }
1609 void backtrackPatternCharacterFixed(size_t opIndex)
1610 {
1611 backtrackTermDefault(opIndex);
1612 }
1613
1614 void generatePatternCharacterGreedy(size_t opIndex)
1615 {
1616 YarrOp& op = m_ops[opIndex];
1617 PatternTerm* term = op.m_term;
1618 UChar32 ch = term->patternCharacter;
1619
1620 const RegisterID character = regT0;
1621 const RegisterID countRegister = regT1;
1622
1623 move(TrustedImm32(0), countRegister);
1624
1625 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1626 if (!((ch > 0xff) && (m_charSize == Char8))) {
1627 JumpList failures;
1628 Label loop(this);
1629 failures.append(atEndOfInput());
1630 failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1631
1632 add32(TrustedImm32(1), index);
1633#ifdef JIT_UNICODE_EXPRESSIONS
1634 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1635 Jump surrogatePairOk = notAtEndOfInput();
1636 sub32(TrustedImm32(1), index);
1637 failures.append(jump());
1638 surrogatePairOk.link(this);
1639 add32(TrustedImm32(1), index);
1640 }
1641#endif
1642 add32(TrustedImm32(1), countRegister);
1643
1644 if (term->quantityMaxCount == quantifyInfinite)
1645 jump(loop);
1646 else
1647 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1648
1649 failures.link(this);
1650 }
1651 op.m_reentry = label();
1652
1653 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1654 }
1655 void backtrackPatternCharacterGreedy(size_t opIndex)
1656 {
1657 YarrOp& op = m_ops[opIndex];
1658 PatternTerm* term = op.m_term;
1659
1660 const RegisterID countRegister = regT1;
1661
1662 m_backtrackingState.link(this);
1663
1664 loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1665 m_backtrackingState.append(branchTest32(Zero, countRegister));
1666 sub32(TrustedImm32(1), countRegister);
1667 if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
1668 sub32(TrustedImm32(1), index);
1669 else
1670 sub32(TrustedImm32(2), index);
1671 jump(op.m_reentry);
1672 }
1673
1674 void generatePatternCharacterNonGreedy(size_t opIndex)
1675 {
1676 YarrOp& op = m_ops[opIndex];
1677 PatternTerm* term = op.m_term;
1678
1679 const RegisterID countRegister = regT1;
1680
1681 move(TrustedImm32(0), countRegister);
1682 op.m_reentry = label();
1683 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1684 }
1685 void backtrackPatternCharacterNonGreedy(size_t opIndex)
1686 {
1687 YarrOp& op = m_ops[opIndex];
1688 PatternTerm* term = op.m_term;
1689 UChar32 ch = term->patternCharacter;
1690
1691 const RegisterID character = regT0;
1692 const RegisterID countRegister = regT1;
1693
1694 m_backtrackingState.link(this);
1695
1696 loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1697
1698 // Unless have a 16 bit pattern character and an 8 bit string - short circuit
1699 if (!((ch > 0xff) && (m_charSize == Char8))) {
1700 JumpList nonGreedyFailures;
1701 nonGreedyFailures.append(atEndOfInput());
1702 if (term->quantityMaxCount != quantifyInfinite)
1703 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1704 nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1705
1706 add32(TrustedImm32(1), index);
1707#ifdef JIT_UNICODE_EXPRESSIONS
1708 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1709 Jump surrogatePairOk = notAtEndOfInput();
1710 sub32(TrustedImm32(1), index);
1711 nonGreedyFailures.append(jump());
1712 surrogatePairOk.link(this);
1713 add32(TrustedImm32(1), index);
1714 }
1715#endif
1716 add32(TrustedImm32(1), countRegister);
1717
1718 jump(op.m_reentry);
1719 nonGreedyFailures.link(this);
1720 }
1721
1722 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1723 // subtract countRegister*2 for non-BMP characters
1724 lshift32(TrustedImm32(1), countRegister);
1725 }
1726
1727 sub32(countRegister, index);
1728 m_backtrackingState.fallthrough();
1729 }
1730
1731 void generateCharacterClassOnce(size_t opIndex)
1732 {
1733 YarrOp& op = m_ops[opIndex];
1734 PatternTerm* term = op.m_term;
1735
1736 const RegisterID character = regT0;
1737
1738 if (m_decodeSurrogatePairs) {
1739 op.m_jumps.append(jumpIfNoAvailableInput());
1740 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1741 }
1742
1743 JumpList matchDest;
1744 readCharacter(m_checkedOffset - term->inputPosition, character);
1745 // If we are matching the "any character" builtin class we only need to read the
1746 // character and don't need to match as it will always succeed.
1747 if (term->invert() || !term->characterClass->m_anyCharacter) {
1748 matchCharacterClass(character, matchDest, term->characterClass);
1749
1750 if (term->invert())
1751 op.m_jumps.append(matchDest);
1752 else {
1753 op.m_jumps.append(jump());
1754 matchDest.link(this);
1755 }
1756 }
1757#ifdef JIT_UNICODE_EXPRESSIONS
1758 if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
1759 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1760 add32(TrustedImm32(1), index);
1761 isBMPChar.link(this);
1762 }
1763#endif
1764 }
1765 void backtrackCharacterClassOnce(size_t opIndex)
1766 {
1767#ifdef JIT_UNICODE_EXPRESSIONS
1768 if (m_decodeSurrogatePairs) {
1769 YarrOp& op = m_ops[opIndex];
1770 PatternTerm* term = op.m_term;
1771
1772 m_backtrackingState.link(this);
1773 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1774 m_backtrackingState.fallthrough();
1775 }
1776#endif
1777 backtrackTermDefault(opIndex);
1778 }
1779
1780 void generateCharacterClassFixed(size_t opIndex)
1781 {
1782 YarrOp& op = m_ops[opIndex];
1783 PatternTerm* term = op.m_term;
1784
1785 const RegisterID character = regT0;
1786 const RegisterID countRegister = regT1;
1787
1788 if (m_decodeSurrogatePairs)
1789 op.m_jumps.append(jumpIfNoAvailableInput());
1790
1791 move(index, countRegister);
1792
1793 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1794
1795#ifdef JIT_UNICODE_EXPRESSIONS
1796 if (m_decodeSurrogatePairs && term->characterClass->hasOnlyNonBMPCharacters() && !term->invert())
1797 scaledMaxCount *= 2;
1798#endif
1799 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1800
1801 Label loop(this);
1802 JumpList matchDest;
1803 readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
1804 // If we are matching the "any character" builtin class we only need to read the
1805 // character and don't need to match as it will always succeed.
1806 if (term->invert() || !term->characterClass->m_anyCharacter) {
1807 matchCharacterClass(character, matchDest, term->characterClass);
1808
1809 if (term->invert())
1810 op.m_jumps.append(matchDest);
1811 else {
1812 op.m_jumps.append(jump());
1813 matchDest.link(this);
1814 }
1815 }
1816
1817#ifdef JIT_UNICODE_EXPRESSIONS
1818 if (m_decodeSurrogatePairs) {
1819 if (term->characterClass->hasOneCharacterSize() && !term->invert())
1820 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
1821 else {
1822 add32(TrustedImm32(1), countRegister);
1823 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1824 op.m_jumps.append(atEndOfInput());
1825 add32(TrustedImm32(1), countRegister);
1826 add32(TrustedImm32(1), index);
1827 isBMPChar.link(this);
1828 }
1829 } else
1830#endif
1831 add32(TrustedImm32(1), countRegister);
1832 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1833 }
1834 void backtrackCharacterClassFixed(size_t opIndex)
1835 {
1836 backtrackTermDefault(opIndex);
1837 }
1838
1839 void generateCharacterClassGreedy(size_t opIndex)
1840 {
1841 YarrOp& op = m_ops[opIndex];
1842 PatternTerm* term = op.m_term;
1843
1844 const RegisterID character = regT0;
1845 const RegisterID countRegister = regT1;
1846
1847 if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert()))
1848 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1849 move(TrustedImm32(0), countRegister);
1850
1851 JumpList failures;
1852 Label loop(this);
1853#ifdef JIT_UNICODE_EXPRESSIONS
1854 if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
1855 move(TrustedImm32(1), character);
1856 failures.append(checkNotEnoughInput(character));
1857 } else
1858#endif
1859 failures.append(atEndOfInput());
1860
1861 if (term->invert()) {
1862 readCharacter(m_checkedOffset - term->inputPosition, character);
1863 matchCharacterClass(character, failures, term->characterClass);
1864 } else {
1865 JumpList matchDest;
1866 readCharacter(m_checkedOffset - term->inputPosition, character);
1867 // If we are matching the "any character" builtin class for non-unicode patterns,
1868 // we only need to read the character and don't need to match as it will always succeed.
1869 if (!term->characterClass->m_anyCharacter) {
1870 matchCharacterClass(character, matchDest, term->characterClass);
1871 failures.append(jump());
1872 }
1873 matchDest.link(this);
1874 }
1875
1876#ifdef JIT_UNICODE_EXPRESSIONS
1877 if (m_decodeSurrogatePairs)
1878 advanceIndexAfterCharacterClassTermMatch(term, failures, character);
1879 else
1880#endif
1881 add32(TrustedImm32(1), index);
1882 add32(TrustedImm32(1), countRegister);
1883
1884 if (term->quantityMaxCount != quantifyInfinite) {
1885 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1886 failures.append(jump());
1887 } else
1888 jump(loop);
1889
1890 failures.link(this);
1891 op.m_reentry = label();
1892
1893 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1894 }
1895 void backtrackCharacterClassGreedy(size_t opIndex)
1896 {
1897 YarrOp& op = m_ops[opIndex];
1898 PatternTerm* term = op.m_term;
1899
1900 const RegisterID countRegister = regT1;
1901
1902 m_backtrackingState.link(this);
1903
1904 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1905 m_backtrackingState.append(branchTest32(Zero, countRegister));
1906 sub32(TrustedImm32(1), countRegister);
1907 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1908
1909 if (!m_decodeSurrogatePairs)
1910 sub32(TrustedImm32(1), index);
1911 else if (term->characterClass->hasOneCharacterSize() && !term->invert())
1912 sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
1913 else {
1914 // Rematch one less
1915 const RegisterID character = regT0;
1916
1917 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1918
1919 Label rematchLoop(this);
1920 readCharacter(m_checkedOffset - term->inputPosition, character);
1921
1922 sub32(TrustedImm32(1), countRegister);
1923 add32(TrustedImm32(1), index);
1924
1925#ifdef JIT_UNICODE_EXPRESSIONS
1926 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1927 add32(TrustedImm32(1), index);
1928 isBMPChar.link(this);
1929#endif
1930
1931 branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1932
1933 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1934 }
1935 jump(op.m_reentry);
1936 }
1937
1938 void generateCharacterClassNonGreedy(size_t opIndex)
1939 {
1940 YarrOp& op = m_ops[opIndex];
1941 PatternTerm* term = op.m_term;
1942
1943 const RegisterID countRegister = regT1;
1944
1945 move(TrustedImm32(0), countRegister);
1946 op.m_reentry = label();
1947
1948#ifdef JIT_UNICODE_EXPRESSIONS
1949 if (m_decodeSurrogatePairs) {
1950 if (!term->characterClass->hasOneCharacterSize() || term->invert())
1951 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1952 }
1953#endif
1954
1955 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1956 }
1957
1958 void backtrackCharacterClassNonGreedy(size_t opIndex)
1959 {
1960 YarrOp& op = m_ops[opIndex];
1961 PatternTerm* term = op.m_term;
1962
1963 const RegisterID character = regT0;
1964 const RegisterID countRegister = regT1;
1965
1966 JumpList nonGreedyFailures;
1967
1968 m_backtrackingState.link(this);
1969
1970#ifdef JIT_UNICODE_EXPRESSIONS
1971 if (m_decodeSurrogatePairs) {
1972 if (!term->characterClass->hasOneCharacterSize() || term->invert())
1973 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1974 }
1975#endif
1976
1977 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1978
1979 nonGreedyFailures.append(atEndOfInput());
1980 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1981
1982 JumpList matchDest;
1983 readCharacter(m_checkedOffset - term->inputPosition, character);
1984 // If we are matching the "any character" builtin class for non-unicode patterns,
1985 // we only need to read the character and don't need to match as it will always succeed.
1986 if (term->invert() || !term->characterClass->m_anyCharacter) {
1987 matchCharacterClass(character, matchDest, term->characterClass);
1988
1989 if (term->invert())
1990 nonGreedyFailures.append(matchDest);
1991 else {
1992 nonGreedyFailures.append(jump());
1993 matchDest.link(this);
1994 }
1995 }
1996
1997#ifdef JIT_UNICODE_EXPRESSIONS
1998 if (m_decodeSurrogatePairs)
1999 advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
2000 else
2001#endif
2002 add32(TrustedImm32(1), index);
2003 add32(TrustedImm32(1), countRegister);
2004
2005 jump(op.m_reentry);
2006
2007 nonGreedyFailures.link(this);
2008 sub32(countRegister, index);
2009 m_backtrackingState.fallthrough();
2010 }
2011
2012 void generateDotStarEnclosure(size_t opIndex)
2013 {
2014 YarrOp& op = m_ops[opIndex];
2015 PatternTerm* term = op.m_term;
2016
2017 const RegisterID character = regT0;
2018 const RegisterID matchPos = regT1;
2019#ifndef HAVE_INITIAL_START_REG
2020 const RegisterID initialStart = character;
2021#endif
2022
2023 JumpList foundBeginningNewLine;
2024 JumpList saveStartIndex;
2025 JumpList foundEndingNewLine;
2026
2027 if (m_pattern.dotAll()) {
2028 move(TrustedImm32(0), matchPos);
2029 setMatchStart(matchPos);
2030 move(length, index);
2031 return;
2032 }
2033
2034 ASSERT(!m_pattern.m_body->m_hasFixedSize);
2035 getMatchStart(matchPos);
2036
2037#ifndef HAVE_INITIAL_START_REG
2038 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2039#endif
2040 saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2041 Label findBOLLoop(this);
2042 sub32(TrustedImm32(1), matchPos);
2043 if (m_charSize == Char8)
2044 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2045 else
2046 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2047 matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
2048
2049#ifndef HAVE_INITIAL_START_REG
2050 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2051#endif
2052 branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2053 saveStartIndex.append(jump());
2054
2055 foundBeginningNewLine.link(this);
2056 add32(TrustedImm32(1), matchPos); // Advance past newline
2057 saveStartIndex.link(this);
2058
2059 if (!m_pattern.multiline() && term->anchors.bolAnchor)
2060 op.m_jumps.append(branchTest32(NonZero, matchPos));
2061
2062 ASSERT(!m_pattern.m_body->m_hasFixedSize);
2063 setMatchStart(matchPos);
2064
2065 move(index, matchPos);
2066
2067 Label findEOLLoop(this);
2068 foundEndingNewLine.append(branch32(Equal, matchPos, length));
2069 if (m_charSize == Char8)
2070 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2071 else
2072 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
2073 matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
2074 add32(TrustedImm32(1), matchPos);
2075 jump(findEOLLoop);
2076
2077 foundEndingNewLine.link(this);
2078
2079 if (!m_pattern.multiline() && term->anchors.eolAnchor)
2080 op.m_jumps.append(branch32(NotEqual, matchPos, length));
2081
2082 move(matchPos, index);
2083 }
2084
2085 void backtrackDotStarEnclosure(size_t opIndex)
2086 {
2087 backtrackTermDefault(opIndex);
2088 }
2089
2090 // Code generation/backtracking for simple terms
2091 // (pattern characters, character classes, and assertions).
2092 // These methods farm out work to the set of functions above.
2093 void generateTerm(size_t opIndex)
2094 {
2095 YarrOp& op = m_ops[opIndex];
2096 PatternTerm* term = op.m_term;
2097
2098 switch (term->type) {
2099 case PatternTerm::TypePatternCharacter:
2100 switch (term->quantityType) {
2101 case QuantifierFixedCount:
2102 if (term->quantityMaxCount == 1)
2103 generatePatternCharacterOnce(opIndex);
2104 else
2105 generatePatternCharacterFixed(opIndex);
2106 break;
2107 case QuantifierGreedy:
2108 generatePatternCharacterGreedy(opIndex);
2109 break;
2110 case QuantifierNonGreedy:
2111 generatePatternCharacterNonGreedy(opIndex);
2112 break;
2113 }
2114 break;
2115
2116 case PatternTerm::TypeCharacterClass:
2117 switch (term->quantityType) {
2118 case QuantifierFixedCount:
2119 if (term->quantityMaxCount == 1)
2120 generateCharacterClassOnce(opIndex);
2121 else
2122 generateCharacterClassFixed(opIndex);
2123 break;
2124 case QuantifierGreedy:
2125 generateCharacterClassGreedy(opIndex);
2126 break;
2127 case QuantifierNonGreedy:
2128 generateCharacterClassNonGreedy(opIndex);
2129 break;
2130 }
2131 break;
2132
2133 case PatternTerm::TypeAssertionBOL:
2134 generateAssertionBOL(opIndex);
2135 break;
2136
2137 case PatternTerm::TypeAssertionEOL:
2138 generateAssertionEOL(opIndex);
2139 break;
2140
2141 case PatternTerm::TypeAssertionWordBoundary:
2142 generateAssertionWordBoundary(opIndex);
2143 break;
2144
2145 case PatternTerm::TypeForwardReference:
2146 m_failureReason = JITFailureReason::ForwardReference;
2147 break;
2148
2149 case PatternTerm::TypeParenthesesSubpattern:
2150 case PatternTerm::TypeParentheticalAssertion:
2151 RELEASE_ASSERT_NOT_REACHED();
2152
2153 case PatternTerm::TypeBackReference:
2154#if ENABLE(YARR_JIT_BACKREFERENCES)
2155 generateBackReference(opIndex);
2156#else
2157 m_failureReason = JITFailureReason::BackReference;
2158#endif
2159 break;
2160 case PatternTerm::TypeDotStarEnclosure:
2161 generateDotStarEnclosure(opIndex);
2162 break;
2163 }
2164 }
2165 void backtrackTerm(size_t opIndex)
2166 {
2167 YarrOp& op = m_ops[opIndex];
2168 PatternTerm* term = op.m_term;
2169
2170 switch (term->type) {
2171 case PatternTerm::TypePatternCharacter:
2172 switch (term->quantityType) {
2173 case QuantifierFixedCount:
2174 if (term->quantityMaxCount == 1)
2175 backtrackPatternCharacterOnce(opIndex);
2176 else
2177 backtrackPatternCharacterFixed(opIndex);
2178 break;
2179 case QuantifierGreedy:
2180 backtrackPatternCharacterGreedy(opIndex);
2181 break;
2182 case QuantifierNonGreedy:
2183 backtrackPatternCharacterNonGreedy(opIndex);
2184 break;
2185 }
2186 break;
2187
2188 case PatternTerm::TypeCharacterClass:
2189 switch (term->quantityType) {
2190 case QuantifierFixedCount:
2191 if (term->quantityMaxCount == 1)
2192 backtrackCharacterClassOnce(opIndex);
2193 else
2194 backtrackCharacterClassFixed(opIndex);
2195 break;
2196 case QuantifierGreedy:
2197 backtrackCharacterClassGreedy(opIndex);
2198 break;
2199 case QuantifierNonGreedy:
2200 backtrackCharacterClassNonGreedy(opIndex);
2201 break;
2202 }
2203 break;
2204
2205 case PatternTerm::TypeAssertionBOL:
2206 backtrackAssertionBOL(opIndex);
2207 break;
2208
2209 case PatternTerm::TypeAssertionEOL:
2210 backtrackAssertionEOL(opIndex);
2211 break;
2212
2213 case PatternTerm::TypeAssertionWordBoundary:
2214 backtrackAssertionWordBoundary(opIndex);
2215 break;
2216
2217 case PatternTerm::TypeForwardReference:
2218 m_failureReason = JITFailureReason::ForwardReference;
2219 break;
2220
2221 case PatternTerm::TypeParenthesesSubpattern:
2222 case PatternTerm::TypeParentheticalAssertion:
2223 RELEASE_ASSERT_NOT_REACHED();
2224
2225 case PatternTerm::TypeBackReference:
2226#if ENABLE(YARR_JIT_BACKREFERENCES)
2227 backtrackBackReference(opIndex);
2228#else
2229 m_failureReason = JITFailureReason::BackReference;
2230#endif
2231 break;
2232
2233 case PatternTerm::TypeDotStarEnclosure:
2234 backtrackDotStarEnclosure(opIndex);
2235 break;
2236 }
2237 }
2238
2239 void generate()
2240 {
2241 // Forwards generate the matching code.
2242 ASSERT(m_ops.size());
2243 size_t opIndex = 0;
2244
2245 do {
2246 if (m_disassembler)
2247 m_disassembler->setForGenerate(opIndex, label());
2248
2249 YarrOp& op = m_ops[opIndex];
2250 switch (op.m_op) {
2251
2252 case OpTerm:
2253 generateTerm(opIndex);
2254 break;
2255
2256 // OpBodyAlternativeBegin/Next/End
2257 //
2258 // These nodes wrap the set of alternatives in the body of the regular expression.
2259 // There may be either one or two chains of OpBodyAlternative nodes, one representing
2260 // the 'once through' sequence of alternatives (if any exist), and one representing
2261 // the repeating alternatives (again, if any exist).
2262 //
2263 // Upon normal entry to the Begin alternative, we will check that input is available.
2264 // Reentry to the Begin alternative will take place after the check has taken place,
2265 // and will assume that the input position has already been progressed as appropriate.
2266 //
2267 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2268 // successfully completed a match - return a success state from JIT code.
2269 //
2270 // Next alternatives allow for reentry optimized to suit backtracking from its
2271 // preceding alternative. It expects the input position to still be set to a position
2272 // appropriate to its predecessor, and it will only perform an input check if the
2273 // predecessor had a minimum size less than its own.
2274 //
2275 // In the case 'once through' expressions, the End node will also have a reentry
2276 // point to jump to when the last alternative fails. Again, this expects the input
2277 // position to still reflect that expected by the prior alternative.
2278 case OpBodyAlternativeBegin: {
2279 PatternAlternative* alternative = op.m_alternative;
2280
2281 // Upon entry at the head of the set of alternatives, check if input is available
2282 // to run the first alternative. (This progresses the input position).
2283 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2284 // We will reenter after the check, and assume the input position to have been
2285 // set as appropriate to this alternative.
2286 op.m_reentry = label();
2287
2288 m_checkedOffset += alternative->m_minimumSize;
2289 break;
2290 }
2291 case OpBodyAlternativeNext:
2292 case OpBodyAlternativeEnd: {
2293 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2294 PatternAlternative* alternative = op.m_alternative;
2295
2296 // If we get here, the prior alternative matched - return success.
2297
2298 // Adjust the stack pointer to remove the pattern's frame.
2299 removeCallFrame();
2300
2301 // Load appropriate values into the return register and the first output
2302 // slot, and return. In the case of pattern with a fixed size, we will
2303 // not have yet set the value in the first
2304 ASSERT(index != returnRegister);
2305 if (m_pattern.m_body->m_hasFixedSize) {
2306 move(index, returnRegister);
2307 if (priorAlternative->m_minimumSize)
2308 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2309 if (compileMode == IncludeSubpatterns)
2310 store32(returnRegister, output);
2311 } else
2312 getMatchStart(returnRegister);
2313 if (compileMode == IncludeSubpatterns)
2314 store32(index, Address(output, 4));
2315 move(index, returnRegister2);
2316
2317 generateReturn();
2318
2319 // This is the divide between the tail of the prior alternative, above, and
2320 // the head of the subsequent alternative, below.
2321
2322 if (op.m_op == OpBodyAlternativeNext) {
2323 // This is the reentry point for the Next alternative. We expect any code
2324 // that jumps here to do so with the input position matching that of the
2325 // PRIOR alteranative, and we will only check input availability if we
2326 // need to progress it forwards.
2327 op.m_reentry = label();
2328 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2329 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2330 op.m_jumps.append(jumpIfNoAvailableInput());
2331 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2332 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2333 } else if (op.m_nextOp == notFound) {
2334 // This is the reentry point for the End of 'once through' alternatives,
2335 // jumped to when the last alternative fails to match.
2336 op.m_reentry = label();
2337 sub32(Imm32(priorAlternative->m_minimumSize), index);
2338 }
2339
2340 if (op.m_op == OpBodyAlternativeNext)
2341 m_checkedOffset += alternative->m_minimumSize;
2342 m_checkedOffset -= priorAlternative->m_minimumSize;
2343 break;
2344 }
2345
2346 // OpSimpleNestedAlternativeBegin/Next/End
2347 // OpNestedAlternativeBegin/Next/End
2348 //
2349 // These nodes are used to handle sets of alternatives that are nested within
2350 // subpatterns and parenthetical assertions. The 'simple' forms are used where
2351 // we do not need to be able to backtrack back into any alternative other than
2352 // the last, the normal forms allow backtracking into any alternative.
2353 //
2354 // Each Begin/Next node is responsible for planting an input check to ensure
2355 // sufficient input is available on entry. Next nodes additionally need to
2356 // jump to the end - Next nodes use the End node's m_jumps list to hold this
2357 // set of jumps.
2358 //
2359 // In the non-simple forms, successful alternative matches must store a
2360 // 'return address' using a DataLabelPtr, used to store the address to jump
2361 // to when backtracking, to get to the code for the appropriate alternative.
2362 case OpSimpleNestedAlternativeBegin:
2363 case OpNestedAlternativeBegin: {
2364 PatternTerm* term = op.m_term;
2365 PatternAlternative* alternative = op.m_alternative;
2366 PatternDisjunction* disjunction = term->parentheses.disjunction;
2367
2368 // Calculate how much input we need to check for, and if non-zero check.
2369 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2370 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2371 op.m_checkAdjust -= disjunction->m_minimumSize;
2372 if (op.m_checkAdjust)
2373 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2374
2375 m_checkedOffset += op.m_checkAdjust;
2376 break;
2377 }
2378 case OpSimpleNestedAlternativeNext:
2379 case OpNestedAlternativeNext: {
2380 PatternTerm* term = op.m_term;
2381 PatternAlternative* alternative = op.m_alternative;
2382 PatternDisjunction* disjunction = term->parentheses.disjunction;
2383
2384 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2385 if (op.m_op == OpNestedAlternativeNext) {
2386 unsigned parenthesesFrameLocation = term->frameLocation;
2387 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2388 }
2389
2390 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2391 // If the previous alternative matched without consuming characters then
2392 // backtrack to try to match while consumming some input.
2393 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2394 }
2395
2396 // If we reach here then the last alternative has matched - jump to the
2397 // End node, to skip over any further alternatives.
2398 //
2399 // FIXME: this is logically O(N^2) (though N can be expected to be very
2400 // small). We could avoid this either by adding an extra jump to the JIT
2401 // data structures, or by making backtracking code that jumps to Next
2402 // alternatives are responsible for checking that input is available (if
2403 // we didn't need to plant the input checks, then m_jumps would be free).
2404 YarrOp* endOp = &m_ops[op.m_nextOp];
2405 while (endOp->m_nextOp != notFound) {
2406 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2407 endOp = &m_ops[endOp->m_nextOp];
2408 }
2409 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2410 endOp->m_jumps.append(jump());
2411
2412 // This is the entry point for the next alternative.
2413 op.m_reentry = label();
2414
2415 // Calculate how much input we need to check for, and if non-zero check.
2416 op.m_checkAdjust = alternative->m_minimumSize;
2417 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2418 op.m_checkAdjust -= disjunction->m_minimumSize;
2419 if (op.m_checkAdjust)
2420 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2421
2422 YarrOp& lastOp = m_ops[op.m_previousOp];
2423 m_checkedOffset -= lastOp.m_checkAdjust;
2424 m_checkedOffset += op.m_checkAdjust;
2425 break;
2426 }
2427 case OpSimpleNestedAlternativeEnd:
2428 case OpNestedAlternativeEnd: {
2429 PatternTerm* term = op.m_term;
2430
2431 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2432 if (op.m_op == OpNestedAlternativeEnd) {
2433 unsigned parenthesesFrameLocation = term->frameLocation;
2434 op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2435 }
2436
2437 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2438 // If the previous alternative matched without consuming characters then
2439 // backtrack to try to match while consumming some input.
2440 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2441 }
2442
2443 // If this set of alternatives contains more than one alternative,
2444 // then the Next nodes will have planted jumps to the End, and added
2445 // them to this node's m_jumps list.
2446 op.m_jumps.link(this);
2447 op.m_jumps.clear();
2448
2449 YarrOp& lastOp = m_ops[op.m_previousOp];
2450 m_checkedOffset -= lastOp.m_checkAdjust;
2451 break;
2452 }
2453
2454 // OpParenthesesSubpatternOnceBegin/End
2455 //
2456 // These nodes support (optionally) capturing subpatterns, that have a
2457 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
2458 case OpParenthesesSubpatternOnceBegin: {
2459 PatternTerm* term = op.m_term;
2460 unsigned parenthesesFrameLocation = term->frameLocation;
2461 const RegisterID indexTemporary = regT0;
2462 ASSERT(term->quantityMaxCount == 1);
2463
2464 // Upon entry to a Greedy quantified set of parenthese store the index.
2465 // We'll use this for two purposes:
2466 // - To indicate which iteration we are on of mathing the remainder of
2467 // the expression after the parentheses - the first, including the
2468 // match within the parentheses, or the second having skipped over them.
2469 // - To check for empty matches, which must be rejected.
2470 //
2471 // At the head of a NonGreedy set of parentheses we'll immediately set the
2472 // value on the stack to -1 (indicating a match skipping the subpattern),
2473 // and plant a jump to the end. We'll also plant a label to backtrack to
2474 // to reenter the subpattern later, with a store to set up index on the
2475 // second iteration.
2476 //
2477 // FIXME: for capturing parens, could use the index in the capture array?
2478 if (term->quantityType == QuantifierGreedy)
2479 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2480 else if (term->quantityType == QuantifierNonGreedy) {
2481 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2482 op.m_jumps.append(jump());
2483 op.m_reentry = label();
2484 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2485 }
2486
2487 // If the parenthese are capturing, store the starting index value to the
2488 // captures array, offsetting as necessary.
2489 //
2490 // FIXME: could avoid offsetting this value in JIT code, apply
2491 // offsets only afterwards, at the point the results array is
2492 // being accessed.
2493 if (term->capture() && compileMode == IncludeSubpatterns) {
2494 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2495 if (term->quantityType == QuantifierFixedCount)
2496 inputOffset += term->parentheses.disjunction->m_minimumSize;
2497 if (inputOffset) {
2498 move(index, indexTemporary);
2499 sub32(Imm32(inputOffset), indexTemporary);
2500 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2501 } else
2502 setSubpatternStart(index, term->parentheses.subpatternId);
2503 }
2504 break;
2505 }
2506 case OpParenthesesSubpatternOnceEnd: {
2507 PatternTerm* term = op.m_term;
2508 const RegisterID indexTemporary = regT0;
2509 ASSERT(term->quantityMaxCount == 1);
2510
2511 // Runtime ASSERT to make sure that the nested alternative handled the
2512 // "no input consumed" check.
2513 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2514 Jump pastBreakpoint;
2515 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2516 abortWithReason(YARRNoInputConsumed);
2517 pastBreakpoint.link(this);
2518 }
2519
2520 // If the parenthese are capturing, store the ending index value to the
2521 // captures array, offsetting as necessary.
2522 //
2523 // FIXME: could avoid offsetting this value in JIT code, apply
2524 // offsets only afterwards, at the point the results array is
2525 // being accessed.
2526 if (term->capture() && compileMode == IncludeSubpatterns) {
2527 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2528 if (inputOffset) {
2529 move(index, indexTemporary);
2530 sub32(Imm32(inputOffset), indexTemporary);
2531 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2532 } else
2533 setSubpatternEnd(index, term->parentheses.subpatternId);
2534 }
2535
2536 // If the parentheses are quantified Greedy then add a label to jump back
2537 // to if we get a failed match from after the parentheses. For NonGreedy
2538 // parentheses, link the jump from before the subpattern to here.
2539 if (term->quantityType == QuantifierGreedy)
2540 op.m_reentry = label();
2541 else if (term->quantityType == QuantifierNonGreedy) {
2542 YarrOp& beginOp = m_ops[op.m_previousOp];
2543 beginOp.m_jumps.link(this);
2544 }
2545 break;
2546 }
2547
2548 // OpParenthesesSubpatternTerminalBegin/End
2549 case OpParenthesesSubpatternTerminalBegin: {
2550 PatternTerm* term = op.m_term;
2551 ASSERT(term->quantityType == QuantifierGreedy);
2552 ASSERT(term->quantityMaxCount == quantifyInfinite);
2553 ASSERT(!term->capture());
2554
2555 // Upon entry set a label to loop back to.
2556 op.m_reentry = label();
2557
2558 // Store the start index of the current match; we need to reject zero
2559 // length matches.
2560 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2561 break;
2562 }
2563 case OpParenthesesSubpatternTerminalEnd: {
2564 YarrOp& beginOp = m_ops[op.m_previousOp];
2565 if (!ASSERT_DISABLED) {
2566 PatternTerm* term = op.m_term;
2567
2568 // Runtime ASSERT to make sure that the nested alternative handled the
2569 // "no input consumed" check.
2570 Jump pastBreakpoint;
2571 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2572 abortWithReason(YARRNoInputConsumed);
2573 pastBreakpoint.link(this);
2574 }
2575
2576 // We know that the match is non-zero, we can accept it and
2577 // loop back up to the head of the subpattern.
2578 jump(beginOp.m_reentry);
2579
2580 // This is the entry point to jump to when we stop matching - we will
2581 // do so once the subpattern cannot match any more.
2582 op.m_reentry = label();
2583 break;
2584 }
2585
2586 // OpParenthesesSubpatternBegin/End
2587 //
2588 // These nodes support generic subpatterns.
2589 case OpParenthesesSubpatternBegin: {
2590#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2591 PatternTerm* term = op.m_term;
2592 unsigned parenthesesFrameLocation = term->frameLocation;
2593
2594 // Upon entry to a Greedy quantified set of parenthese store the index.
2595 // We'll use this for two purposes:
2596 // - To indicate which iteration we are on of mathing the remainder of
2597 // the expression after the parentheses - the first, including the
2598 // match within the parentheses, or the second having skipped over them.
2599 // - To check for empty matches, which must be rejected.
2600 //
2601 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2602 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2603 // and plant a jump to the end. We'll also plant a label to backtrack to
2604 // to reenter the subpattern later, with a store to set 'begin' to current index
2605 // on the second iteration.
2606 //
2607 // FIXME: for capturing parens, could use the index in the capture array?
2608 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2609 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2610 storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2611
2612 if (term->quantityType == QuantifierNonGreedy) {
2613 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2614 op.m_jumps.append(jump());
2615 }
2616
2617 op.m_reentry = label();
2618 RegisterID currParenContextReg = regT0;
2619 RegisterID newParenContextReg = regT1;
2620
2621 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
2622 allocateParenContext(newParenContextReg);
2623 storePtr(currParenContextReg, newParenContextReg);
2624 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2625 saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
2626 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2627 }
2628
2629 // If the parenthese are capturing, store the starting index value to the
2630 // captures array, offsetting as necessary.
2631 //
2632 // FIXME: could avoid offsetting this value in JIT code, apply
2633 // offsets only afterwards, at the point the results array is
2634 // being accessed.
2635 if (term->capture() && compileMode == IncludeSubpatterns) {
2636 const RegisterID indexTemporary = regT0;
2637 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2638 if (term->quantityType == QuantifierFixedCount)
2639 inputOffset += term->parentheses.disjunction->m_minimumSize;
2640 if (inputOffset) {
2641 move(index, indexTemporary);
2642 sub32(Imm32(inputOffset), indexTemporary);
2643 setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
2644 } else
2645 setSubpatternStart(index, term->parentheses.subpatternId);
2646 }
2647#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2648 RELEASE_ASSERT_NOT_REACHED();
2649#endif
2650 break;
2651 }
2652 case OpParenthesesSubpatternEnd: {
2653#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2654 PatternTerm* term = op.m_term;
2655 unsigned parenthesesFrameLocation = term->frameLocation;
2656
2657 // Runtime ASSERT to make sure that the nested alternative handled the
2658 // "no input consumed" check.
2659 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2660 Jump pastBreakpoint;
2661 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2662 abortWithReason(YARRNoInputConsumed);
2663 pastBreakpoint.link(this);
2664 }
2665
2666 const RegisterID countTemporary = regT1;
2667
2668 YarrOp& beginOp = m_ops[op.m_previousOp];
2669 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2670 add32(TrustedImm32(1), countTemporary);
2671 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2672
2673 // If the parenthese are capturing, store the ending index value to the
2674 // captures array, offsetting as necessary.
2675 //
2676 // FIXME: could avoid offsetting this value in JIT code, apply
2677 // offsets only afterwards, at the point the results array is
2678 // being accessed.
2679 if (term->capture() && compileMode == IncludeSubpatterns) {
2680 const RegisterID indexTemporary = regT0;
2681
2682 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2683 if (inputOffset) {
2684 move(index, indexTemporary);
2685 sub32(Imm32(inputOffset), indexTemporary);
2686 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
2687 } else
2688 setSubpatternEnd(index, term->parentheses.subpatternId);
2689 }
2690
2691 // If the parentheses are quantified Greedy then add a label to jump back
2692 // to if we get a failed match from after the parentheses. For NonGreedy
2693 // parentheses, link the jump from before the subpattern to here.
2694 if (term->quantityType == QuantifierGreedy) {
2695 if (term->quantityMaxCount != quantifyInfinite)
2696 branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2697 else
2698 jump(beginOp.m_reentry);
2699
2700 op.m_reentry = label();
2701 } else if (term->quantityType == QuantifierNonGreedy) {
2702 YarrOp& beginOp = m_ops[op.m_previousOp];
2703 beginOp.m_jumps.link(this);
2704 op.m_reentry = label();
2705 }
2706#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2707 RELEASE_ASSERT_NOT_REACHED();
2708#endif
2709 break;
2710 }
2711
2712 // OpParentheticalAssertionBegin/End
2713 case OpParentheticalAssertionBegin: {
2714 PatternTerm* term = op.m_term;
2715
2716 // Store the current index - assertions should not update index, so
2717 // we will need to restore it upon a successful match.
2718 unsigned parenthesesFrameLocation = term->frameLocation;
2719 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2720
2721 // Check
2722 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2723 if (op.m_checkAdjust)
2724 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2725
2726 m_checkedOffset -= op.m_checkAdjust;
2727 break;
2728 }
2729 case OpParentheticalAssertionEnd: {
2730 PatternTerm* term = op.m_term;
2731
2732 // Restore the input index value.
2733 unsigned parenthesesFrameLocation = term->frameLocation;
2734 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2735
2736 // If inverted, a successful match of the assertion must be treated
2737 // as a failure, so jump to backtracking.
2738 if (term->invert()) {
2739 op.m_jumps.append(jump());
2740 op.m_reentry = label();
2741 }
2742
2743 YarrOp& lastOp = m_ops[op.m_previousOp];
2744 m_checkedOffset += lastOp.m_checkAdjust;
2745 break;
2746 }
2747
2748 case OpMatchFailed:
2749 removeCallFrame();
2750 generateFailReturn();
2751 break;
2752 }
2753
2754 ++opIndex;
2755 } while (opIndex < m_ops.size());
2756 }
2757
2758 void backtrack()
2759 {
2760 // Backwards generate the backtracking code.
2761 size_t opIndex = m_ops.size();
2762 ASSERT(opIndex);
2763
2764 do {
2765 --opIndex;
2766
2767 if (m_disassembler)
2768 m_disassembler->setForBacktrack(opIndex, label());
2769
2770 YarrOp& op = m_ops[opIndex];
2771 switch (op.m_op) {
2772
2773 case OpTerm:
2774 backtrackTerm(opIndex);
2775 break;
2776
2777 // OpBodyAlternativeBegin/Next/End
2778 //
2779 // For each Begin/Next node representing an alternative, we need to decide what to do
2780 // in two circumstances:
2781 // - If we backtrack back into this node, from within the alternative.
2782 // - If the input check at the head of the alternative fails (if this exists).
2783 //
2784 // We treat these two cases differently since in the former case we have slightly
2785 // more information - since we are backtracking out of a prior alternative we know
2786 // that at least enough input was available to run it. For example, given the regular
2787 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2788 // character match of 'a'), then we need not perform an additional input availability
2789 // check before running the second alternative.
2790 //
2791 // Backtracking required differs for the last alternative, which in the case of the
2792 // repeating set of alternatives must loop. The code generated for the last alternative
2793 // will also be used to handle all input check failures from any prior alternatives -
2794 // these require similar functionality, in seeking the next available alternative for
2795 // which there is sufficient input.
2796 //
2797 // Since backtracking of all other alternatives simply requires us to link backtracks
2798 // to the reentry point for the subsequent alternative, we will only be generating any
2799 // code when backtracking the last alternative.
2800 case OpBodyAlternativeBegin:
2801 case OpBodyAlternativeNext: {
2802 PatternAlternative* alternative = op.m_alternative;
2803
2804 if (op.m_op == OpBodyAlternativeNext) {
2805 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2806 m_checkedOffset += priorAlternative->m_minimumSize;
2807 }
2808 m_checkedOffset -= alternative->m_minimumSize;
2809
2810 // Is this the last alternative? If not, then if we backtrack to this point we just
2811 // need to jump to try to match the next alternative.
2812 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2813 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2814 break;
2815 }
2816 YarrOp& endOp = m_ops[op.m_nextOp];
2817
2818 YarrOp* beginOp = &op;
2819 while (beginOp->m_op != OpBodyAlternativeBegin) {
2820 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2821 beginOp = &m_ops[beginOp->m_previousOp];
2822 }
2823
2824 bool onceThrough = endOp.m_nextOp == notFound;
2825
2826 JumpList lastStickyAlternativeFailures;
2827
2828 // First, generate code to handle cases where we backtrack out of an attempted match
2829 // of the last alternative. If this is a 'once through' set of alternatives then we
2830 // have nothing to do - link this straight through to the End.
2831 if (onceThrough)
2832 m_backtrackingState.linkTo(endOp.m_reentry, this);
2833 else {
2834 // If we don't need to move the input poistion, and the pattern has a fixed size
2835 // (in which case we omit the store of the start index until the pattern has matched)
2836 // then we can just link the backtrack out of the last alternative straight to the
2837 // head of the first alternative.
2838 if (m_pattern.m_body->m_hasFixedSize
2839 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2840 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2841 m_backtrackingState.linkTo(beginOp->m_reentry, this);
2842 else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2843 // It is a sticky pattern and the last alternative failed, jump to the end.
2844 m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2845 } else {
2846 // We need to generate a trampoline of code to execute before looping back
2847 // around to the first alternative.
2848 m_backtrackingState.link(this);
2849
2850 // No need to advance and retry for a sticky pattern.
2851 if (!m_pattern.sticky()) {
2852 // If the pattern size is not fixed, then store the start index for use if we match.
2853 if (!m_pattern.m_body->m_hasFixedSize) {
2854 if (alternative->m_minimumSize == 1)
2855 setMatchStart(index);
2856 else {
2857 move(index, regT0);
2858 if (alternative->m_minimumSize)
2859 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2860 else
2861 add32(TrustedImm32(1), regT0);
2862 setMatchStart(regT0);
2863 }
2864 }
2865
2866 // Generate code to loop. Check whether the last alternative is longer than the
2867 // first (e.g. /a|xy/ or /a|xyz/).
2868 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2869 // We want to loop, and increment input position. If the delta is 1, it is
2870 // already correctly incremented, if more than one then decrement as appropriate.
2871 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2872 ASSERT(delta);
2873 if (delta != 1)
2874 sub32(Imm32(delta - 1), index);
2875 jump(beginOp->m_reentry);
2876 } else {
2877 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2878 // be sufficent input available to handle this, so just fall through.
2879 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2880 if (delta != 0xFFFFFFFFu) {
2881 // We need to check input because we are incrementing the input.
2882 add32(Imm32(delta + 1), index);
2883 checkInput().linkTo(beginOp->m_reentry, this);
2884 }
2885 }
2886 }
2887 }
2888 }
2889
2890 // We can reach this point in the code in two ways:
2891 // - Fallthrough from the code above (a repeating alternative backtracked out of its
2892 // last alternative, and did not have sufficent input to run the first).
2893 // - We will loop back up to the following label when a repeating alternative loops,
2894 // following a failed input check.
2895 //
2896 // Either way, we have just failed the input check for the first alternative.
2897 Label firstInputCheckFailed(this);
2898
2899 // Generate code to handle input check failures from alternatives except the last.
2900 // prevOp is the alternative we're handling a bail out from (initially Begin), and
2901 // nextOp is the alternative we will be attempting to reenter into.
2902 //
2903 // We will link input check failures from the forwards matching path back to the code
2904 // that can handle them.
2905 YarrOp* prevOp = beginOp;
2906 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2907 while (nextOp->m_op != OpBodyAlternativeEnd) {
2908 prevOp->m_jumps.link(this);
2909
2910 // We only get here if an input check fails, it is only worth checking again
2911 // if the next alternative has a minimum size less than the last.
2912 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2913 // FIXME: if we added an extra label to YarrOp, we could avoid needing to
2914 // subtract delta back out, and reduce this code. Should performance test
2915 // the benefit of this.
2916 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2917 sub32(Imm32(delta), index);
2918 Jump fail = jumpIfNoAvailableInput();
2919 add32(Imm32(delta), index);
2920 jump(nextOp->m_reentry);
2921 fail.link(this);
2922 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2923 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2924 prevOp = nextOp;
2925 nextOp = &m_ops[nextOp->m_nextOp];
2926 }
2927
2928 // We fall through to here if there is insufficient input to run the last alternative.
2929
2930 // If there is insufficient input to run the last alternative, then for 'once through'
2931 // alternatives we are done - just jump back up into the forwards matching path at the End.
2932 if (onceThrough) {
2933 op.m_jumps.linkTo(endOp.m_reentry, this);
2934 jump(endOp.m_reentry);
2935 break;
2936 }
2937
2938 // For repeating alternatives, link any input check failure from the last alternative to
2939 // this point.
2940 op.m_jumps.link(this);
2941
2942 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2943
2944 // Check for cases where input position is already incremented by 1 for the last
2945 // alternative (this is particularly useful where the minimum size of the body
2946 // disjunction is 0, e.g. /a*|b/).
2947 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
2948 // index is already incremented by 1, so just store it now!
2949 setMatchStart(index);
2950 needsToUpdateMatchStart = false;
2951 }
2952
2953 if (!m_pattern.sticky()) {
2954 // Check whether there is sufficient input to loop. Increment the input position by
2955 // one, and check. Also add in the minimum disjunction size before checking - there
2956 // is no point in looping if we're just going to fail all the input checks around
2957 // the next iteration.
2958 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2959 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2960 // If the last alternative had the same minimum size as the disjunction,
2961 // just simply increment input pos by 1, no adjustment based on minimum size.
2962 add32(TrustedImm32(1), index);
2963 } else {
2964 // If the minumum for the last alternative was one greater than than that
2965 // for the disjunction, we're already progressed by 1, nothing to do!
2966 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
2967 if (delta)
2968 sub32(Imm32(delta), index);
2969 }
2970 Jump matchFailed = jumpIfNoAvailableInput();
2971
2972 if (needsToUpdateMatchStart) {
2973 if (!m_pattern.m_body->m_minimumSize)
2974 setMatchStart(index);
2975 else {
2976 move(index, regT0);
2977 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2978 setMatchStart(regT0);
2979 }
2980 }
2981
2982 // Calculate how much more input the first alternative requires than the minimum
2983 // for the body as a whole. If no more is needed then we dont need an additional
2984 // input check here - jump straight back up to the start of the first alternative.
2985 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2986 jump(beginOp->m_reentry);
2987 else {
2988 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2989 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2990 else
2991 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2992 checkInput().linkTo(beginOp->m_reentry, this);
2993 jump(firstInputCheckFailed);
2994 }
2995
2996 // We jump to here if we iterate to the point that there is insufficient input to
2997 // run any matches, and need to return a failure state from JIT code.
2998 matchFailed.link(this);
2999 }
3000
3001 lastStickyAlternativeFailures.link(this);
3002 removeCallFrame();
3003 generateFailReturn();
3004 break;
3005 }
3006 case OpBodyAlternativeEnd: {
3007 // We should never backtrack back into a body disjunction.
3008 ASSERT(m_backtrackingState.isEmpty());
3009
3010 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
3011 m_checkedOffset += priorAlternative->m_minimumSize;
3012 break;
3013 }
3014
3015 // OpSimpleNestedAlternativeBegin/Next/End
3016 // OpNestedAlternativeBegin/Next/End
3017 //
3018 // Generate code for when we backtrack back out of an alternative into
3019 // a Begin or Next node, or when the entry input count check fails. If
3020 // there are more alternatives we need to jump to the next alternative,
3021 // if not we backtrack back out of the current set of parentheses.
3022 //
3023 // In the case of non-simple nested assertions we need to also link the
3024 // 'return address' appropriately to backtrack back out into the correct
3025 // alternative.
3026 case OpSimpleNestedAlternativeBegin:
3027 case OpSimpleNestedAlternativeNext:
3028 case OpNestedAlternativeBegin:
3029 case OpNestedAlternativeNext: {
3030 YarrOp& nextOp = m_ops[op.m_nextOp];
3031 bool isBegin = op.m_previousOp == notFound;
3032 bool isLastAlternative = nextOp.m_nextOp == notFound;
3033 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
3034 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
3035
3036 // Treat an input check failure the same as a failed match.
3037 m_backtrackingState.append(op.m_jumps);
3038
3039 // Set the backtracks to jump to the appropriate place. We may need
3040 // to link the backtracks in one of three different way depending on
3041 // the type of alternative we are dealing with:
3042 // - A single alternative, with no simplings.
3043 // - The last alternative of a set of two or more.
3044 // - An alternative other than the last of a set of two or more.
3045 //
3046 // In the case of a single alternative on its own, we don't need to
3047 // jump anywhere - if the alternative fails to match we can just
3048 // continue to backtrack out of the parentheses without jumping.
3049 //
3050 // In the case of the last alternative in a set of more than one, we
3051 // need to jump to return back out to the beginning. We'll do so by
3052 // adding a jump to the End node's m_jumps list, and linking this
3053 // when we come to generate the Begin node. For alternatives other
3054 // than the last, we need to jump to the next alternative.
3055 //
3056 // If the alternative had adjusted the input position we must link
3057 // backtracking to here, correct, and then jump on. If not we can
3058 // link the backtracks directly to their destination.
3059 if (op.m_checkAdjust) {
3060 // Handle the cases where we need to link the backtracks here.
3061 m_backtrackingState.link(this);
3062 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3063 if (!isLastAlternative) {
3064 // An alternative that is not the last should jump to its successor.
3065 jump(nextOp.m_reentry);
3066 } else if (!isBegin) {
3067 // The last of more than one alternatives must jump back to the beginning.
3068 nextOp.m_jumps.append(jump());
3069 } else {
3070 // A single alternative on its own can fall through.
3071 m_backtrackingState.fallthrough();
3072 }
3073 } else {
3074 // Handle the cases where we can link the backtracks directly to their destinations.
3075 if (!isLastAlternative) {
3076 // An alternative that is not the last should jump to its successor.
3077 m_backtrackingState.linkTo(nextOp.m_reentry, this);
3078 } else if (!isBegin) {
3079 // The last of more than one alternatives must jump back to the beginning.
3080 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3081 }
3082 // In the case of a single alternative on its own do nothing - it can fall through.
3083 }
3084
3085 // If there is a backtrack jump from a zero length match link it here.
3086 if (op.m_zeroLengthMatch.isSet())
3087 m_backtrackingState.append(op.m_zeroLengthMatch);
3088
3089 // At this point we've handled the backtracking back into this node.
3090 // Now link any backtracks that need to jump to here.
3091
3092 // For non-simple alternatives, link the alternative's 'return address'
3093 // so that we backtrack back out into the previous alternative.
3094 if (op.m_op == OpNestedAlternativeNext)
3095 m_backtrackingState.append(op.m_returnAddress);
3096
3097 // If there is more than one alternative, then the last alternative will
3098 // have planted a jump to be linked to the end. This jump was added to the
3099 // End node's m_jumps list. If we are back at the beginning, link it here.
3100 if (isBegin) {
3101 YarrOp* endOp = &m_ops[op.m_nextOp];
3102 while (endOp->m_nextOp != notFound) {
3103 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
3104 endOp = &m_ops[endOp->m_nextOp];
3105 }
3106 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
3107 m_backtrackingState.append(endOp->m_jumps);
3108 }
3109
3110 if (!isBegin) {
3111 YarrOp& lastOp = m_ops[op.m_previousOp];
3112 m_checkedOffset += lastOp.m_checkAdjust;
3113 }
3114 m_checkedOffset -= op.m_checkAdjust;
3115 break;
3116 }
3117 case OpSimpleNestedAlternativeEnd:
3118 case OpNestedAlternativeEnd: {
3119 PatternTerm* term = op.m_term;
3120
3121 // If there is a backtrack jump from a zero length match link it here.
3122 if (op.m_zeroLengthMatch.isSet())
3123 m_backtrackingState.append(op.m_zeroLengthMatch);
3124
3125 // If we backtrack into the end of a simple subpattern do nothing;
3126 // just continue through into the last alternative. If we backtrack
3127 // into the end of a non-simple set of alterntives we need to jump
3128 // to the backtracking return address set up during generation.
3129 if (op.m_op == OpNestedAlternativeEnd) {
3130 m_backtrackingState.link(this);
3131
3132 // Plant a jump to the return address.
3133 unsigned parenthesesFrameLocation = term->frameLocation;
3134 loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
3135
3136 // Link the DataLabelPtr associated with the end of the last
3137 // alternative to this point.
3138 m_backtrackingState.append(op.m_returnAddress);
3139 }
3140
3141 YarrOp& lastOp = m_ops[op.m_previousOp];
3142 m_checkedOffset += lastOp.m_checkAdjust;
3143 break;
3144 }
3145
3146 // OpParenthesesSubpatternOnceBegin/End
3147 //
3148 // When we are backtracking back out of a capturing subpattern we need
3149 // to clear the start index in the matches output array, to record that
3150 // this subpattern has not been captured.
3151 //
3152 // When backtracking back out of a Greedy quantified subpattern we need
3153 // to catch this, and try running the remainder of the alternative after
3154 // the subpattern again, skipping the parentheses.
3155 //
3156 // Upon backtracking back into a quantified set of parentheses we need to
3157 // check whether we were currently skipping the subpattern. If not, we
3158 // can backtrack into them, if we were we need to either backtrack back
3159 // out of the start of the parentheses, or jump back to the forwards
3160 // matching start, depending of whether the match is Greedy or NonGreedy.
3161 case OpParenthesesSubpatternOnceBegin: {
3162 PatternTerm* term = op.m_term;
3163 ASSERT(term->quantityMaxCount == 1);
3164
3165 // We only need to backtrack to this point if capturing or greedy.
3166 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
3167 m_backtrackingState.link(this);
3168
3169 // If capturing, clear the capture (we only need to reset start).
3170 if (term->capture() && compileMode == IncludeSubpatterns)
3171 clearSubpatternStart(term->parentheses.subpatternId);
3172
3173 // If Greedy, jump to the end.
3174 if (term->quantityType == QuantifierGreedy) {
3175 // Clear the flag in the stackframe indicating we ran through the subpattern.
3176 unsigned parenthesesFrameLocation = term->frameLocation;
3177 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
3178 // Jump to after the parentheses, skipping the subpattern.
3179 jump(m_ops[op.m_nextOp].m_reentry);
3180 // A backtrack from after the parentheses, when skipping the subpattern,
3181 // will jump back to here.
3182 op.m_jumps.link(this);
3183 }
3184
3185 m_backtrackingState.fallthrough();
3186 }
3187 break;
3188 }
3189 case OpParenthesesSubpatternOnceEnd: {
3190 PatternTerm* term = op.m_term;
3191
3192 if (term->quantityType != QuantifierFixedCount) {
3193 m_backtrackingState.link(this);
3194
3195 // Check whether we should backtrack back into the parentheses, or if we
3196 // are currently in a state where we had skipped over the subpattern
3197 // (in which case the flag value on the stack will be -1).
3198 unsigned parenthesesFrameLocation = term->frameLocation;
3199 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3200
3201 if (term->quantityType == QuantifierGreedy) {
3202 // For Greedy parentheses, we skip after having already tried going
3203 // through the subpattern, so if we get here we're done.
3204 YarrOp& beginOp = m_ops[op.m_previousOp];
3205 beginOp.m_jumps.append(hadSkipped);
3206 } else {
3207 // For NonGreedy parentheses, we try skipping the subpattern first,
3208 // so if we get here we need to try running through the subpattern
3209 // next. Jump back to the start of the parentheses in the forwards
3210 // matching path.
3211 ASSERT(term->quantityType == QuantifierNonGreedy);
3212 YarrOp& beginOp = m_ops[op.m_previousOp];
3213 hadSkipped.linkTo(beginOp.m_reentry, this);
3214 }
3215
3216 m_backtrackingState.fallthrough();
3217 }
3218
3219 m_backtrackingState.append(op.m_jumps);
3220 break;
3221 }
3222
3223 // OpParenthesesSubpatternTerminalBegin/End
3224 //
3225 // Terminal subpatterns will always match - there is nothing after them to
3226 // force a backtrack, and they have a minimum count of 0, and as such will
3227 // always produce an acceptable result.
3228 case OpParenthesesSubpatternTerminalBegin: {
3229 // We will backtrack to this point once the subpattern cannot match any
3230 // more. Since no match is accepted as a successful match (we are Greedy
3231 // quantified with a minimum of zero) jump back to the forwards matching
3232 // path at the end.
3233 YarrOp& endOp = m_ops[op.m_nextOp];
3234 m_backtrackingState.linkTo(endOp.m_reentry, this);
3235 break;
3236 }
3237 case OpParenthesesSubpatternTerminalEnd:
3238 // We should never be backtracking to here (hence the 'terminal' in the name).
3239 ASSERT(m_backtrackingState.isEmpty());
3240 m_backtrackingState.append(op.m_jumps);
3241 break;
3242
3243 // OpParenthesesSubpatternBegin/End
3244 //
3245 // When we are backtracking back out of a capturing subpattern we need
3246 // to clear the start index in the matches output array, to record that
3247 // this subpattern has not been captured.
3248 //
3249 // When backtracking back out of a Greedy quantified subpattern we need
3250 // to catch this, and try running the remainder of the alternative after
3251 // the subpattern again, skipping the parentheses.
3252 //
3253 // Upon backtracking back into a quantified set of parentheses we need to
3254 // check whether we were currently skipping the subpattern. If not, we
3255 // can backtrack into them, if we were we need to either backtrack back
3256 // out of the start of the parentheses, or jump back to the forwards
3257 // matching start, depending of whether the match is Greedy or NonGreedy.
3258 case OpParenthesesSubpatternBegin: {
3259#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3260 PatternTerm* term = op.m_term;
3261 unsigned parenthesesFrameLocation = term->frameLocation;
3262
3263 if (term->quantityType != QuantifierFixedCount) {
3264 m_backtrackingState.link(this);
3265
3266 RegisterID currParenContextReg = regT0;
3267 RegisterID newParenContextReg = regT1;
3268
3269 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
3270
3271 restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
3272
3273 freeParenContext(currParenContextReg, newParenContextReg);
3274 storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
3275
3276 const RegisterID countTemporary = regT0;
3277 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3278 Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
3279
3280 sub32(TrustedImm32(1), countTemporary);
3281 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
3282
3283 jump(m_ops[op.m_nextOp].m_reentry);
3284
3285 zeroLengthMatch.link(this);
3286
3287 // Clear the flag in the stackframe indicating we didn't run through the subpattern.
3288 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
3289
3290 if (term->quantityType == QuantifierGreedy)
3291 jump(m_ops[op.m_nextOp].m_reentry);
3292
3293 // If Greedy, jump to the end.
3294 if (term->quantityType == QuantifierGreedy) {
3295 // A backtrack from after the parentheses, when skipping the subpattern,
3296 // will jump back to here.
3297 op.m_jumps.link(this);
3298 }
3299
3300 m_backtrackingState.fallthrough();
3301 }
3302#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3303 RELEASE_ASSERT_NOT_REACHED();
3304#endif
3305 break;
3306 }
3307 case OpParenthesesSubpatternEnd: {
3308#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3309 PatternTerm* term = op.m_term;
3310
3311 if (term->quantityType != QuantifierFixedCount) {
3312 m_backtrackingState.link(this);
3313
3314 unsigned parenthesesFrameLocation = term->frameLocation;
3315
3316 if (term->quantityType == QuantifierGreedy) {
3317 // Check whether we should backtrack back into the parentheses, or if we
3318 // are currently in a state where we had skipped over the subpattern
3319 // (in which case the flag value on the stack will be -1).
3320 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
3321
3322 // For Greedy parentheses, we skip after having already tried going
3323 // through the subpattern, so if we get here we're done.
3324 YarrOp& beginOp = m_ops[op.m_previousOp];
3325 beginOp.m_jumps.append(hadSkipped);
3326 } else {
3327 // For NonGreedy parentheses, we try skipping the subpattern first,
3328 // so if we get here we need to try running through the subpattern
3329 // next. Jump back to the start of the parentheses in the forwards
3330 // matching path.
3331 ASSERT(term->quantityType == QuantifierNonGreedy);
3332
3333 const RegisterID beginTemporary = regT0;
3334 const RegisterID countTemporary = regT1;
3335
3336 YarrOp& beginOp = m_ops[op.m_previousOp];
3337
3338 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
3339 branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
3340
3341 JumpList exceededMatchLimit;
3342
3343 if (term->quantityMaxCount != quantifyInfinite) {
3344 loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
3345 exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
3346 }
3347
3348 branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
3349
3350 exceededMatchLimit.link(this);
3351 }
3352
3353 m_backtrackingState.fallthrough();
3354 }
3355
3356 m_backtrackingState.append(op.m_jumps);
3357#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3358 RELEASE_ASSERT_NOT_REACHED();
3359#endif
3360 break;
3361 }
3362
3363 // OpParentheticalAssertionBegin/End
3364 case OpParentheticalAssertionBegin: {
3365 PatternTerm* term = op.m_term;
3366 YarrOp& endOp = m_ops[op.m_nextOp];
3367
3368 // We need to handle the backtracks upon backtracking back out
3369 // of a parenthetical assertion if either we need to correct
3370 // the input index, or the assertion was inverted.
3371 if (op.m_checkAdjust || term->invert()) {
3372 m_backtrackingState.link(this);
3373
3374 if (op.m_checkAdjust)
3375 add32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3376
3377 // In an inverted assertion failure to match the subpattern
3378 // is treated as a successful match - jump to the end of the
3379 // subpattern. We already have adjusted the input position
3380 // back to that before the assertion, which is correct.
3381 if (term->invert())
3382 jump(endOp.m_reentry);
3383
3384 m_backtrackingState.fallthrough();
3385 }
3386
3387 // The End node's jump list will contain any backtracks into
3388 // the end of the assertion. Also, if inverted, we will have
3389 // added the failure caused by a successful match to this.
3390 m_backtrackingState.append(endOp.m_jumps);
3391
3392 m_checkedOffset += op.m_checkAdjust;
3393 break;
3394 }
3395 case OpParentheticalAssertionEnd: {
3396 // FIXME: We should really be clearing any nested subpattern
3397 // matches on bailing out from after the pattern. Firefox has
3398 // this bug too (presumably because they use YARR!)
3399
3400 // Never backtrack into an assertion; later failures bail to before the begin.
3401 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
3402
3403 YarrOp& lastOp = m_ops[op.m_previousOp];
3404 m_checkedOffset -= lastOp.m_checkAdjust;
3405 break;
3406 }
3407
3408 case OpMatchFailed:
3409 break;
3410 }
3411
3412 } while (opIndex);
3413 }
3414
3415 // Compilation methods:
3416 // ====================
3417
3418 // opCompileParenthesesSubpattern
3419 // Emits ops for a subpattern (set of parentheses). These consist
3420 // of a set of alternatives wrapped in an outer set of nodes for
3421 // the parentheses.
3422 // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
3423 // 'Terminal' (non-capturing parentheses quantified as greedy
3424 // and infinite), and 0 based greedy / non-greedy quantified parentheses.
3425 // Alternatives will use the 'Simple' set of ops if either the
3426 // subpattern is terminal (in which case we will never need to
3427 // backtrack), or if the subpattern only contains one alternative.
3428 void opCompileParenthesesSubpattern(PatternTerm* term)
3429 {
3430 YarrOpCode parenthesesBeginOpCode;
3431 YarrOpCode parenthesesEndOpCode;
3432 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
3433 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
3434 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
3435
3436 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3437 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3438 return;
3439 }
3440
3441 // We can currently only compile quantity 1 subpatterns that are
3442 // not copies. We generate a copy in the case of a range quantifier,
3443 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
3444 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
3445 // comes where the subpattern is capturing, in which case we would
3446 // need to restore the capture from the first subpattern upon a
3447 // failure in the second.
3448 if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
3449 m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
3450 return;
3451 }
3452
3453 if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
3454 // Select the 'Once' nodes.
3455 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
3456 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
3457
3458 // If there is more than one alternative we cannot use the 'simple' nodes.
3459 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3460 alternativeBeginOpCode = OpNestedAlternativeBegin;
3461 alternativeNextOpCode = OpNestedAlternativeNext;
3462 alternativeEndOpCode = OpNestedAlternativeEnd;
3463 }
3464 } else if (term->parentheses.isTerminal) {
3465 // Select the 'Terminal' nodes.
3466 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
3467 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
3468 } else {
3469#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3470 // We only handle generic parenthesis with non-fixed counts.
3471 if (term->quantityType == QuantifierFixedCount) {
3472 // This subpattern is not supported by the JIT.
3473 m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
3474 return;
3475 }
3476
3477 m_containsNestedSubpatterns = true;
3478
3479 // Select the 'Generic' nodes.
3480 parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
3481 parenthesesEndOpCode = OpParenthesesSubpatternEnd;
3482
3483 // If there is more than one alternative we cannot use the 'simple' nodes.
3484 if (term->parentheses.disjunction->m_alternatives.size() != 1) {
3485 alternativeBeginOpCode = OpNestedAlternativeBegin;
3486 alternativeNextOpCode = OpNestedAlternativeNext;
3487 alternativeEndOpCode = OpNestedAlternativeEnd;
3488 }
3489#else
3490 // This subpattern is not supported by the JIT.
3491 m_failureReason = JITFailureReason::ParenthesizedSubpattern;
3492 return;
3493#endif
3494 }
3495
3496 size_t parenBegin = m_ops.size();
3497 m_ops.append(parenthesesBeginOpCode);
3498
3499 m_ops.append(alternativeBeginOpCode);
3500 m_ops.last().m_previousOp = notFound;
3501 m_ops.last().m_term = term;
3502 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3503 for (unsigned i = 0; i < alternatives.size(); ++i) {
3504 size_t lastOpIndex = m_ops.size() - 1;
3505
3506 PatternAlternative* nestedAlternative = alternatives[i].get();
3507 opCompileAlternative(nestedAlternative);
3508
3509 size_t thisOpIndex = m_ops.size();
3510 m_ops.append(YarrOp(alternativeNextOpCode));
3511
3512 YarrOp& lastOp = m_ops[lastOpIndex];
3513 YarrOp& thisOp = m_ops[thisOpIndex];
3514
3515 lastOp.m_alternative = nestedAlternative;
3516 lastOp.m_nextOp = thisOpIndex;
3517 thisOp.m_previousOp = lastOpIndex;
3518 thisOp.m_term = term;
3519 }
3520 YarrOp& lastOp = m_ops.last();
3521 ASSERT(lastOp.m_op == alternativeNextOpCode);
3522 lastOp.m_op = alternativeEndOpCode;
3523 lastOp.m_alternative = 0;
3524 lastOp.m_nextOp = notFound;
3525
3526 size_t parenEnd = m_ops.size();
3527 m_ops.append(parenthesesEndOpCode);
3528
3529 m_ops[parenBegin].m_term = term;
3530 m_ops[parenBegin].m_previousOp = notFound;
3531 m_ops[parenBegin].m_nextOp = parenEnd;
3532 m_ops[parenEnd].m_term = term;
3533 m_ops[parenEnd].m_previousOp = parenBegin;
3534 m_ops[parenEnd].m_nextOp = notFound;
3535 }
3536
3537 // opCompileParentheticalAssertion
3538 // Emits ops for a parenthetical assertion. These consist of an
3539 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
3540 // the alternatives, with these wrapped by an outer pair of
3541 // OpParentheticalAssertionBegin/End nodes.
3542 // We can always use the OpSimpleNestedAlternative nodes in the
3543 // case of parenthetical assertions since these only ever match
3544 // once, and will never backtrack back into the assertion.
3545 void opCompileParentheticalAssertion(PatternTerm* term)
3546 {
3547 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3548 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3549 return;
3550 }
3551
3552 size_t parenBegin = m_ops.size();
3553 m_ops.append(OpParentheticalAssertionBegin);
3554
3555 m_ops.append(OpSimpleNestedAlternativeBegin);
3556 m_ops.last().m_previousOp = notFound;
3557 m_ops.last().m_term = term;
3558 Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3559 for (unsigned i = 0; i < alternatives.size(); ++i) {
3560 size_t lastOpIndex = m_ops.size() - 1;
3561
3562 PatternAlternative* nestedAlternative = alternatives[i].get();
3563 opCompileAlternative(nestedAlternative);
3564
3565 size_t thisOpIndex = m_ops.size();
3566 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
3567
3568 YarrOp& lastOp = m_ops[lastOpIndex];
3569 YarrOp& thisOp = m_ops[thisOpIndex];
3570
3571 lastOp.m_alternative = nestedAlternative;
3572 lastOp.m_nextOp = thisOpIndex;
3573 thisOp.m_previousOp = lastOpIndex;
3574 thisOp.m_term = term;
3575 }
3576 YarrOp& lastOp = m_ops.last();
3577 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
3578 lastOp.m_op = OpSimpleNestedAlternativeEnd;
3579 lastOp.m_alternative = 0;
3580 lastOp.m_nextOp = notFound;
3581
3582 size_t parenEnd = m_ops.size();
3583 m_ops.append(OpParentheticalAssertionEnd);
3584
3585 m_ops[parenBegin].m_term = term;
3586 m_ops[parenBegin].m_previousOp = notFound;
3587 m_ops[parenBegin].m_nextOp = parenEnd;
3588 m_ops[parenEnd].m_term = term;
3589 m_ops[parenEnd].m_previousOp = parenBegin;
3590 m_ops[parenEnd].m_nextOp = notFound;
3591 }
3592
3593 // opCompileAlternative
3594 // Called to emit nodes for all terms in an alternative.
3595 void opCompileAlternative(PatternAlternative* alternative)
3596 {
3597 optimizeAlternative(alternative);
3598
3599 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
3600 PatternTerm* term = &alternative->m_terms[i];
3601
3602 switch (term->type) {
3603 case PatternTerm::TypeParenthesesSubpattern:
3604 opCompileParenthesesSubpattern(term);
3605 break;
3606
3607 case PatternTerm::TypeParentheticalAssertion:
3608 opCompileParentheticalAssertion(term);
3609 break;
3610
3611 default:
3612 m_ops.append(term);
3613 }
3614 }
3615 }
3616
3617 // opCompileBody
3618 // This method compiles the body disjunction of the regular expression.
3619 // The body consists of two sets of alternatives - zero or more 'once
3620 // through' (BOL anchored) alternatives, followed by zero or more
3621 // repeated alternatives.
3622 // For each of these two sets of alteratives, if not empty they will be
3623 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
3624 // 'begin' node referencing the first alternative, and 'next' nodes
3625 // referencing any further alternatives. The begin/next/end nodes are
3626 // linked together in a doubly linked list. In the case of repeating
3627 // alternatives, the end node is also linked back to the beginning.
3628 // If no repeating alternatives exist, then a OpMatchFailed node exists
3629 // to return the failing result.
3630 void opCompileBody(PatternDisjunction* disjunction)
3631 {
3632 if (UNLIKELY(!m_vm->isSafeToRecurse())) {
3633 m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
3634 return;
3635 }
3636
3637 Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
3638 size_t currentAlternativeIndex = 0;
3639
3640 // Emit the 'once through' alternatives.
3641 if (alternatives.size() && alternatives[0]->onceThrough()) {
3642 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3643 m_ops.last().m_previousOp = notFound;
3644
3645 do {
3646 size_t lastOpIndex = m_ops.size() - 1;
3647 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3648 opCompileAlternative(alternative);
3649
3650 size_t thisOpIndex = m_ops.size();
3651 m_ops.append(YarrOp(OpBodyAlternativeNext));
3652
3653 YarrOp& lastOp = m_ops[lastOpIndex];
3654 YarrOp& thisOp = m_ops[thisOpIndex];
3655
3656 lastOp.m_alternative = alternative;
3657 lastOp.m_nextOp = thisOpIndex;
3658 thisOp.m_previousOp = lastOpIndex;
3659
3660 ++currentAlternativeIndex;
3661 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
3662
3663 YarrOp& lastOp = m_ops.last();
3664
3665 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3666 lastOp.m_op = OpBodyAlternativeEnd;
3667 lastOp.m_alternative = 0;
3668 lastOp.m_nextOp = notFound;
3669 }
3670
3671 if (currentAlternativeIndex == alternatives.size()) {
3672 m_ops.append(YarrOp(OpMatchFailed));
3673 return;
3674 }
3675
3676 // Emit the repeated alternatives.
3677 size_t repeatLoop = m_ops.size();
3678 m_ops.append(YarrOp(OpBodyAlternativeBegin));
3679 m_ops.last().m_previousOp = notFound;
3680 do {
3681 size_t lastOpIndex = m_ops.size() - 1;
3682 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
3683 ASSERT(!alternative->onceThrough());
3684 opCompileAlternative(alternative);
3685
3686 size_t thisOpIndex = m_ops.size();
3687 m_ops.append(YarrOp(OpBodyAlternativeNext));
3688
3689 YarrOp& lastOp = m_ops[lastOpIndex];
3690 YarrOp& thisOp = m_ops[thisOpIndex];
3691
3692 lastOp.m_alternative = alternative;
3693 lastOp.m_nextOp = thisOpIndex;
3694 thisOp.m_previousOp = lastOpIndex;
3695
3696 ++currentAlternativeIndex;
3697 } while (currentAlternativeIndex < alternatives.size());
3698 YarrOp& lastOp = m_ops.last();
3699 ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3700 lastOp.m_op = OpBodyAlternativeEnd;
3701 lastOp.m_alternative = 0;
3702 lastOp.m_nextOp = repeatLoop;
3703 }
3704
3705 void generateTryReadUnicodeCharacterHelper()
3706 {
3707#ifdef JIT_UNICODE_EXPRESSIONS
3708 if (m_tryReadUnicodeCharacterCalls.isEmpty())
3709 return;
3710
3711 ASSERT(m_decodeSurrogatePairs);
3712
3713 m_tryReadUnicodeCharacterEntry = label();
3714
3715 tagReturnAddress();
3716
3717 tryReadUnicodeCharImpl(regT0);
3718
3719 ret();
3720#endif
3721 }
3722
3723 void generateEnter()
3724 {
3725#if CPU(X86_64)
3726 push(X86Registers::ebp);
3727 move(stackPointerRegister, X86Registers::ebp);
3728
3729 if (m_pattern.m_saveInitialStartValue)
3730 push(X86Registers::ebx);
3731
3732#if OS(WINDOWS)
3733 push(X86Registers::edi);
3734#endif
3735#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3736 if (m_containsNestedSubpatterns) {
3737#if OS(WINDOWS)
3738 push(X86Registers::esi);
3739#endif
3740 push(X86Registers::r12);
3741 }
3742#endif
3743
3744 if (m_decodeSurrogatePairs) {
3745 push(X86Registers::r13);
3746 push(X86Registers::r14);
3747 push(X86Registers::r15);
3748
3749 move(TrustedImm32(0xd800), leadingSurrogateTag);
3750 }
3751 // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3752 zeroExtend32ToPtr(index, index);
3753 zeroExtend32ToPtr(length, length);
3754#if OS(WINDOWS)
3755 if (compileMode == IncludeSubpatterns)
3756 loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
3757 // rcx is the pointer to the allocated space for result in x64 Windows.
3758 push(X86Registers::ecx);
3759#endif
3760#elif CPU(X86)
3761 push(X86Registers::ebp);
3762 move(stackPointerRegister, X86Registers::ebp);
3763 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
3764 push(X86Registers::ebx);
3765 push(X86Registers::edi);
3766 push(X86Registers::esi);
3767 // load output into edi (2 = saved ebp + return address).
3768 #if COMPILER(MSVC)
3769 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
3770 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
3771 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
3772 if (compileMode == IncludeSubpatterns)
3773 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
3774 #else
3775 if (compileMode == IncludeSubpatterns)
3776 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
3777 #endif
3778#elif CPU(ARM64)
3779 tagReturnAddress();
3780 if (m_decodeSurrogatePairs) {
3781 pushPair(framePointerRegister, linkRegister);
3782 move(TrustedImm32(0x10000), supplementaryPlanesBase);
3783 move(TrustedImm32(0xd800), leadingSurrogateTag);
3784 move(TrustedImm32(0xdc00), trailingSurrogateTag);
3785 }
3786
3787 // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3788 zeroExtend32ToPtr(index, index);
3789 zeroExtend32ToPtr(length, length);
3790#elif CPU(ARM_THUMB2)
3791 push(ARMRegisters::r4);
3792 push(ARMRegisters::r5);
3793 push(ARMRegisters::r6);
3794 push(ARMRegisters::r8);
3795#elif CPU(MIPS)
3796 // Do nothing.
3797#endif
3798
3799 store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT);
3800 }
3801
3802 void generateReturn()
3803 {
3804 store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT);
3805
3806#if CPU(X86_64)
3807#if OS(WINDOWS)
3808 // Store the return value in the allocated space pointed by rcx.
3809 pop(X86Registers::ecx);
3810 store64(returnRegister, Address(X86Registers::ecx));
3811 store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
3812 move(X86Registers::ecx, returnRegister);
3813#endif
3814 if (m_decodeSurrogatePairs) {
3815 pop(X86Registers::r15);
3816 pop(X86Registers::r14);
3817 pop(X86Registers::r13);
3818 }
3819
3820#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3821 if (m_containsNestedSubpatterns) {
3822 pop(X86Registers::r12);
3823#if OS(WINDOWS)
3824 pop(X86Registers::esi);
3825#endif
3826 }
3827#endif
3828#if OS(WINDOWS)
3829 pop(X86Registers::edi);
3830#endif
3831
3832 if (m_pattern.m_saveInitialStartValue)
3833 pop(X86Registers::ebx);
3834 pop(X86Registers::ebp);
3835#elif CPU(X86)
3836 pop(X86Registers::esi);
3837 pop(X86Registers::edi);
3838 pop(X86Registers::ebx);
3839 pop(X86Registers::ebp);
3840#elif CPU(ARM64)
3841 if (m_decodeSurrogatePairs)
3842 popPair(framePointerRegister, linkRegister);
3843#elif CPU(ARM_THUMB2)
3844 pop(ARMRegisters::r8);
3845 pop(ARMRegisters::r6);
3846 pop(ARMRegisters::r5);
3847 pop(ARMRegisters::r4);
3848#elif CPU(MIPS)
3849 // Do nothing
3850#endif
3851 ret();
3852 }
3853
3854public:
3855 YarrGenerator(VM* vm, YarrPattern& pattern, String& patternString, YarrCodeBlock& codeBlock, YarrCharSize charSize)
3856 : m_vm(vm)
3857 , m_pattern(pattern)
3858 , m_patternString(patternString)
3859 , m_codeBlock(codeBlock)
3860 , m_charSize(charSize)
3861 , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
3862 , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
3863 , m_fixedSizedAlternative(false)
3864 , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
3865#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3866 , m_containsNestedSubpatterns(false)
3867 , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
3868#endif
3869 {
3870 }
3871
3872 void compile()
3873 {
3874 YarrCodeBlock& codeBlock = m_codeBlock;
3875
3876#ifndef JIT_UNICODE_EXPRESSIONS
3877 if (m_decodeSurrogatePairs) {
3878 codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair);
3879 return;
3880 }
3881#endif
3882
3883 if (m_pattern.m_containsBackreferences
3884#if ENABLE(YARR_JIT_BACKREFERENCES)
3885 && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
3886#endif
3887 ) {
3888 codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
3889 return;
3890 }
3891
3892 // We need to compile before generating code since we set flags based on compilation that
3893 // are used during generation.
3894 opCompileBody(m_pattern.m_body);
3895
3896 if (m_failureReason) {
3897 codeBlock.setFallBackWithFailureReason(*m_failureReason);
3898 return;
3899 }
3900
3901 if (UNLIKELY(Options::dumpDisassembly() || Options::dumpRegExpDisassembly()))
3902 m_disassembler = std::make_unique<YarrDisassembler>(this);
3903
3904 if (m_disassembler)
3905 m_disassembler->setStartOfCode(label());
3906
3907#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3908 if (m_containsNestedSubpatterns)
3909 codeBlock.setUsesPatternContextBuffer();
3910#endif
3911
3912 generateEnter();
3913
3914 Jump hasInput = checkInput();
3915 generateFailReturn();
3916 hasInput.link(this);
3917
3918#ifdef JIT_UNICODE_EXPRESSIONS
3919 if (m_decodeSurrogatePairs)
3920 getEffectiveAddress(BaseIndex(input, length, TimesTwo), endOfStringAddress);
3921#endif
3922
3923#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3924 if (m_containsNestedSubpatterns)
3925 move(TrustedImm32(matchLimit), remainingMatchCount);
3926#endif
3927
3928 if (compileMode == IncludeSubpatterns) {
3929 for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
3930 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
3931 }
3932
3933 if (!m_pattern.m_body->m_hasFixedSize)
3934 setMatchStart(index);
3935
3936 initCallFrame();
3937
3938#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3939 if (m_containsNestedSubpatterns) {
3940 initParenContextFreeList();
3941 if (m_failureReason) {
3942 codeBlock.setFallBackWithFailureReason(*m_failureReason);
3943 return;
3944 }
3945 }
3946#endif
3947
3948 if (m_pattern.m_saveInitialStartValue) {
3949#ifdef HAVE_INITIAL_START_REG
3950 move(index, initialStart);
3951#else
3952 storeToFrame(index, m_pattern.m_initialStartValueFrameLocation);
3953#endif
3954 }
3955
3956 generate();
3957 if (m_disassembler)
3958 m_disassembler->setEndOfGenerate(label());
3959 backtrack();
3960 if (m_disassembler)
3961 m_disassembler->setEndOfBacktrack(label());
3962
3963 generateTryReadUnicodeCharacterHelper();
3964
3965 generateJITFailReturn();
3966
3967 if (m_disassembler)
3968 m_disassembler->setEndOfCode(label());
3969
3970 LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
3971 if (linkBuffer.didFailToAllocate()) {
3972 codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
3973 return;
3974 }
3975
3976 if (!m_tryReadUnicodeCharacterCalls.isEmpty()) {
3977 CodeLocationLabel<NoPtrTag> tryReadUnicodeCharacterHelper = linkBuffer.locationOf<NoPtrTag>(m_tryReadUnicodeCharacterEntry);
3978
3979 for (auto call : m_tryReadUnicodeCharacterCalls)
3980 linkBuffer.link(call, tryReadUnicodeCharacterHelper);
3981 }
3982
3983 m_backtrackingState.linkDataLabels(linkBuffer);
3984
3985 if (m_disassembler)
3986 m_disassembler->dump(linkBuffer);
3987
3988 if (compileMode == MatchOnly) {
3989 if (m_charSize == Char8)
3990 codeBlock.set8BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular expression"));
3991 else
3992 codeBlock.set16BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly16BitPtrTag, "Match-only 16-bit regular expression"));
3993 } else {
3994 if (m_charSize == Char8)
3995 codeBlock.set8BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr8BitPtrTag, "8-bit regular expression"));
3996 else
3997 codeBlock.set16BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr16BitPtrTag, "16-bit regular expression"));
3998 }
3999 if (m_failureReason)
4000 codeBlock.setFallBackWithFailureReason(*m_failureReason);
4001 }
4002
4003 const char* variant() override
4004 {
4005 if (compileMode == MatchOnly) {
4006 if (m_charSize == Char8)
4007 return "Match-only 8-bit regular expression";
4008
4009 return "Match-only 16-bit regular expression";
4010 }
4011
4012 if (m_charSize == Char8)
4013 return "8-bit regular expression";
4014
4015 return "16-bit regular expression";
4016 }
4017
4018 unsigned opCount() override
4019 {
4020 return m_ops.size();
4021 }
4022
4023 void dumpPatternString(PrintStream& out) override
4024 {
4025 m_pattern.dumpPatternString(out, m_patternString);
4026 }
4027
4028 int dumpFor(PrintStream& out, unsigned opIndex) override
4029 {
4030 if (opIndex >= opCount())
4031 return 0;
4032
4033 out.printf("%4d:", opIndex);
4034
4035 YarrOp& op = m_ops[opIndex];
4036 PatternTerm* term = op.m_term;
4037 switch (op.m_op) {
4038 case OpTerm: {
4039 out.print("OpTerm ");
4040 switch (term->type) {
4041 case PatternTerm::TypeAssertionBOL:
4042 out.print("Assert BOL");
4043 break;
4044
4045 case PatternTerm::TypeAssertionEOL:
4046 out.print("Assert EOL");
4047 break;
4048
4049 case PatternTerm::TypeBackReference:
4050 out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
4051 term->dumpQuantifier(out);
4052 break;
4053
4054 case PatternTerm::TypePatternCharacter:
4055 out.print("TypePatternCharacter ");
4056 dumpUChar32(out, term->patternCharacter);
4057 if (m_pattern.ignoreCase())
4058 out.print(" ignore case");
4059
4060 term->dumpQuantifier(out);
4061 break;
4062
4063 case PatternTerm::TypeCharacterClass:
4064 out.print("TypePatternCharacterClass ");
4065 if (term->invert())
4066 out.print("not ");
4067 dumpCharacterClass(out, &m_pattern, term->characterClass);
4068 term->dumpQuantifier(out);
4069 break;
4070
4071 case PatternTerm::TypeAssertionWordBoundary:
4072 out.printf("%sword boundary", term->invert() ? "non-" : "");
4073 break;
4074
4075 case PatternTerm::TypeDotStarEnclosure:
4076 out.print(".* enclosure");
4077 break;
4078
4079 case PatternTerm::TypeForwardReference:
4080 out.print("TypeForwardReference <not handled>");
4081 break;
4082
4083 case PatternTerm::TypeParenthesesSubpattern:
4084 case PatternTerm::TypeParentheticalAssertion:
4085 RELEASE_ASSERT_NOT_REACHED();
4086 break;
4087 }
4088
4089 if (op.m_isDeadCode)
4090 out.print(" already handled");
4091 out.print("\n");
4092 return(0);
4093 }
4094
4095 case OpBodyAlternativeBegin:
4096 out.printf("OpBodyAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4097 return(0);
4098
4099 case OpBodyAlternativeNext:
4100 out.printf("OpBodyAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4101 return(0);
4102
4103 case OpBodyAlternativeEnd:
4104 out.print("OpBodyAlternativeEnd\n");
4105 return(0);
4106
4107 case OpSimpleNestedAlternativeBegin:
4108 out.printf("OpSimpleNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4109 return(1);
4110
4111 case OpNestedAlternativeBegin:
4112 out.printf("OpNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
4113 return(1);
4114
4115 case OpSimpleNestedAlternativeNext:
4116 out.printf("OpSimpleNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4117 return(0);
4118
4119 case OpNestedAlternativeNext:
4120 out.printf("OpNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
4121 return(0);
4122
4123 case OpSimpleNestedAlternativeEnd:
4124 out.print("OpSimpleNestedAlternativeEnd");
4125 term->dumpQuantifier(out);
4126 out.print("\n");
4127 return(-1);
4128
4129 case OpNestedAlternativeEnd:
4130 out.print("OpNestedAlternativeEnd");
4131 term->dumpQuantifier(out);
4132 out.print("\n");
4133 return(-1);
4134
4135 case OpParenthesesSubpatternOnceBegin:
4136 out.print("OpParenthesesSubpatternOnceBegin ");
4137 if (term->capture())
4138 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4139 else
4140 out.print("non-capturing");
4141 term->dumpQuantifier(out);
4142 out.print("\n");
4143 return(0);
4144
4145 case OpParenthesesSubpatternOnceEnd:
4146 out.print("OpParenthesesSubpatternOnceEnd ");
4147 if (term->capture())
4148 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4149 else
4150 out.print("non-capturing");
4151 term->dumpQuantifier(out);
4152 out.print("\n");
4153 return(0);
4154
4155 case OpParenthesesSubpatternTerminalBegin:
4156 out.print("OpParenthesesSubpatternTerminalBegin ");
4157 if (term->capture())
4158 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4159 else
4160 out.print("non-capturing\n");
4161 return(0);
4162
4163 case OpParenthesesSubpatternTerminalEnd:
4164 out.print("OpParenthesesSubpatternTerminalEnd ");
4165 if (term->capture())
4166 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
4167 else
4168 out.print("non-capturing\n");
4169 return(0);
4170
4171 case OpParenthesesSubpatternBegin:
4172 out.print("OpParenthesesSubpatternBegin ");
4173 if (term->capture())
4174 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4175 else
4176 out.print("non-capturing");
4177 term->dumpQuantifier(out);
4178 out.print("\n");
4179 return(0);
4180
4181 case OpParenthesesSubpatternEnd:
4182 out.print("OpParenthesesSubpatternEnd ");
4183 if (term->capture())
4184 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
4185 else
4186 out.print("non-capturing");
4187 term->dumpQuantifier(out);
4188 out.print("\n");
4189 return(0);
4190
4191 case OpParentheticalAssertionBegin:
4192 out.printf("OpParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
4193 return(0);
4194
4195 case OpParentheticalAssertionEnd:
4196 out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
4197 return(0);
4198
4199 case OpMatchFailed:
4200 out.print("OpMatchFailed\n");
4201 return(0);
4202 }
4203
4204 return(0);
4205 }
4206
4207private:
4208 VM* m_vm;
4209
4210 YarrPattern& m_pattern;
4211 String& m_patternString;
4212
4213 YarrCodeBlock& m_codeBlock;
4214 YarrCharSize m_charSize;
4215
4216 // Used to detect regular expression constructs that are not currently
4217 // supported in the JIT; fall back to the interpreter when this is detected.
4218 Optional<JITFailureReason> m_failureReason;
4219
4220 bool m_decodeSurrogatePairs;
4221 bool m_unicodeIgnoreCase;
4222 bool m_fixedSizedAlternative;
4223 CanonicalMode m_canonicalMode;
4224#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
4225 bool m_containsNestedSubpatterns;
4226 ParenContextSizes m_parenContextSizes;
4227#endif
4228 JumpList m_abortExecution;
4229 JumpList m_hitMatchLimit;
4230 Vector<Call> m_tryReadUnicodeCharacterCalls;
4231 Label m_tryReadUnicodeCharacterEntry;
4232
4233 // The regular expression expressed as a linear sequence of operations.
4234 Vector<YarrOp, 128> m_ops;
4235
4236 // This records the current input offset being applied due to the current
4237 // set of alternatives we are nested within. E.g. when matching the
4238 // character 'b' within the regular expression /abc/, we will know that
4239 // the minimum size for the alternative is 3, checked upon entry to the
4240 // alternative, and that 'b' is at offset 1 from the start, and as such
4241 // when matching 'b' we need to apply an offset of -2 to the load.
4242 //
4243 // FIXME: This should go away. Rather than tracking this value throughout
4244 // code generation, we should gather this information up front & store it
4245 // on the YarrOp structure.
4246 Checked<unsigned> m_checkedOffset;
4247
4248 // This class records state whilst generating the backtracking path of code.
4249 BacktrackingState m_backtrackingState;
4250
4251 std::unique_ptr<YarrDisassembler> m_disassembler;
4252};
4253
4254static void dumpCompileFailure(JITFailureReason failure)
4255{
4256 switch (failure) {
4257 case JITFailureReason::DecodeSurrogatePair:
4258 dataLog("Can't JIT a pattern decoding surrogate pairs\n");
4259 break;
4260 case JITFailureReason::BackReference:
4261 dataLog("Can't JIT some patterns containing back references\n");
4262 break;
4263 case JITFailureReason::ForwardReference:
4264 dataLog("Can't JIT a pattern containing forward references\n");
4265 break;
4266 case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
4267 dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
4268 break;
4269 case JITFailureReason::ParenthesizedSubpattern:
4270 dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
4271 break;
4272 case JITFailureReason::FixedCountParenthesizedSubpattern:
4273 dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
4274 break;
4275 case JITFailureReason::ParenthesisNestedTooDeep:
4276 dataLog("Can't JIT pattern due to parentheses nested too deeply\n");
4277 break;
4278 case JITFailureReason::ExecutableMemoryAllocationFailure:
4279 dataLog("Can't JIT because of failure of allocation of executable memory\n");
4280 break;
4281 }
4282}
4283
4284void jitCompile(YarrPattern& pattern, String& patternString, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
4285{
4286 if (mode == MatchOnly)
4287 YarrGenerator<MatchOnly>(vm, pattern, patternString, codeBlock, charSize).compile();
4288 else
4289 YarrGenerator<IncludeSubpatterns>(vm, pattern, patternString, codeBlock, charSize).compile();
4290
4291 if (auto failureReason = codeBlock.failureReason()) {
4292 if (Options::dumpCompiledRegExpPatterns()) {
4293 pattern.dumpPatternString(WTF::dataFile(), patternString);
4294 dataLog(" : ");
4295 dumpCompileFailure(*failureReason);
4296 }
4297 }
4298}
4299
4300}}
4301
4302#endif
4303