YarrInterpreter.cpp source code [jsc/Source/JavaScriptCore/yarr/YarrInterpreter.cpp]

1	/*
2	* Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3	* Copyright (C) 2010 Peter Varga ([email protected]), University of Szeged
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include "YarrInterpreter.h"
29
30	#include "Options.h"
31	#include "SuperSampler.h"
32	#include "Yarr.h"
33	#include "YarrCanonicalize.h"
34	#include <wtf/BumpPointerAllocator.h>
35	#include <wtf/CheckedArithmetic.h>
36	#include <wtf/DataLog.h>
37	#include <wtf/text/CString.h>
38	#include <wtf/text/WTFString.h>
39
40	namespace JSC { namespace Yarr {
41
42	template<typename CharType>
43	class Interpreter {
44	public:
45	struct ParenthesesDisjunctionContext;
46
47	struct BackTrackInfoParentheses {
48	uintptr_t matchAmount;
49	ParenthesesDisjunctionContext* lastContext;
50	};
51
52	static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
53	{
54	context->next = backTrack->lastContext;
55	backTrack->lastContext = context;
56	++backTrack->matchAmount;
57	}
58
59	static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
60	{
61	RELEASE_ASSERT(backTrack->matchAmount);
62	RELEASE_ASSERT(backTrack->lastContext);
63	backTrack->lastContext = backTrack->lastContext->next;
64	--backTrack->matchAmount;
65	}
66
67	struct DisjunctionContext
68	{
69	DisjunctionContext() = default;
70
71	void* operator new(size_t, void* where)
72	{
73	return where;
74	}
75
76	static size_t allocationSize(unsigned numberOfFrames)
77	{
78	static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
79	size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
80	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
81	RELEASE_ASSERT(roundedSize >= rawSize);
82	return roundedSize;
83	}
84
85	int term { `0` };
86	unsigned matchBegin;
87	unsigned matchEnd;
88	uintptr_t frame[`1`];
89	};
90
91	DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
92	{
93	size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
94	allocatorPool = allocatorPool->ensureCapacity(size);
95	RELEASE_ASSERT(allocatorPool);
96	return new (allocatorPool->alloc(size)) DisjunctionContext();
97	}
98
99	void freeDisjunctionContext(DisjunctionContext* context)
100	{
101	allocatorPool = allocatorPool->dealloc(context);
102	}
103
104	struct ParenthesesDisjunctionContext
105	{
106	ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
107	{
108	unsigned firstSubpatternId = term.atom.subpatternId;
109	unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
110
111	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i) {
112	subpatternBackup[i] = output[(firstSubpatternId << `1`) + i];
113	output[(firstSubpatternId << `1`) + i] = offsetNoMatch;
114	}
115
116	new (getDisjunctionContext(term)) DisjunctionContext();
117	}
118
119	void* operator new(size_t, void* where)
120	{
121	return where;
122	}
123
124	void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
125	{
126	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i)
127	output[(firstSubpatternId << `1`) + i] = subpatternBackup[i];
128	}
129
130	DisjunctionContext* getDisjunctionContext(ByteTerm& term)
131	{
132	return bitwise_cast<DisjunctionContext>(bitwise_cast<uintptr_t>(this*) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns));
133	}
134
135	static size_t allocationSize(unsigned numberOfSubpatterns)
136	{
137	static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
138	size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * `2U`) * sizeof(unsigned)).unsafeGet();
139	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
140	RELEASE_ASSERT(roundedSize >= rawSize);
141	return roundedSize;
142	}
143
144	ParenthesesDisjunctionContext* next { nullptr };
145	unsigned subpatternBackup[`1`];
146	};
147
148	ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
149	{
150	size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
151	allocatorPool = allocatorPool->ensureCapacity(size);
152	RELEASE_ASSERT(allocatorPool);
153	return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
154	}
155
156	void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
157	{
158	allocatorPool = allocatorPool->dealloc(context);
159	}
160
161	class InputStream {
162	public:
163	InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
164	: input(input)
165	, pos(start)
166	, length(length)
167	, decodeSurrogatePairs(decodeSurrogatePairs)
168	{
169	}
170
171	void next()
172	{
173	++pos;
174	}
175
176	void rewind(unsigned amount)
177	{
178	ASSERT(pos >= amount);
179	pos -= amount;
180	}
181
182	int read()
183	{
184	ASSERT(pos < length);
185	if (pos < length)
186	return input[pos];
187	return -`1`;
188	}
189
190	int readPair()
191	{
192	ASSERT(pos + `1` < length);
193	return input[pos] \| input[pos + `1`] << `16`;
194	}
195
196	int readChecked(unsigned negativePositionOffest)
197	{
198	RELEASE_ASSERT(pos >= negativePositionOffest);
199	unsigned p = pos - negativePositionOffest;
200	ASSERT(p < length);
201	int result = input[p];
202	if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + `1` < length && U16_IS_TRAIL(input[p + `1`])) {
203	if (atEnd())
204	return -`1`;
205
206	result = U16_GET_SUPPLEMENTARY(result, input[p + `1`]);
207	next();
208	}
209	return result;
210	}
211
212	int readSurrogatePairChecked(unsigned negativePositionOffset)
213	{
214	RELEASE_ASSERT(pos >= negativePositionOffset);
215	unsigned p = pos - negativePositionOffset;
216	ASSERT(p < length);
217	if (p + `1` >= length)
218	return -`1`;
219
220	int first = input[p];
221	int second = input[p + `1`];
222	if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
223	return U16_GET_SUPPLEMENTARY(first, second);
224
225	return -`1`;
226	}
227
228	int reread(unsigned from)
229	{
230	ASSERT(from < length);
231	int result = input[from];
232	if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + `1` < length && U16_IS_TRAIL(input[from + `1`]))
233	result = U16_GET_SUPPLEMENTARY(result, input[from + `1`]);
234	return result;
235	}
236
237	int prev()
238	{
239	ASSERT(!(pos > length));
240	if (pos && length)
241	return input[pos - `1`];
242	return -`1`;
243	}
244
245	unsigned getPos()
246	{
247	return pos;
248	}
249
250	void setPos(unsigned p)
251	{
252	pos = p;
253	}
254
255	bool atStart()
256	{
257	return pos == `0`;
258	}
259
260	bool atEnd()
261	{
262	return pos == length;
263	}
264
265	unsigned end()
266	{
267	return length;
268	}
269
270	bool checkInput(unsigned count)
271	{
272	if (((pos + count) <= length) && ((pos + count) >= pos)) {
273	pos += count;
274	return true;
275	}
276	return false;
277	}
278
279	void uncheckInput(unsigned count)
280	{
281	RELEASE_ASSERT(pos >= count);
282	pos -= count;
283	}
284
285	bool atStart(unsigned negativePositionOffset)
286	{
287	return pos == negativePositionOffset;
288	}
289
290	bool atEnd(unsigned negativePositionOffest)
291	{
292	RELEASE_ASSERT(pos >= negativePositionOffest);
293	return (pos - negativePositionOffest) == length;
294	}
295
296	bool isAvailableInput(unsigned offset)
297	{
298	return (((pos + offset) <= length) && ((pos + offset) >= pos));
299	}
300
301	private:
302	const CharType* input;
303	unsigned pos;
304	unsigned length;
305	bool decodeSurrogatePairs;
306	};
307
308	bool testCharacterClass(CharacterClass* characterClass, int ch)
309	{
310	auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
311	for (unsigned i = `0`; i < matches.size(); ++i) {
312	if (ch == matches [i])
313	return true;
314	}
315
316	return false;
317	};
318
319	auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
320	size_t low = `0`;
321	size_t high = matches.size() - `1`;
322
323	while (low <= high) {
324	size_t mid = low + (high - low) / `2`;
325	int diff = ch - matches [mid];
326	if (!diff)
327	return true;
328
329	if (diff < `0`) {
330	if (mid == low)
331	return false;
332	high = mid - `1`;
333	} else
334	low = mid + `1`;
335	}
336	return false;
337	};
338
339	auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
340	for (unsigned i = `0`; i < ranges.size(); ++i) {
341	if ((ch >= ranges [i].begin) && (ch <= ranges [i].end))
342	return true;
343	}
344
345	return false;
346	};
347
348	auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
349	size_t low = `0`;
350	size_t high = ranges.size() - `1`;
351
352	while (low <= high) {
353	size_t mid = low + (high - low) / `2`;
354	int rangeBeginDiff = ch - ranges [mid].begin;
355	if (rangeBeginDiff >= `0` && ch <= ranges [mid].end)
356	return true;
357
358	if (rangeBeginDiff < `0`) {
359	if (mid == low)
360	return false;
361	high = mid - `1`;
362	} else
363	low = mid + `1`;
364	}
365	return false;
366	};
367
368	if (characterClass->m_anyCharacter)
369	return true;
370
371	const size_t thresholdForBinarySearch = `6`;
372
373	if (!isASCII(ch)) {
374	if (characterClass->m_matchesUnicode.size()) {
375	if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
376	if (binarySearchMatches(characterClass->m_matchesUnicode))
377	return true;
378	} else if (linearSearchMatches(characterClass->m_matchesUnicode))
379	return true;
380	}
381
382	if (characterClass->m_rangesUnicode.size()) {
383	if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
384	if (binarySearchRanges(characterClass->m_rangesUnicode))
385	return true;
386	} else if (linearSearchRanges(characterClass->m_rangesUnicode))
387	return true;
388	}
389	} else {
390	if (characterClass->m_matches.size()) {
391	if (characterClass->m_matches.size() > thresholdForBinarySearch) {
392	if (binarySearchMatches(characterClass->m_matches))
393	return true;
394	} else if (linearSearchMatches(characterClass->m_matches))
395	return true;
396	}
397
398	if (characterClass->m_ranges.size()) {
399	if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
400	if (binarySearchRanges(characterClass->m_ranges))
401	return true;
402	} else if (linearSearchRanges(characterClass->m_ranges))
403	return true;
404	}
405	}
406
407	return false;
408	}
409
410	bool checkCharacter(int testChar, unsigned negativeInputOffset)
411	{
412	return testChar == input.readChecked(negativeInputOffset);
413	}
414
415	bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
416	{
417	return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
418	}
419
420	bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
421	{
422	int ch = input.readChecked(negativeInputOffset);
423	return (loChar == ch) \|\| (hiChar == ch);
424	}
425
426	bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
427	{
428	bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
429	return invert ? !match : match;
430	}
431
432	bool checkCharacterClassDontAdvanceInputForNonBMP(CharacterClass* characterClass, unsigned negativeInputOffset)
433	{
434	int readCharacter = characterClass->hasOnlyNonBMPCharacters() ? input.readSurrogatePairChecked(negativeInputOffset) : input.readChecked(negativeInputOffset);
435	return testCharacterClass(characterClass, readCharacter);
436	}
437
438	bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
439	{
440	unsigned matchSize = (unsigned)(matchEnd - matchBegin);
441
442	if (!input.checkInput(matchSize))
443	return false;
444
445	for (unsigned i = `0`; i < matchSize; ++i) {
446	int oldCh = input.reread(matchBegin + i);
447	int ch;
448	if (!U_IS_BMP(oldCh)) {
449	ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
450	++i;
451	} else
452	ch = input.readChecked(negativeInputOffset + matchSize - i);
453
454	if (oldCh == ch)
455	continue;
456
457	if (pattern->ignoreCase()) {
458	// See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
459	// patterns, Unicode values are never allowed to match against ASCII ones.
460	// For Unicode, we need to check all canonical equivalents of a character.
461	if (!unicode && (isASCII(oldCh) \|\| isASCII(ch))) {
462	if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
463	continue;
464	} else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
465	continue;
466	}
467
468	input.uncheckInput(matchSize);
469	return false;
470	}
471
472	return true;
473	}
474
475	bool matchAssertionBOL(ByteTerm& term)
476	{
477	return (input.atStart(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + `1`)));
478	}
479
480	bool matchAssertionEOL(ByteTerm& term)
481	{
482	if (term.inputPosition)
483	return (input.atEnd(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
484
485	return (input.atEnd()) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
486	}
487
488	bool matchAssertionWordBoundary(ByteTerm& term)
489	{
490	bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + `1`));
491	bool readIsWordchar;
492	if (term.inputPosition)
493	readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
494	else
495	readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
496
497	bool wordBoundary = prevIsWordchar != readIsWordchar;
498	return term.invert() ? !wordBoundary : wordBoundary;
499	}
500
501	bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
502	{
503	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
504
505	switch (term.atom.quantityType) {
506	case QuantifierFixedCount:
507	break;
508
509	case QuantifierGreedy:
510	if (backTrack->matchAmount) {
511	--backTrack->matchAmount;
512	input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
513	return true;
514	}
515	break;
516
517	case QuantifierNonGreedy:
518	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
519	++backTrack->matchAmount;
520	if (checkCharacter(term.atom.patternCharacter, term.inputPosition + `1`))
521	return true;
522	}
523	input.setPos(backTrack->begin);
524	break;
525	}
526
527	return false;
528	}
529
530	bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
531	{
532	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
533
534	switch (term.atom.quantityType) {
535	case QuantifierFixedCount:
536	break;
537
538	case QuantifierGreedy:
539	if (backTrack->matchAmount) {
540	--backTrack->matchAmount;
541	input.uncheckInput(`1`);
542	return true;
543	}
544	break;
545
546	case QuantifierNonGreedy:
547	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
548	++backTrack->matchAmount;
549	if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + `1`))
550	return true;
551	}
552	input.uncheckInput(backTrack->matchAmount);
553	break;
554	}
555
556	return false;
557	}
558
559	bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
560	{
561	ASSERT(term.type == ByteTerm::TypeCharacterClass);
562	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
563
564	switch (term.atom.quantityType) {
565	case QuantifierFixedCount: {
566	if (unicode) {
567	CharacterClass* charClass = term.atom.characterClass;
568	backTrack->begin = input.getPos();
569	unsigned matchAmount = `0`;
570	for (matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
571	if (term.invert()) {
572	if (!checkCharacterClass(charClass, term.invert(), term.inputPosition - matchAmount)) {
573	input.setPos(backTrack->begin);
574	return false;
575	}
576	} else {
577	unsigned matchOffset = matchAmount * (charClass->hasOnlyNonBMPCharacters() ? `2` : `1`);
578	if (!checkCharacterClassDontAdvanceInputForNonBMP(charClass, term.inputPosition - matchOffset)) {
579	input.setPos(backTrack->begin);
580	return false;
581	}
582	}
583	}
584
585	return true;
586	}
587
588	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
589	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
590	return false;
591	}
592	return true;
593	}
594
595	case QuantifierGreedy: {
596	unsigned position = input.getPos();
597	backTrack->begin = position;
598	unsigned matchAmount = `0`;
599	while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
600	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
601	input.setPos(position);
602	break;
603	}
604	++matchAmount;
605	position = input.getPos();
606	}
607	backTrack->matchAmount = matchAmount;
608
609	return true;
610	}
611
612	case QuantifierNonGreedy:
613	backTrack->begin = input.getPos();
614	backTrack->matchAmount = `0`;
615	return true;
616	}
617
618	RELEASE_ASSERT_NOT_REACHED();
619	return false;
620	}
621
622	bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
623	{
624	ASSERT(term.type == ByteTerm::TypeCharacterClass);
625	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
626
627	switch (term.atom.quantityType) {
628	case QuantifierFixedCount:
629	if (unicode)
630	input.setPos(backTrack->begin);
631	break;
632
633	case QuantifierGreedy:
634	if (backTrack->matchAmount) {
635	if (unicode) {
636	// Rematch one less match
637	input.setPos(backTrack->begin);
638	--backTrack->matchAmount;
639	for (unsigned matchAmount = `0`; (matchAmount < backTrack->matchAmount) && input.checkInput(`1`); ++matchAmount) {
640	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
641	input.uncheckInput(`1`);
642	break;
643	}
644	}
645	return true;
646	}
647	--backTrack->matchAmount;
648	input.uncheckInput(`1`);
649	return true;
650	}
651	break;
652
653	case QuantifierNonGreedy:
654	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
655	++backTrack->matchAmount;
656	if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`))
657	return true;
658	}
659	input.setPos(backTrack->begin);
660	break;
661	}
662
663	return false;
664	}
665
666	bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
667	{
668	ASSERT(term.type == ByteTerm::TypeBackReference);
669	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
670
671	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
672	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
673
674	// If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
675	// In this case the result of match is empty string like when it references to a parentheses with zero-width match.
676	// Eg.: /(a\1)/
677	if (matchEnd == offsetNoMatch)
678	return true;
679
680	if (matchBegin == offsetNoMatch)
681	return true;
682
683	ASSERT(matchBegin <= matchEnd);
684
685	if (matchBegin == matchEnd)
686	return true;
687
688	switch (term.atom.quantityType) {
689	case QuantifierFixedCount: {
690	backTrack->begin = input.getPos();
691	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
692	if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
693	input.setPos(backTrack->begin);
694	return false;
695	}
696	}
697	return true;
698	}
699
700	case QuantifierGreedy: {
701	unsigned matchAmount = `0`;
702	while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
703	++matchAmount;
704	backTrack->matchAmount = matchAmount;
705	return true;
706	}
707
708	case QuantifierNonGreedy:
709	backTrack->begin = input.getPos();
710	backTrack->matchAmount = `0`;
711	return true;
712	}
713
714	RELEASE_ASSERT_NOT_REACHED();
715	return false;
716	}
717
718	bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
719	{
720	ASSERT(term.type == ByteTerm::TypeBackReference);
721	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
722
723	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
724	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
725
726	if (matchBegin == offsetNoMatch)
727	return false;
728
729	ASSERT(matchBegin <= matchEnd);
730
731	if (matchBegin == matchEnd)
732	return false;
733
734	switch (term.atom.quantityType) {
735	case QuantifierFixedCount:
736	// for quantityMaxCount == 1, could rewind.
737	input.setPos(backTrack->begin);
738	break;
739
740	case QuantifierGreedy:
741	if (backTrack->matchAmount) {
742	--backTrack->matchAmount;
743	input.rewind(matchEnd - matchBegin);
744	return true;
745	}
746	break;
747
748	case QuantifierNonGreedy:
749	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
750	++backTrack->matchAmount;
751	return true;
752	}
753	input.setPos(backTrack->begin);
754	break;
755	}
756
757	return false;
758	}
759
760	void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
761	{
762	if (term.capture()) {
763	unsigned subpatternId = term.atom.subpatternId;
764	output[(subpatternId << `1`)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
765	output[(subpatternId << `1`) + `1`] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
766	}
767	}
768	void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
769	{
770	unsigned firstSubpatternId = term.atom.subpatternId;
771	unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
772	context->restoreOutput(output, firstSubpatternId, count);
773	}
774	JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
775	{
776	while (backTrack->matchAmount) {
777	ParenthesesDisjunctionContext* context = backTrack->lastContext;
778
779	JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
780	if (result == JSRegExpMatch)
781	return JSRegExpMatch;
782
783	resetMatches(term, context);
784	popParenthesesDisjunctionContext(backTrack);
785	freeParenthesesDisjunctionContext(context);
786
787	if (result != JSRegExpNoMatch)
788	return result;
789	}
790
791	return JSRegExpNoMatch;
792	}
793
794	bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
795	{
796	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
797	ASSERT(term.atom.quantityMaxCount == `1`);
798
799	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
800
801	switch (term.atom.quantityType) {
802	case QuantifierGreedy: {
803	// set this speculatively; if we get to the parens end this will be true.
804	backTrack->begin = input.getPos();
805	break;
806	}
807	case QuantifierNonGreedy: {
808	backTrack->begin = notFound;
809	context->term += term.atom.parenthesesWidth;
810	return true;
811	}
812	case QuantifierFixedCount:
813	break;
814	}
815
816	if (term.capture()) {
817	unsigned subpatternId = term.atom.subpatternId;
818	output[(subpatternId << `1`)] = input.getPos() - term.inputPosition;
819	}
820
821	return true;
822	}
823
824	bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
825	{
826	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
827	ASSERT(term.atom.quantityMaxCount == `1`);
828
829	if (term.capture()) {
830	unsigned subpatternId = term.atom.subpatternId;
831	output[(subpatternId << `1`) + `1`] = input.getPos() - term.inputPosition;
832	}
833
834	if (term.atom.quantityType == QuantifierFixedCount)
835	return true;
836
837	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
838	return backTrack->begin != input.getPos();
839	}
840
841	bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
842	{
843	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
844	ASSERT(term.atom.quantityMaxCount == `1`);
845
846	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
847
848	if (term.capture()) {
849	unsigned subpatternId = term.atom.subpatternId;
850	output[(subpatternId << `1`)] = offsetNoMatch;
851	output[(subpatternId << `1`) + `1`] = offsetNoMatch;
852	}
853
854	switch (term.atom.quantityType) {
855	case QuantifierGreedy:
856	// if we backtrack to this point, there is another chance - try matching nothing.
857	ASSERT(backTrack->begin != notFound);
858	backTrack->begin = notFound;
859	context->term += term.atom.parenthesesWidth;
860	return true;
861	case QuantifierNonGreedy:
862	ASSERT(backTrack->begin != notFound);
863	FALLTHROUGH;
864	case QuantifierFixedCount:
865	break;
866	}
867
868	return false;
869	}
870
871	bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
872	{
873	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
874	ASSERT(term.atom.quantityMaxCount == `1`);
875
876	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
877
878	switch (term.atom.quantityType) {
879	case QuantifierGreedy:
880	if (backTrack->begin == notFound) {
881	context->term -= term.atom.parenthesesWidth;
882	return false;
883	}
884	FALLTHROUGH;
885	case QuantifierNonGreedy:
886	if (backTrack->begin == notFound) {
887	backTrack->begin = input.getPos();
888	if (term.capture()) {
889	// Technically this access to inputPosition should be accessing the begin term's
890	// inputPosition, but for repeats other than fixed these values should be
891	// the same anyway! (We don't pre-check for greedy or non-greedy matches.)
892	ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
893	ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
894	unsigned subpatternId = term.atom.subpatternId;
895	output[subpatternId << `1`] = input.getPos() - term.inputPosition;
896	}
897	context->term -= term.atom.parenthesesWidth;
898	return true;
899	}
900	FALLTHROUGH;
901	case QuantifierFixedCount:
902	break;
903	}
904
905	return false;
906	}
907
908	bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
909	{
910	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
911	ASSERT(term.atom.quantityType == QuantifierGreedy);
912	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
913	ASSERT(!term.capture());
914
915	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
916	backTrack->begin = input.getPos();
917	return true;
918	}
919
920	bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
921	{
922	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
923
924	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
925	// Empty match is a failed match.
926	if (backTrack->begin == input.getPos())
927	return false;
928
929	// Successful match! Okay, what's next? - loop around and try to match more!
930	context->term -= (term.atom.parenthesesWidth + `1`);
931	return true;
932	}
933
934	bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
935	{
936	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
937	ASSERT(term.atom.quantityType == QuantifierGreedy);
938	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
939	ASSERT(!term.capture());
940
941	// If we backtrack to this point, we have failed to match this iteration of the parens.
942	// Since this is greedy / zero minimum a failed is also accepted as a match!
943	context->term += term.atom.parenthesesWidth;
944	return true;
945	}
946
947	bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
948	{
949	// 'Terminal' parentheses are at the end of the regex, and as such a match past end
950	// should always be returned as a successful match - we should never backtrack to here.
951	RELEASE_ASSERT_NOT_REACHED();
952	return false;
953	}
954
955	bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
956	{
957	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
958	ASSERT(term.atom.quantityMaxCount == `1`);
959
960	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
961
962	backTrack->begin = input.getPos();
963	return true;
964	}
965
966	bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
967	{
968	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
969	ASSERT(term.atom.quantityMaxCount == `1`);
970
971	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
972
973	input.setPos(backTrack->begin);
974
975	// We've reached the end of the parens; if they are inverted, this is failure.
976	if (term.invert()) {
977	context->term -= term.atom.parenthesesWidth;
978	return false;
979	}
980
981	return true;
982	}
983
984	bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
985	{
986	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
987	ASSERT(term.atom.quantityMaxCount == `1`);
988
989	// We've failed to match parens; if they are inverted, this is win!
990	if (term.invert()) {
991	context->term += term.atom.parenthesesWidth;
992	return true;
993	}
994
995	return false;
996	}
997
998	bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
999	{
1000	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
1001	ASSERT(term.atom.quantityMaxCount == `1`);
1002
1003	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
1004
1005	input.setPos(backTrack->begin);
1006
1007	context->term -= term.atom.parenthesesWidth;
1008	return false;
1009	}
1010
1011	JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
1012	{
1013	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1014
1015	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1016	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1017
1018	backTrack->matchAmount = `0`;
1019	backTrack->lastContext = `0`;
1020
1021	ASSERT(term.atom.quantityType != QuantifierFixedCount \|\| term.atom.quantityMinCount == term.atom.quantityMaxCount);
1022
1023	unsigned minimumMatchCount = term.atom.quantityMinCount;
1024	JSRegExpResult fixedMatchResult;
1025
1026	// Handle fixed matches and the minimum part of a variable length match.
1027	if (minimumMatchCount) {
1028	// While we haven't yet reached our fixed limit,
1029	while (backTrack->matchAmount < minimumMatchCount) {
1030	// Try to do a match, and it it succeeds, add it to the list.
1031	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1032	fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1033	if (fixedMatchResult == JSRegExpMatch)
1034	appendParenthesesDisjunctionContext(backTrack, context);
1035	else {
1036	// The match failed; try to find an alternate point to carry on from.
1037	resetMatches(term, context);
1038	freeParenthesesDisjunctionContext(context);
1039
1040	if (fixedMatchResult != JSRegExpNoMatch)
1041	return fixedMatchResult;
1042	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1043	if (backtrackResult != JSRegExpMatch)
1044	return backtrackResult;
1045	}
1046	}
1047
1048	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1049	recordParenthesesMatch(term, context);
1050	}
1051
1052	switch (term.atom.quantityType) {
1053	case QuantifierFixedCount: {
1054	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1055	return JSRegExpMatch;
1056	}
1057
1058	case QuantifierGreedy: {
1059	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1060	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1061	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1062	if (result == JSRegExpMatch)
1063	appendParenthesesDisjunctionContext(backTrack, context);
1064	else {
1065	resetMatches(term, context);
1066	freeParenthesesDisjunctionContext(context);
1067
1068	if (result != JSRegExpNoMatch)
1069	return result;
1070
1071	break;
1072	}
1073	}
1074
1075	if (backTrack->matchAmount) {
1076	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1077	recordParenthesesMatch(term, context);
1078	}
1079	return JSRegExpMatch;
1080	}
1081
1082	case QuantifierNonGreedy:
1083	return JSRegExpMatch;
1084	}
1085
1086	RELEASE_ASSERT_NOT_REACHED();
1087	return JSRegExpErrorNoMatch;
1088	}
1089
1090	// Rules for backtracking differ depending on whether this is greedy or non-greedy.
1091	//
1092	// Greedy matches never should try just adding more - you should already have done
1093	// the 'more' cases. Always backtrack, at least a leetle bit. However cases where
1094	// you backtrack an item off the list needs checking, since we'll never have matched
1095	// the one less case. Tracking forwards, still add as much as possible.
1096	//
1097	// Non-greedy, we've already done the one less case, so don't match on popping.
1098	// We haven't done the one more case, so always try to add that.
1099	//
1100	JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1101	{
1102	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1103
1104	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1105	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1106
1107	switch (term.atom.quantityType) {
1108	case QuantifierFixedCount: {
1109	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1110
1111	ParenthesesDisjunctionContext* context = `0`;
1112	JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1113
1114	if (result != JSRegExpMatch)
1115	return result;
1116
1117	// While we haven't yet reached our fixed limit,
1118	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1119	// Try to do a match, and it it succeeds, add it to the list.
1120	context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1121	result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1122
1123	if (result == JSRegExpMatch)
1124	appendParenthesesDisjunctionContext(backTrack, context);
1125	else {
1126	// The match failed; try to find an alternate point to carry on from.
1127	resetMatches(term, context);
1128	freeParenthesesDisjunctionContext(context);
1129
1130	if (result != JSRegExpNoMatch)
1131	return result;
1132	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1133	if (backtrackResult != JSRegExpMatch)
1134	return backtrackResult;
1135	}
1136	}
1137
1138	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1139	context = backTrack->lastContext;
1140	recordParenthesesMatch(term, context);
1141	return JSRegExpMatch;
1142	}
1143
1144	case QuantifierGreedy: {
1145	if (!backTrack->matchAmount)
1146	return JSRegExpNoMatch;
1147
1148	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1149	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1150	if (result == JSRegExpMatch) {
1151	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1152	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1153	JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1154	if (parenthesesResult == JSRegExpMatch)
1155	appendParenthesesDisjunctionContext(backTrack, context);
1156	else {
1157	resetMatches(term, context);
1158	freeParenthesesDisjunctionContext(context);
1159
1160	if (parenthesesResult != JSRegExpNoMatch)
1161	return parenthesesResult;
1162
1163	break;
1164	}
1165	}
1166	} else {
1167	resetMatches(term, context);
1168	popParenthesesDisjunctionContext(backTrack);
1169	freeParenthesesDisjunctionContext(context);
1170
1171	if (result != JSRegExpNoMatch \|\| backTrack->matchAmount < term.atom.quantityMinCount)
1172	return result;
1173	}
1174
1175	if (backTrack->matchAmount) {
1176	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1177	recordParenthesesMatch(term, context);
1178	}
1179	return JSRegExpMatch;
1180	}
1181
1182	case QuantifierNonGreedy: {
1183	// If we've not reached the limit, try to add one more match.
1184	if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1185	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1186	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1187	if (result == JSRegExpMatch) {
1188	appendParenthesesDisjunctionContext(backTrack, context);
1189	recordParenthesesMatch(term, context);
1190	return JSRegExpMatch;
1191	}
1192
1193	resetMatches(term, context);
1194	freeParenthesesDisjunctionContext(context);
1195
1196	if (result != JSRegExpNoMatch)
1197	return result;
1198	}
1199
1200	// Nope - okay backtrack looking for an alternative.
1201	while (backTrack->matchAmount) {
1202	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1203	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1204	if (result == JSRegExpMatch) {
1205	// successful backtrack! we're back in the game!
1206	if (backTrack->matchAmount) {
1207	context = backTrack->lastContext;
1208	recordParenthesesMatch(term, context);
1209	}
1210	return JSRegExpMatch;
1211	}
1212
1213	// pop a match off the stack
1214	resetMatches(term, context);
1215	popParenthesesDisjunctionContext(backTrack);
1216	freeParenthesesDisjunctionContext(context);
1217
1218	if (result != JSRegExpNoMatch)
1219	return result;
1220	}
1221
1222	return JSRegExpNoMatch;
1223	}
1224	}
1225
1226	RELEASE_ASSERT_NOT_REACHED();
1227	return JSRegExpErrorNoMatch;
1228	}
1229
1230	bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1231	{
1232	UNUSED_PARAM(term);
1233
1234	if (pattern->dotAll()) {
1235	context->matchBegin = startOffset;
1236	context->matchEnd = input.end();
1237	return true;
1238	}
1239
1240	unsigned matchBegin = context->matchBegin;
1241
1242	if (matchBegin > startOffset) {
1243	for (matchBegin--; true; matchBegin--) {
1244	if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1245	++matchBegin;
1246	break;
1247	}
1248
1249	if (matchBegin == startOffset)
1250	break;
1251	}
1252	}
1253
1254	unsigned matchEnd = input.getPos();
1255
1256	for (; (matchEnd != input.end())
1257	&& (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1258
1259	if (((matchBegin && term.anchors.m_bol)
1260	\|\| ((matchEnd != input.end()) && term.anchors.m_eol))
1261	&& !pattern->multiline())
1262	return false;
1263
1264	context->matchBegin = matchBegin;
1265	context->matchEnd = matchEnd;
1266	return true;
1267	}
1268
1269	#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1270	#define BACKTRACK() { --context->term; goto backtrack; }
1271	#define currentTerm() (disjunction->terms[context->term])
1272	JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1273	{
1274	if (!--remainingMatchCount)
1275	return JSRegExpErrorHitLimit;
1276
1277	if (btrack)
1278	BACKTRACK();
1279
1280	context->matchBegin = input.getPos();
1281	context->term = `0`;
1282
1283	matchAgain:
1284	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1285
1286	switch (currentTerm().type) {
1287	case ByteTerm::TypeSubpatternBegin:
1288	MATCH_NEXT();
1289	case ByteTerm::TypeSubpatternEnd:
1290	context->matchEnd = input.getPos();
1291	return JSRegExpMatch;
1292
1293	case ByteTerm::TypeBodyAlternativeBegin:
1294	MATCH_NEXT();
1295	case ByteTerm::TypeBodyAlternativeDisjunction:
1296	case ByteTerm::TypeBodyAlternativeEnd:
1297	context->matchEnd = input.getPos();
1298	return JSRegExpMatch;
1299
1300	case ByteTerm::TypeAlternativeBegin:
1301	MATCH_NEXT();
1302	case ByteTerm::TypeAlternativeDisjunction:
1303	case ByteTerm::TypeAlternativeEnd: {
1304	int offset = currentTerm().alternative.end;
1305	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1306	backTrack->offset = offset;
1307	context->term += offset;
1308	MATCH_NEXT();
1309	}
1310
1311	case ByteTerm::TypeAssertionBOL:
1312	if (matchAssertionBOL(currentTerm()))
1313	MATCH_NEXT();
1314	BACKTRACK();
1315	case ByteTerm::TypeAssertionEOL:
1316	if (matchAssertionEOL(currentTerm()))
1317	MATCH_NEXT();
1318	BACKTRACK();
1319	case ByteTerm::TypeAssertionWordBoundary:
1320	if (matchAssertionWordBoundary(currentTerm()))
1321	MATCH_NEXT();
1322	BACKTRACK();
1323
1324	case ByteTerm::TypePatternCharacterOnce:
1325	case ByteTerm::TypePatternCharacterFixed: {
1326	if (unicode) {
1327	if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1328	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1329	if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - `2` * matchAmount)) {
1330	BACKTRACK();
1331	}
1332	}
1333	MATCH_NEXT();
1334	}
1335	}
1336	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1337
1338	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1339	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1340	input.setPos(position);
1341	BACKTRACK();
1342	}
1343	}
1344	MATCH_NEXT();
1345	}
1346	case ByteTerm::TypePatternCharacterGreedy: {
1347	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1348	unsigned matchAmount = `0`;
1349	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1350	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1351	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + `1`)) {
1352	input.setPos(position);
1353	break;
1354	}
1355	++matchAmount;
1356	position = input.getPos();
1357	}
1358	backTrack->matchAmount = matchAmount;
1359
1360	MATCH_NEXT();
1361	}
1362	case ByteTerm::TypePatternCharacterNonGreedy: {
1363	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1364	backTrack->begin = input.getPos();
1365	backTrack->matchAmount = `0`;
1366	MATCH_NEXT();
1367	}
1368
1369	case ByteTerm::TypePatternCasedCharacterOnce:
1370	case ByteTerm::TypePatternCasedCharacterFixed: {
1371	if (unicode) {
1372	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1373	ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1374
1375	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1376
1377	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1378	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1379	input.setPos(position);
1380	BACKTRACK();
1381	}
1382	}
1383	MATCH_NEXT();
1384	}
1385
1386	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1387	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1388	BACKTRACK();
1389	}
1390	MATCH_NEXT();
1391	}
1392	case ByteTerm::TypePatternCasedCharacterGreedy: {
1393	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1394
1395	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1396	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1397
1398	unsigned matchAmount = `0`;
1399	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1400	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + `1`)) {
1401	input.uncheckInput(`1`);
1402	break;
1403	}
1404	++matchAmount;
1405	}
1406	backTrack->matchAmount = matchAmount;
1407
1408	MATCH_NEXT();
1409	}
1410	case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1411	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1412
1413	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1414	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1415
1416	backTrack->matchAmount = `0`;
1417	MATCH_NEXT();
1418	}
1419
1420	case ByteTerm::TypeCharacterClass:
1421	if (matchCharacterClass(currentTerm(), context))
1422	MATCH_NEXT();
1423	BACKTRACK();
1424	case ByteTerm::TypeBackReference:
1425	if (matchBackReference(currentTerm(), context))
1426	MATCH_NEXT();
1427	BACKTRACK();
1428	case ByteTerm::TypeParenthesesSubpattern: {
1429	JSRegExpResult result = matchParentheses(currentTerm(), context);
1430
1431	if (result == JSRegExpMatch) {
1432	MATCH_NEXT();
1433	} else if (result != JSRegExpNoMatch)
1434	return result;
1435
1436	BACKTRACK();
1437	}
1438	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1439	if (matchParenthesesOnceBegin(currentTerm(), context))
1440	MATCH_NEXT();
1441	BACKTRACK();
1442	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1443	if (matchParenthesesOnceEnd(currentTerm(), context))
1444	MATCH_NEXT();
1445	BACKTRACK();
1446	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1447	if (matchParenthesesTerminalBegin(currentTerm(), context))
1448	MATCH_NEXT();
1449	BACKTRACK();
1450	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1451	if (matchParenthesesTerminalEnd(currentTerm(), context))
1452	MATCH_NEXT();
1453	BACKTRACK();
1454	case ByteTerm::TypeParentheticalAssertionBegin:
1455	if (matchParentheticalAssertionBegin(currentTerm(), context))
1456	MATCH_NEXT();
1457	BACKTRACK();
1458	case ByteTerm::TypeParentheticalAssertionEnd:
1459	if (matchParentheticalAssertionEnd(currentTerm(), context))
1460	MATCH_NEXT();
1461	BACKTRACK();
1462
1463	case ByteTerm::TypeCheckInput:
1464	if (input.checkInput(currentTerm().checkInputCount))
1465	MATCH_NEXT();
1466	BACKTRACK();
1467
1468	case ByteTerm::TypeUncheckInput:
1469	input.uncheckInput(currentTerm().checkInputCount);
1470	MATCH_NEXT();
1471
1472	case ByteTerm::TypeDotStarEnclosure:
1473	if (matchDotStarEnclosure(currentTerm(), context))
1474	return JSRegExpMatch;
1475	BACKTRACK();
1476	}
1477
1478	// We should never fall-through to here.
1479	RELEASE_ASSERT_NOT_REACHED();
1480
1481	backtrack:
1482	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1483
1484	switch (currentTerm().type) {
1485	case ByteTerm::TypeSubpatternBegin:
1486	return JSRegExpNoMatch;
1487	case ByteTerm::TypeSubpatternEnd:
1488	RELEASE_ASSERT_NOT_REACHED();
1489
1490	case ByteTerm::TypeBodyAlternativeBegin:
1491	case ByteTerm::TypeBodyAlternativeDisjunction: {
1492	int offset = currentTerm().alternative.next;
1493	context->term += offset;
1494	if (offset > `0`)
1495	MATCH_NEXT();
1496
1497	if (input.atEnd() \|\| pattern->sticky())
1498	return JSRegExpNoMatch;
1499
1500	input.next();
1501
1502	context->matchBegin = input.getPos();
1503
1504	if (currentTerm().alternative.onceThrough)
1505	context->term += currentTerm().alternative.next;
1506
1507	MATCH_NEXT();
1508	}
1509	case ByteTerm::TypeBodyAlternativeEnd:
1510	RELEASE_ASSERT_NOT_REACHED();
1511
1512	case ByteTerm::TypeAlternativeBegin:
1513	case ByteTerm::TypeAlternativeDisjunction: {
1514	int offset = currentTerm().alternative.next;
1515	context->term += offset;
1516	if (offset > `0`)
1517	MATCH_NEXT();
1518	BACKTRACK();
1519	}
1520	case ByteTerm::TypeAlternativeEnd: {
1521	// We should never backtrack back into an alternative of the main body of the regex.
1522	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1523	unsigned offset = backTrack->offset;
1524	context->term -= offset;
1525	BACKTRACK();
1526	}
1527
1528	case ByteTerm::TypeAssertionBOL:
1529	case ByteTerm::TypeAssertionEOL:
1530	case ByteTerm::TypeAssertionWordBoundary:
1531	BACKTRACK();
1532
1533	case ByteTerm::TypePatternCharacterOnce:
1534	case ByteTerm::TypePatternCharacterFixed:
1535	case ByteTerm::TypePatternCharacterGreedy:
1536	case ByteTerm::TypePatternCharacterNonGreedy:
1537	if (backtrackPatternCharacter(currentTerm(), context))
1538	MATCH_NEXT();
1539	BACKTRACK();
1540	case ByteTerm::TypePatternCasedCharacterOnce:
1541	case ByteTerm::TypePatternCasedCharacterFixed:
1542	case ByteTerm::TypePatternCasedCharacterGreedy:
1543	case ByteTerm::TypePatternCasedCharacterNonGreedy:
1544	if (backtrackPatternCasedCharacter(currentTerm(), context))
1545	MATCH_NEXT();
1546	BACKTRACK();
1547	case ByteTerm::TypeCharacterClass:
1548	if (backtrackCharacterClass(currentTerm(), context))
1549	MATCH_NEXT();
1550	BACKTRACK();
1551	case ByteTerm::TypeBackReference:
1552	if (backtrackBackReference(currentTerm(), context))
1553	MATCH_NEXT();
1554	BACKTRACK();
1555	case ByteTerm::TypeParenthesesSubpattern: {
1556	JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1557
1558	if (result == JSRegExpMatch) {
1559	MATCH_NEXT();
1560	} else if (result != JSRegExpNoMatch)
1561	return result;
1562
1563	BACKTRACK();
1564	}
1565	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1566	if (backtrackParenthesesOnceBegin(currentTerm(), context))
1567	MATCH_NEXT();
1568	BACKTRACK();
1569	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1570	if (backtrackParenthesesOnceEnd(currentTerm(), context))
1571	MATCH_NEXT();
1572	BACKTRACK();
1573	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1574	if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1575	MATCH_NEXT();
1576	BACKTRACK();
1577	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1578	if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1579	MATCH_NEXT();
1580	BACKTRACK();
1581	case ByteTerm::TypeParentheticalAssertionBegin:
1582	if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1583	MATCH_NEXT();
1584	BACKTRACK();
1585	case ByteTerm::TypeParentheticalAssertionEnd:
1586	if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1587	MATCH_NEXT();
1588	BACKTRACK();
1589
1590	case ByteTerm::TypeCheckInput:
1591	input.uncheckInput(currentTerm().checkInputCount);
1592	BACKTRACK();
1593
1594	case ByteTerm::TypeUncheckInput:
1595	input.checkInput(currentTerm().checkInputCount);
1596	BACKTRACK();
1597
1598	case ByteTerm::TypeDotStarEnclosure:
1599	RELEASE_ASSERT_NOT_REACHED();
1600	}
1601
1602	RELEASE_ASSERT_NOT_REACHED();
1603	return JSRegExpErrorNoMatch;
1604	}
1605
1606	JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1607	{
1608	JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1609
1610	if (result == JSRegExpMatch) {
1611	while (context->matchBegin == context->matchEnd) {
1612	result = matchDisjunction(disjunction, context, true);
1613	if (result != JSRegExpMatch)
1614	return result;
1615	}
1616	return JSRegExpMatch;
1617	}
1618
1619	return result;
1620	}
1621
1622	unsigned interpret()
1623	{
1624	// FIXME: https://bugs.webkit.org/show_bug.cgi?id=195970
1625	// [Yarr Interpreter] The interpreter doesn't have checks for stack overflow due to deep recursion
1626	if (!input.isAvailableInput(`0`))
1627	return offsetNoMatch;
1628
1629	if (pattern->m_lock)
1630	pattern->m_lock->lock();
1631
1632	for (unsigned i = `0`; i < pattern->m_body ->m_numSubpatterns + `1`; ++i)
1633	output[i << `1`] = offsetNoMatch;
1634
1635	allocatorPool = pattern->m_allocator->startAllocator();
1636	RELEASE_ASSERT(allocatorPool);
1637
1638	DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1639
1640	JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1641	if (result == JSRegExpMatch) {
1642	output[`0`] = context->matchBegin;
1643	output[`1`] = context->matchEnd;
1644	}
1645
1646	freeDisjunctionContext(context);
1647
1648	pattern->m_allocator->stopAllocator();
1649
1650	ASSERT((result == JSRegExpMatch) == (output[`0`] != offsetNoMatch));
1651
1652	if (pattern->m_lock)
1653	pattern->m_lock->unlock();
1654
1655	return output[`0`];
1656	}
1657
1658	Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1659	: pattern(pattern)
1660	, unicode(pattern->unicode())
1661	, output(output)
1662	, input(input, start, length, pattern->unicode())
1663	, startOffset(start)
1664	, remainingMatchCount(matchLimit)
1665	{
1666	}
1667
1668	private:
1669	BytecodePattern* pattern;
1670	bool unicode;
1671	unsigned* output;
1672	InputStream input;
1673	WTF::BumpPointerPool* allocatorPool { nullptr };
1674	unsigned startOffset;
1675	unsigned remainingMatchCount;
1676	};
1677
1678	class ByteCompiler {
1679	struct ParenthesesStackEntry {
1680	unsigned beginTerm;
1681	unsigned savedAlternativeIndex;
1682	ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/, unsigned subpatternId, bool capture = false/)
1683	: beginTerm(beginTerm)
1684	, savedAlternativeIndex(savedAlternativeIndex)
1685	{
1686	}
1687	};
1688
1689	public:
1690	ByteCompiler(YarrPattern& pattern)
1691	: m_pattern(pattern)
1692	{
1693	}
1694
1695	std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock, ErrorCode& errorCode)
1696	{
1697	regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives [`0`]->onceThrough());
1698	if (auto error = emitDisjunction(m_pattern.m_body, `0`, `0`)) {
1699	errorCode = error.value();
1700	return nullptr;
1701	}
1702	regexEnd();
1703
1704	#ifndef NDEBUG
1705	if (Options::dumpCompiledRegExpPatterns())
1706	dumpDisjunction(m_bodyDisjunction.get());
1707	#endif
1708
1709	return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1710	}
1711
1712	void checkInput(unsigned count)
1713	{
1714	m_bodyDisjunction ->terms.append(ByteTerm::CheckInput(count));
1715	}
1716
1717	void uncheckInput(unsigned count)
1718	{
1719	m_bodyDisjunction ->terms.append(ByteTerm::UncheckInput(count));
1720	}
1721
1722	void assertionBOL(unsigned inputPosition)
1723	{
1724	m_bodyDisjunction ->terms.append(ByteTerm::BOL(inputPosition));
1725	}
1726
1727	void assertionEOL(unsigned inputPosition)
1728	{
1729	m_bodyDisjunction ->terms.append(ByteTerm::EOL(inputPosition));
1730	}
1731
1732	void assertionWordBoundary(bool invert, unsigned inputPosition)
1733	{
1734	m_bodyDisjunction ->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1735	}
1736
1737	void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1738	{
1739	if (m_pattern.ignoreCase()) {
1740	UChar32 lo = u_tolower(ch);
1741	UChar32 hi = u_toupper(ch);
1742
1743	if (lo != hi) {
1744	m_bodyDisjunction ->terms.append(ByteTerm (lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1745	return;
1746	}
1747	}
1748
1749	m_bodyDisjunction ->terms.append(ByteTerm (ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1750	}
1751
1752	void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1753	{
1754	m_bodyDisjunction ->terms.append(ByteTerm (characterClass, invert, inputPosition));
1755
1756	m_bodyDisjunction ->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1757	m_bodyDisjunction ->terms.last().atom.quantityType = quantityType;
1758	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1759	}
1760
1761	void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1762	{
1763	ASSERT(subpatternId);
1764
1765	m_bodyDisjunction ->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1766
1767	m_bodyDisjunction ->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1768	m_bodyDisjunction ->terms.last().atom.quantityType = quantityType;
1769	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1770	}
1771
1772	void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1773	{
1774	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1775
1776	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1777	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1778	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1779	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1780
1781	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1782	m_currentAlternativeIndex = beginTerm + `1`;
1783	}
1784
1785	void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1786	{
1787	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1788
1789	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1790	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1791	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1792	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1793
1794	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1795	m_currentAlternativeIndex = beginTerm + `1`;
1796	}
1797
1798	void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1799	{
1800	// Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1801	// then fix this up at the end! - simplifying this should make it much clearer.
1802	// https://bugs.webkit.org/show_bug.cgi?id=50136
1803
1804	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1805
1806	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1807	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1808	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1809	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1810
1811	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1812	m_currentAlternativeIndex = beginTerm + `1`;
1813	}
1814
1815	void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1816	{
1817	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1818
1819	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, `0`));
1820	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1821	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1822	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1823
1824	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1825	m_currentAlternativeIndex = beginTerm + `1`;
1826	}
1827
1828	void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1829	{
1830	unsigned beginTerm = popParenthesesStack();
1831	closeAlternative(beginTerm + `1`);
1832	unsigned endTerm = m_bodyDisjunction ->terms.size();
1833
1834	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1835
1836	bool invert = m_bodyDisjunction ->terms [beginTerm].invert();
1837	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1838
1839	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1840	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1841	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1842	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1843
1844	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1845	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1846	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1847	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1848	}
1849
1850	void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1851	{
1852	m_bodyDisjunction ->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1853	}
1854
1855	unsigned popParenthesesStack()
1856	{
1857	ASSERT(m_parenthesesStack.size());
1858	unsigned beginTerm = m_parenthesesStack.last().beginTerm;
1859	m_currentAlternativeIndex = m_parenthesesStack.last().savedAlternativeIndex;
1860	m_parenthesesStack.removeLast();
1861
1862	ASSERT(beginTerm < m_bodyDisjunction ->terms.size());
1863	ASSERT(m_currentAlternativeIndex < m_bodyDisjunction ->terms.size());
1864
1865	return beginTerm;
1866	}
1867
1868	void closeAlternative(unsigned beginTerm)
1869	{
1870	unsigned origBeginTerm = beginTerm;
1871	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1872	unsigned endIndex = m_bodyDisjunction ->terms.size();
1873
1874	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1875
1876	if (!m_bodyDisjunction ->terms [beginTerm].alternative.next)
1877	m_bodyDisjunction ->terms.remove(beginTerm);
1878	else {
1879	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1880	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1881	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1882	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1883	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1884	}
1885
1886	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1887
1888	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeEnd());
1889	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1890	}
1891	}
1892
1893	void closeBodyAlternative()
1894	{
1895	unsigned beginTerm = `0`;
1896	unsigned origBeginTerm = `0`;
1897	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1898	unsigned endIndex = m_bodyDisjunction ->terms.size();
1899
1900	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1901
1902	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1903	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1904	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1905	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1906	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1907	}
1908
1909	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1910
1911	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeEnd());
1912	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1913	}
1914
1915	void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = `0`)
1916	{
1917	unsigned beginTerm = popParenthesesStack();
1918	closeAlternative(beginTerm + `1`);
1919	unsigned endTerm = m_bodyDisjunction ->terms.size();
1920
1921	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1922
1923	ByteTerm& parenthesesBegin = m_bodyDisjunction ->terms [beginTerm];
1924
1925	bool capture = parenthesesBegin.capture();
1926	unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1927
1928	unsigned numSubpatterns = lastSubpatternId - subpatternId + `1`;
1929	auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
1930
1931	unsigned firstTermInParentheses = beginTerm + `1`;
1932	parenthesesDisjunction ->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + `2`);
1933
1934	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternBegin());
1935	for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1936	parenthesesDisjunction ->terms.append(m_bodyDisjunction ->terms [termInParentheses]);
1937	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternEnd());
1938
1939	m_bodyDisjunction ->terms.shrink(beginTerm);
1940
1941	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1942	m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1943
1944	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1945	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1946	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1947	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1948	}
1949
1950	void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1951	{
1952	unsigned beginTerm = popParenthesesStack();
1953	closeAlternative(beginTerm + `1`);
1954	unsigned endTerm = m_bodyDisjunction ->terms.size();
1955
1956	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1957
1958	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1959	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1960
1961	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1962	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1963	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1964	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1965
1966	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1967	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1968	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1969	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1970	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1971	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1972	}
1973
1974	void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1975	{
1976	unsigned beginTerm = popParenthesesStack();
1977	closeAlternative(beginTerm + `1`);
1978	unsigned endTerm = m_bodyDisjunction ->terms.size();
1979
1980	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1981
1982	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1983	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1984
1985	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1986	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1987	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1988	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1989
1990	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1991	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1992	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1993	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1994	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1995	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1996	}
1997
1998	void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1999	{
2000	m_bodyDisjunction = std::make_unique<ByteDisjunction>(numSubpatterns, callFrameSize);
2001	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
2002	m_bodyDisjunction ->terms [`0`].frameLocation = `0`;
2003	m_currentAlternativeIndex = `0`;
2004	}
2005
2006	void regexEnd()
2007	{
2008	closeBodyAlternative();
2009	}
2010
2011	void alternativeBodyDisjunction(bool onceThrough)
2012	{
2013	unsigned newAlternativeIndex = m_bodyDisjunction ->terms.size();
2014	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2015	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
2016
2017	m_currentAlternativeIndex = newAlternativeIndex;
2018	}
2019
2020	void alternativeDisjunction()
2021	{
2022	unsigned newAlternativeIndex = m_bodyDisjunction ->terms.size();
2023	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2024	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeDisjunction());
2025
2026	m_currentAlternativeIndex = newAlternativeIndex;
2027	}
2028
2029	Optional<ErrorCode> WARN_UNUSED_RETURN emitDisjunction(PatternDisjunction* disjunction, Checked<unsigned, RecordOverflow> inputCountAlreadyChecked, unsigned parenthesesInputCountAlreadyChecked)
2030	{
2031	for (unsigned alt = `0`; alt < disjunction->m_alternatives.size(); ++alt) {
2032	auto currentCountAlreadyChecked = inputCountAlreadyChecked;
2033
2034	PatternAlternative* alternative = disjunction->m_alternatives [alt].get();
2035
2036	if (alt) {
2037	if (disjunction == m_pattern.m_body)
2038	alternativeBodyDisjunction(alternative->onceThrough());
2039	else
2040	alternativeDisjunction();
2041	}
2042
2043	unsigned minimumSize = alternative->m_minimumSize;
2044	ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2045	unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2046
2047	if (countToCheck) {
2048	checkInput(countToCheck);
2049	currentCountAlreadyChecked += countToCheck;
2050	if (currentCountAlreadyChecked.hasOverflowed())
2051	return ErrorCode::OffsetTooLarge;
2052	}
2053
2054	for (auto& term : alternative->m_terms) {
2055	switch (term.type) {
2056	case PatternTerm::TypeAssertionBOL:
2057	assertionBOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2058	break;
2059
2060	case PatternTerm::TypeAssertionEOL:
2061	assertionEOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2062	break;
2063
2064	case PatternTerm::TypeAssertionWordBoundary:
2065	assertionWordBoundary(term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2066	break;
2067
2068	case PatternTerm::TypePatternCharacter:
2069	atomPatternCharacter(term.patternCharacter, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2070	break;
2071
2072	case PatternTerm::TypeCharacterClass:
2073	atomCharacterClass(term.characterClass, term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2074	break;
2075
2076	case PatternTerm::TypeBackReference:
2077	atomBackReference(term.backReferenceSubpatternId, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2078	break;
2079
2080	case PatternTerm::TypeForwardReference:
2081	break;
2082
2083	case PatternTerm::TypeParenthesesSubpattern: {
2084	unsigned disjunctionAlreadyCheckedCount = `0`;
2085	if (term.quantityMaxCount == `1` && !term.parentheses.isCopy) {
2086	unsigned alternativeFrameLocation = term.frameLocation;
2087	// For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2088	if (term.quantityType == QuantifierFixedCount)
2089	disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2090	else
2091	alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2092	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2093	atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2094	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2095	return error;
2096	atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2097	} else if (term.parentheses.isTerminal) {
2098	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2099	atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
2100	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2101	return error;
2102	atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2103	} else {
2104	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2105	atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, `0`);
2106	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, `0`))
2107	return error;
2108	atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2109	}
2110	break;
2111	}
2112
2113	case PatternTerm::TypeParentheticalAssertion: {
2114	unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2115	unsigned positiveInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2116	unsigned uncheckAmount = `0`;
2117	if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2118	uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2119	uncheckInput(uncheckAmount);
2120	currentCountAlreadyChecked -= uncheckAmount;
2121	if (currentCountAlreadyChecked.hasOverflowed())
2122	return ErrorCode::OffsetTooLarge;
2123	}
2124
2125	atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2126	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount))
2127	return error;
2128	atomParentheticalAssertionEnd(`0`, term.frameLocation, term.quantityMaxCount, term.quantityType);
2129	if (uncheckAmount) {
2130	checkInput(uncheckAmount);
2131	currentCountAlreadyChecked += uncheckAmount;
2132	if (currentCountAlreadyChecked.hasOverflowed())
2133	return ErrorCode::OffsetTooLarge;
2134	}
2135	break;
2136	}
2137
2138	case PatternTerm::TypeDotStarEnclosure:
2139	assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2140	break;
2141	}
2142	}
2143	}
2144	return WTF::nullopt;
2145	}
2146	#ifndef NDEBUG
2147	void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = `0`)
2148	{
2149	PrintStream& out = WTF::dataFile();
2150
2151	unsigned termIndexNest = `0`;
2152
2153	if (!nesting) {
2154	out.printf("ByteDisjunction(%p):\n", disjunction);
2155	nesting = `1`;
2156	} else {
2157	termIndexNest = nesting - `1`;
2158	nesting = `2`;
2159	}
2160
2161	auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2162	for (unsigned nestingDepth = `0`; nestingDepth < termIndexNest; nestingDepth++)
2163	out.print(" ");
2164	out.printf("%4zu", index);
2165	for (unsigned nestingDepth = `0`; nestingDepth < termNesting; nestingDepth++)
2166	out.print(" ");
2167	};
2168
2169	auto dumpQuantity = [&](ByteTerm& term) {
2170	if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == `1` && term.atom.quantityMaxCount == `1`)
2171	return;
2172
2173	out.print(" {", term.atom.quantityMinCount);
2174	if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2175	if (term.atom.quantityMaxCount == UINT_MAX)
2176	out.print(",inf");
2177	else
2178	out.print(",", term.atom.quantityMaxCount);
2179	}
2180	out.print("}");
2181	if (term.atom.quantityType == QuantifierGreedy)
2182	out.print(" greedy");
2183	else if (term.atom.quantityType == QuantifierNonGreedy)
2184	out.print(" non-greedy");
2185	};
2186
2187	auto dumpCaptured = [&](ByteTerm& term) {
2188	if (term.capture())
2189	out.print(" captured (#", term.atom.subpatternId, ")");
2190	};
2191
2192	auto dumpInverted = [&](ByteTerm& term) {
2193	if (term.invert())
2194	out.print(" inverted");
2195	};
2196
2197	auto dumpInputPosition = [&](ByteTerm& term) {
2198	out.printf(" inputPosition %u", term.inputPosition);
2199	};
2200
2201	auto dumpFrameLocation = [&](ByteTerm& term) {
2202	out.printf(" frameLocation %u", term.frameLocation);
2203	};
2204
2205	auto dumpCharacter = [&](ByteTerm& term) {
2206	out.print(" ");
2207	dumpUChar32(out, term.atom.patternCharacter);
2208	};
2209
2210	auto dumpCharClass = [&](ByteTerm& term) {
2211	out.print(" ");
2212	dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2213	};
2214
2215	for (size_t idx = `0`; idx < disjunction->terms.size(); ++idx) {
2216	ByteTerm term = disjunction->terms [idx];
2217
2218	bool outputNewline = true;
2219
2220	switch (term.type) {
2221	case ByteTerm::TypeBodyAlternativeBegin:
2222	outputTermIndexAndNest(idx, nesting++);
2223	out.print("BodyAlternativeBegin");
2224	if (term.alternative.onceThrough)
2225	out.print(" onceThrough");
2226	dumpFrameLocation(term);
2227	break;
2228	case ByteTerm::TypeBodyAlternativeDisjunction:
2229	outputTermIndexAndNest(idx, nesting - `1`);
2230	out.print("BodyAlternativeDisjunction");
2231	dumpFrameLocation(term);
2232	break;
2233	case ByteTerm::TypeBodyAlternativeEnd:
2234	outputTermIndexAndNest(idx, --nesting);
2235	out.print("BodyAlternativeEnd");
2236	dumpFrameLocation(term);
2237	break;
2238	case ByteTerm::TypeAlternativeBegin:
2239	outputTermIndexAndNest(idx, nesting++);
2240	out.print("AlternativeBegin");
2241	dumpFrameLocation(term);
2242	break;
2243	case ByteTerm::TypeAlternativeDisjunction:
2244	outputTermIndexAndNest(idx, nesting - `1`);
2245	out.print("AlternativeDisjunction");
2246	dumpFrameLocation(term);
2247	break;
2248	case ByteTerm::TypeAlternativeEnd:
2249	outputTermIndexAndNest(idx, --nesting);
2250	out.print("AlternativeEnd");
2251	dumpFrameLocation(term);
2252	break;
2253	case ByteTerm::TypeSubpatternBegin:
2254	outputTermIndexAndNest(idx, nesting++);
2255	out.print("SubpatternBegin");
2256	break;
2257	case ByteTerm::TypeSubpatternEnd:
2258	outputTermIndexAndNest(idx, --nesting);
2259	out.print("SubpatternEnd");
2260	break;
2261	case ByteTerm::TypeAssertionBOL:
2262	outputTermIndexAndNest(idx, nesting);
2263	out.print("AssertionBOL");
2264	break;
2265	case ByteTerm::TypeAssertionEOL:
2266	outputTermIndexAndNest(idx, nesting);
2267	out.print("AssertionEOL");
2268	break;
2269	case ByteTerm::TypeAssertionWordBoundary:
2270	outputTermIndexAndNest(idx, nesting);
2271	out.print("AssertionWordBoundary");
2272	break;
2273	case ByteTerm::TypePatternCharacterOnce:
2274	outputTermIndexAndNest(idx, nesting);
2275	out.print("PatternCharacterOnce");
2276	dumpInverted(term);
2277	dumpInputPosition(term);
2278	dumpFrameLocation(term);
2279	dumpCharacter(term);
2280	dumpQuantity(term);
2281	break;
2282	case ByteTerm::TypePatternCharacterFixed:
2283	outputTermIndexAndNest(idx, nesting);
2284	out.print("PatternCharacterFixed");
2285	dumpInverted(term);
2286	dumpInputPosition(term);
2287	dumpFrameLocation(term);
2288	dumpCharacter(term);
2289	out.print(" {", term.atom.quantityMinCount, "}");
2290	break;
2291	case ByteTerm::TypePatternCharacterGreedy:
2292	outputTermIndexAndNest(idx, nesting);
2293	out.print("PatternCharacterGreedy");
2294	dumpInverted(term);
2295	dumpInputPosition(term);
2296	dumpFrameLocation(term);
2297	dumpCharacter(term);
2298	dumpQuantity(term);
2299	break;
2300	case ByteTerm::TypePatternCharacterNonGreedy:
2301	outputTermIndexAndNest(idx, nesting);
2302	out.print("PatternCharacterNonGreedy");
2303	dumpInverted(term);
2304	dumpInputPosition(term);
2305	dumpFrameLocation(term);
2306	dumpCharacter(term);
2307	dumpQuantity(term);
2308	break;
2309	case ByteTerm::TypePatternCasedCharacterOnce:
2310	outputTermIndexAndNest(idx, nesting);
2311	out.print("PatternCasedCharacterOnce");
2312	break;
2313	case ByteTerm::TypePatternCasedCharacterFixed:
2314	outputTermIndexAndNest(idx, nesting);
2315	out.print("PatternCasedCharacterFixed");
2316	break;
2317	case ByteTerm::TypePatternCasedCharacterGreedy:
2318	outputTermIndexAndNest(idx, nesting);
2319	out.print("PatternCasedCharacterGreedy");
2320	break;
2321	case ByteTerm::TypePatternCasedCharacterNonGreedy:
2322	outputTermIndexAndNest(idx, nesting);
2323	out.print("PatternCasedCharacterNonGreedy");
2324	break;
2325	case ByteTerm::TypeCharacterClass:
2326	outputTermIndexAndNest(idx, nesting);
2327	out.print("CharacterClass");
2328	dumpInverted(term);
2329	dumpInputPosition(term);
2330	dumpFrameLocation(term);
2331	dumpCharClass(term);
2332	dumpQuantity(term);
2333	break;
2334	case ByteTerm::TypeBackReference:
2335	outputTermIndexAndNest(idx, nesting);
2336	out.print("BackReference #", term.atom.subpatternId);
2337	dumpQuantity(term);
2338	break;
2339	case ByteTerm::TypeParenthesesSubpattern:
2340	outputTermIndexAndNest(idx, nesting);
2341	out.print("ParenthesesSubpattern");
2342	dumpCaptured(term);
2343	dumpInverted(term);
2344	dumpInputPosition(term);
2345	dumpFrameLocation(term);
2346	dumpQuantity(term);
2347	out.print("\n");
2348	outputNewline = false;
2349	dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
2350	break;
2351	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2352	outputTermIndexAndNest(idx, nesting++);
2353	out.print("ParenthesesSubpatternOnceBegin");
2354	dumpCaptured(term);
2355	dumpInverted(term);
2356	dumpInputPosition(term);
2357	dumpFrameLocation(term);
2358	break;
2359	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2360	outputTermIndexAndNest(idx, --nesting);
2361	out.print("ParenthesesSubpatternOnceEnd");
2362	dumpFrameLocation(term);
2363	break;
2364	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2365	outputTermIndexAndNest(idx, nesting++);
2366	out.print("ParenthesesSubpatternTerminalBegin");
2367	dumpInverted(term);
2368	dumpInputPosition(term);
2369	dumpFrameLocation(term);
2370	break;
2371	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2372	outputTermIndexAndNest(idx, --nesting);
2373	out.print("ParenthesesSubpatternTerminalEnd");
2374	dumpFrameLocation(term);
2375	break;
2376	case ByteTerm::TypeParentheticalAssertionBegin:
2377	outputTermIndexAndNest(idx, nesting++);
2378	out.print("ParentheticalAssertionBegin");
2379	dumpInverted(term);
2380	dumpInputPosition(term);
2381	dumpFrameLocation(term);
2382	break;
2383	case ByteTerm::TypeParentheticalAssertionEnd:
2384	outputTermIndexAndNest(idx, --nesting);
2385	out.print("ParentheticalAssertionEnd");
2386	dumpFrameLocation(term);
2387	break;
2388	case ByteTerm::TypeCheckInput:
2389	outputTermIndexAndNest(idx, nesting);
2390	out.print("CheckInput ", term.checkInputCount);
2391	break;
2392	case ByteTerm::TypeUncheckInput:
2393	outputTermIndexAndNest(idx, nesting);
2394	out.print("UncheckInput ", term.checkInputCount);
2395	break;
2396	case ByteTerm::TypeDotStarEnclosure:
2397	outputTermIndexAndNest(idx, nesting);
2398	out.print("DotStarEnclosure");
2399	break;
2400	}
2401	if (outputNewline)
2402	out.print("\n");
2403	}
2404	}
2405	#endif
2406
2407	private:
2408	YarrPattern& m_pattern;
2409	std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2410	unsigned m_currentAlternativeIndex { `0` };
2411	Vector<ParenthesesStackEntry> m_parenthesesStack;
2412	Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2413	};
2414
2415	std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ErrorCode& errorCode, ConcurrentJSLock* lock)
2416	{
2417	return ByteCompiler (pattern).compile(allocator, lock, errorCode);
2418	}
2419
2420	unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2421	{
2422	SuperSamplerScope superSamplerScope(false);
2423	if (input.is8Bit())
2424	return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2425	return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2426	}
2427
2428	unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2429	{
2430	SuperSamplerScope superSamplerScope(false);
2431	return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2432	}
2433
2434	unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2435	{
2436	SuperSamplerScope superSamplerScope(false);
2437	return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2438	}
2439
2440	// These should be the same for both UChar & LChar.
2441	COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2442	COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2443	COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2444	COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2445	COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2446	COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2447	COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2448
2449
2450	} }
2451

Browse the source code of jsc/Source/JavaScriptCore/yarr/YarrInterpreter.cpp