YarrInterpreter.cpp source code [jsc/Source/JavaScriptCore/yarr/YarrInterpreter.cpp]

1	/*
2	* Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3	* Copyright (C) 2010 Peter Varga ([email protected]), University of Szeged
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include "YarrInterpreter.h"
29
30	#include "Options.h"
31	#include "SuperSampler.h"
32	#include "Yarr.h"
33	#include "YarrCanonicalize.h"
34	#include <wtf/BumpPointerAllocator.h>
35	#include <wtf/CheckedArithmetic.h>
36	#include <wtf/DataLog.h>
37	#include <wtf/StackCheck.h>
38	#include <wtf/text/CString.h>
39	#include <wtf/text/WTFString.h>
40
41	namespace JSC { namespace Yarr {
42
43	template<typename CharType>
44	class Interpreter {
45	public:
46	struct ParenthesesDisjunctionContext;
47
48	struct BackTrackInfoParentheses {
49	uintptr_t matchAmount;
50	ParenthesesDisjunctionContext* lastContext;
51	};
52
53	static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
54	{
55	context->next = backTrack->lastContext;
56	backTrack->lastContext = context;
57	++backTrack->matchAmount;
58	}
59
60	static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
61	{
62	RELEASE_ASSERT(backTrack->matchAmount);
63	RELEASE_ASSERT(backTrack->lastContext);
64	backTrack->lastContext = backTrack->lastContext->next;
65	--backTrack->matchAmount;
66	}
67
68	struct DisjunctionContext
69	{
70	DisjunctionContext() = default;
71
72	void* operator new(size_t, void* where)
73	{
74	return where;
75	}
76
77	static size_t allocationSize(unsigned numberOfFrames)
78	{
79	static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
80	size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
81	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
82	RELEASE_ASSERT(roundedSize >= rawSize);
83	return roundedSize;
84	}
85
86	int term { `0` };
87	unsigned matchBegin;
88	unsigned matchEnd;
89	uintptr_t frame[`1`];
90	};
91
92	DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
93	{
94	size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
95	allocatorPool = allocatorPool->ensureCapacity(size);
96	RELEASE_ASSERT(allocatorPool);
97	return new (allocatorPool->alloc(size)) DisjunctionContext();
98	}
99
100	void freeDisjunctionContext(DisjunctionContext* context)
101	{
102	allocatorPool = allocatorPool->dealloc(context);
103	}
104
105	struct ParenthesesDisjunctionContext
106	{
107	ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
108	{
109	unsigned firstSubpatternId = term.atom.subpatternId;
110	unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
111
112	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i) {
113	subpatternBackup[i] = output[(firstSubpatternId << `1`) + i];
114	output[(firstSubpatternId << `1`) + i] = offsetNoMatch;
115	}
116
117	new (getDisjunctionContext(term)) DisjunctionContext();
118	}
119
120	void* operator new(size_t, void* where)
121	{
122	return where;
123	}
124
125	void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
126	{
127	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i)
128	output[(firstSubpatternId << `1`) + i] = subpatternBackup[i];
129	}
130
131	DisjunctionContext* getDisjunctionContext(ByteTerm& term)
132	{
133	return bitwise_cast<DisjunctionContext>(bitwise_cast<uintptr_t>(this*) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns));
134	}
135
136	static size_t allocationSize(unsigned numberOfSubpatterns)
137	{
138	static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
139	size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * `2U`) * sizeof(unsigned)).unsafeGet();
140	size_t roundedSize = roundUpToMultipleOf<sizeof(void*)>(rawSize);
141	RELEASE_ASSERT(roundedSize >= rawSize);
142	return roundedSize;
143	}
144
145	ParenthesesDisjunctionContext* next { nullptr };
146	unsigned subpatternBackup[`1`];
147	};
148
149	ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
150	{
151	size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
152	allocatorPool = allocatorPool->ensureCapacity(size);
153	RELEASE_ASSERT(allocatorPool);
154	return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
155	}
156
157	void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
158	{
159	allocatorPool = allocatorPool->dealloc(context);
160	}
161
162	class InputStream {
163	public:
164	InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
165	: input(input)
166	, pos(start)
167	, length(length)
168	, decodeSurrogatePairs(decodeSurrogatePairs)
169	{
170	}
171
172	void next()
173	{
174	++pos;
175	}
176
177	void rewind(unsigned amount)
178	{
179	ASSERT(pos >= amount);
180	pos -= amount;
181	}
182
183	int read()
184	{
185	ASSERT(pos < length);
186	if (pos < length)
187	return input[pos];
188	return -`1`;
189	}
190
191	int readPair()
192	{
193	ASSERT(pos + `1` < length);
194	return input[pos] \| input[pos + `1`] << `16`;
195	}
196
197	int readChecked(unsigned negativePositionOffest)
198	{
199	RELEASE_ASSERT(pos >= negativePositionOffest);
200	unsigned p = pos - negativePositionOffest;
201	ASSERT(p < length);
202	int result = input[p];
203	if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + `1` < length && U16_IS_TRAIL(input[p + `1`])) {
204	if (atEnd())
205	return -`1`;
206
207	result = U16_GET_SUPPLEMENTARY(result, input[p + `1`]);
208	next();
209	}
210	return result;
211	}
212
213	int readSurrogatePairChecked(unsigned negativePositionOffset)
214	{
215	RELEASE_ASSERT(pos >= negativePositionOffset);
216	unsigned p = pos - negativePositionOffset;
217	ASSERT(p < length);
218	if (p + `1` >= length)
219	return -`1`;
220
221	int first = input[p];
222	int second = input[p + `1`];
223	if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
224	return U16_GET_SUPPLEMENTARY(first, second);
225
226	return -`1`;
227	}
228
229	int reread(unsigned from)
230	{
231	ASSERT(from < length);
232	int result = input[from];
233	if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + `1` < length && U16_IS_TRAIL(input[from + `1`]))
234	result = U16_GET_SUPPLEMENTARY(result, input[from + `1`]);
235	return result;
236	}
237
238	int prev()
239	{
240	ASSERT(!(pos > length));
241	if (pos && length)
242	return input[pos - `1`];
243	return -`1`;
244	}
245
246	unsigned getPos()
247	{
248	return pos;
249	}
250
251	void setPos(unsigned p)
252	{
253	pos = p;
254	}
255
256	bool atStart()
257	{
258	return pos == `0`;
259	}
260
261	bool atEnd()
262	{
263	return pos == length;
264	}
265
266	unsigned end()
267	{
268	return length;
269	}
270
271	bool checkInput(unsigned count)
272	{
273	if (((pos + count) <= length) && ((pos + count) >= pos)) {
274	pos += count;
275	return true;
276	}
277	return false;
278	}
279
280	void uncheckInput(unsigned count)
281	{
282	RELEASE_ASSERT(pos >= count);
283	pos -= count;
284	}
285
286	bool atStart(unsigned negativePositionOffset)
287	{
288	return pos == negativePositionOffset;
289	}
290
291	bool atEnd(unsigned negativePositionOffest)
292	{
293	RELEASE_ASSERT(pos >= negativePositionOffest);
294	return (pos - negativePositionOffest) == length;
295	}
296
297	bool isAvailableInput(unsigned offset)
298	{
299	return (((pos + offset) <= length) && ((pos + offset) >= pos));
300	}
301
302	private:
303	const CharType* input;
304	unsigned pos;
305	unsigned length;
306	bool decodeSurrogatePairs;
307	};
308
309	bool testCharacterClass(CharacterClass* characterClass, int ch)
310	{
311	auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
312	for (unsigned i = `0`; i < matches.size(); ++i) {
313	if (ch == matches [i])
314	return true;
315	}
316
317	return false;
318	};
319
320	auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
321	size_t low = `0`;
322	size_t high = matches.size() - `1`;
323
324	while (low <= high) {
325	size_t mid = low + (high - low) / `2`;
326	int diff = ch - matches [mid];
327	if (!diff)
328	return true;
329
330	if (diff < `0`) {
331	if (mid == low)
332	return false;
333	high = mid - `1`;
334	} else
335	low = mid + `1`;
336	}
337	return false;
338	};
339
340	auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
341	for (unsigned i = `0`; i < ranges.size(); ++i) {
342	if ((ch >= ranges [i].begin) && (ch <= ranges [i].end))
343	return true;
344	}
345
346	return false;
347	};
348
349	auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
350	size_t low = `0`;
351	size_t high = ranges.size() - `1`;
352
353	while (low <= high) {
354	size_t mid = low + (high - low) / `2`;
355	int rangeBeginDiff = ch - ranges [mid].begin;
356	if (rangeBeginDiff >= `0` && ch <= ranges [mid].end)
357	return true;
358
359	if (rangeBeginDiff < `0`) {
360	if (mid == low)
361	return false;
362	high = mid - `1`;
363	} else
364	low = mid + `1`;
365	}
366	return false;
367	};
368
369	if (characterClass->m_anyCharacter)
370	return true;
371
372	const size_t thresholdForBinarySearch = `6`;
373
374	if (!isASCII(ch)) {
375	if (characterClass->m_matchesUnicode.size()) {
376	if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
377	if (binarySearchMatches(characterClass->m_matchesUnicode))
378	return true;
379	} else if (linearSearchMatches(characterClass->m_matchesUnicode))
380	return true;
381	}
382
383	if (characterClass->m_rangesUnicode.size()) {
384	if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
385	if (binarySearchRanges(characterClass->m_rangesUnicode))
386	return true;
387	} else if (linearSearchRanges(characterClass->m_rangesUnicode))
388	return true;
389	}
390	} else {
391	if (characterClass->m_matches.size()) {
392	if (characterClass->m_matches.size() > thresholdForBinarySearch) {
393	if (binarySearchMatches(characterClass->m_matches))
394	return true;
395	} else if (linearSearchMatches(characterClass->m_matches))
396	return true;
397	}
398
399	if (characterClass->m_ranges.size()) {
400	if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
401	if (binarySearchRanges(characterClass->m_ranges))
402	return true;
403	} else if (linearSearchRanges(characterClass->m_ranges))
404	return true;
405	}
406	}
407
408	return false;
409	}
410
411	bool checkCharacter(int testChar, unsigned negativeInputOffset)
412	{
413	return testChar == input.readChecked(negativeInputOffset);
414	}
415
416	bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
417	{
418	return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
419	}
420
421	bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
422	{
423	int ch = input.readChecked(negativeInputOffset);
424	return (loChar == ch) \|\| (hiChar == ch);
425	}
426
427	bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
428	{
429	bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
430	return invert ? !match : match;
431	}
432
433	bool checkCharacterClassDontAdvanceInputForNonBMP(CharacterClass* characterClass, unsigned negativeInputOffset)
434	{
435	int readCharacter = characterClass->hasOnlyNonBMPCharacters() ? input.readSurrogatePairChecked(negativeInputOffset) : input.readChecked(negativeInputOffset);
436	return testCharacterClass(characterClass, readCharacter);
437	}
438
439	bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
440	{
441	unsigned matchSize = (unsigned)(matchEnd - matchBegin);
442
443	if (!input.checkInput(matchSize))
444	return false;
445
446	for (unsigned i = `0`; i < matchSize; ++i) {
447	int oldCh = input.reread(matchBegin + i);
448	int ch;
449	if (!U_IS_BMP(oldCh)) {
450	ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
451	++i;
452	} else
453	ch = input.readChecked(negativeInputOffset + matchSize - i);
454
455	if (oldCh == ch)
456	continue;
457
458	if (pattern->ignoreCase()) {
459	// See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
460	// patterns, Unicode values are never allowed to match against ASCII ones.
461	// For Unicode, we need to check all canonical equivalents of a character.
462	if (!unicode && (isASCII(oldCh) \|\| isASCII(ch))) {
463	if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
464	continue;
465	} else if (areCanonicallyEquivalent(oldCh, ch, unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
466	continue;
467	}
468
469	input.uncheckInput(matchSize);
470	return false;
471	}
472
473	return true;
474	}
475
476	bool matchAssertionBOL(ByteTerm& term)
477	{
478	return (input.atStart(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + `1`)));
479	}
480
481	bool matchAssertionEOL(ByteTerm& term)
482	{
483	if (term.inputPosition)
484	return (input.atEnd(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
485
486	return (input.atEnd()) \|\| (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read()));
487	}
488
489	bool matchAssertionWordBoundary(ByteTerm& term)
490	{
491	bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + `1`));
492	bool readIsWordchar;
493	if (term.inputPosition)
494	readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
495	else
496	readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
497
498	bool wordBoundary = prevIsWordchar != readIsWordchar;
499	return term.invert() ? !wordBoundary : wordBoundary;
500	}
501
502	bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
503	{
504	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
505
506	switch (term.atom.quantityType) {
507	case QuantifierFixedCount:
508	break;
509
510	case QuantifierGreedy:
511	if (backTrack->matchAmount) {
512	--backTrack->matchAmount;
513	input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
514	return true;
515	}
516	break;
517
518	case QuantifierNonGreedy:
519	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
520	++backTrack->matchAmount;
521	if (checkCharacter(term.atom.patternCharacter, term.inputPosition + `1`))
522	return true;
523	}
524	input.setPos(backTrack->begin);
525	break;
526	}
527
528	return false;
529	}
530
531	bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
532	{
533	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
534
535	switch (term.atom.quantityType) {
536	case QuantifierFixedCount:
537	break;
538
539	case QuantifierGreedy:
540	if (backTrack->matchAmount) {
541	--backTrack->matchAmount;
542	input.uncheckInput(`1`);
543	return true;
544	}
545	break;
546
547	case QuantifierNonGreedy:
548	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
549	++backTrack->matchAmount;
550	if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + `1`))
551	return true;
552	}
553	input.uncheckInput(backTrack->matchAmount);
554	break;
555	}
556
557	return false;
558	}
559
560	bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
561	{
562	ASSERT(term.type == ByteTerm::TypeCharacterClass);
563	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
564
565	switch (term.atom.quantityType) {
566	case QuantifierFixedCount: {
567	if (unicode) {
568	CharacterClass* charClass = term.atom.characterClass;
569	backTrack->begin = input.getPos();
570	unsigned matchAmount = `0`;
571	for (matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
572	if (term.invert()) {
573	if (!checkCharacterClass(charClass, term.invert(), term.inputPosition - matchAmount)) {
574	input.setPos(backTrack->begin);
575	return false;
576	}
577	} else {
578	unsigned matchOffset = matchAmount * (charClass->hasOnlyNonBMPCharacters() ? `2` : `1`);
579	if (!checkCharacterClassDontAdvanceInputForNonBMP(charClass, term.inputPosition - matchOffset)) {
580	input.setPos(backTrack->begin);
581	return false;
582	}
583	}
584	}
585
586	return true;
587	}
588
589	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
590	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
591	return false;
592	}
593	return true;
594	}
595
596	case QuantifierGreedy: {
597	unsigned position = input.getPos();
598	backTrack->begin = position;
599	unsigned matchAmount = `0`;
600	while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
601	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
602	input.setPos(position);
603	break;
604	}
605	++matchAmount;
606	position = input.getPos();
607	}
608	backTrack->matchAmount = matchAmount;
609
610	return true;
611	}
612
613	case QuantifierNonGreedy:
614	backTrack->begin = input.getPos();
615	backTrack->matchAmount = `0`;
616	return true;
617	}
618
619	RELEASE_ASSERT_NOT_REACHED();
620	return false;
621	}
622
623	bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
624	{
625	ASSERT(term.type == ByteTerm::TypeCharacterClass);
626	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
627
628	switch (term.atom.quantityType) {
629	case QuantifierFixedCount:
630	if (unicode)
631	input.setPos(backTrack->begin);
632	break;
633
634	case QuantifierGreedy:
635	if (backTrack->matchAmount) {
636	if (unicode) {
637	// Rematch one less match
638	input.setPos(backTrack->begin);
639	--backTrack->matchAmount;
640	for (unsigned matchAmount = `0`; (matchAmount < backTrack->matchAmount) && input.checkInput(`1`); ++matchAmount) {
641	if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`)) {
642	input.uncheckInput(`1`);
643	break;
644	}
645	}
646	return true;
647	}
648	--backTrack->matchAmount;
649	input.uncheckInput(`1`);
650	return true;
651	}
652	break;
653
654	case QuantifierNonGreedy:
655	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
656	++backTrack->matchAmount;
657	if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + `1`))
658	return true;
659	}
660	input.setPos(backTrack->begin);
661	break;
662	}
663
664	return false;
665	}
666
667	bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
668	{
669	ASSERT(term.type == ByteTerm::TypeBackReference);
670	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
671
672	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
673	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
674
675	// If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
676	// In this case the result of match is empty string like when it references to a parentheses with zero-width match.
677	// Eg.: /(a\1)/
678	if (matchEnd == offsetNoMatch)
679	return true;
680
681	if (matchBegin == offsetNoMatch)
682	return true;
683
684	ASSERT(matchBegin <= matchEnd);
685
686	if (matchBegin == matchEnd)
687	return true;
688
689	switch (term.atom.quantityType) {
690	case QuantifierFixedCount: {
691	backTrack->begin = input.getPos();
692	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
693	if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
694	input.setPos(backTrack->begin);
695	return false;
696	}
697	}
698	return true;
699	}
700
701	case QuantifierGreedy: {
702	unsigned matchAmount = `0`;
703	while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
704	++matchAmount;
705	backTrack->matchAmount = matchAmount;
706	return true;
707	}
708
709	case QuantifierNonGreedy:
710	backTrack->begin = input.getPos();
711	backTrack->matchAmount = `0`;
712	return true;
713	}
714
715	RELEASE_ASSERT_NOT_REACHED();
716	return false;
717	}
718
719	bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
720	{
721	ASSERT(term.type == ByteTerm::TypeBackReference);
722	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
723
724	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
725	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
726
727	if (matchBegin == offsetNoMatch)
728	return false;
729
730	ASSERT(matchBegin <= matchEnd);
731
732	if (matchBegin == matchEnd)
733	return false;
734
735	switch (term.atom.quantityType) {
736	case QuantifierFixedCount:
737	// for quantityMaxCount == 1, could rewind.
738	input.setPos(backTrack->begin);
739	break;
740
741	case QuantifierGreedy:
742	if (backTrack->matchAmount) {
743	--backTrack->matchAmount;
744	input.rewind(matchEnd - matchBegin);
745	return true;
746	}
747	break;
748
749	case QuantifierNonGreedy:
750	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
751	++backTrack->matchAmount;
752	return true;
753	}
754	input.setPos(backTrack->begin);
755	break;
756	}
757
758	return false;
759	}
760
761	void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
762	{
763	if (term.capture()) {
764	unsigned subpatternId = term.atom.subpatternId;
765	output[(subpatternId << `1`)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
766	output[(subpatternId << `1`) + `1`] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
767	}
768	}
769	void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
770	{
771	unsigned firstSubpatternId = term.atom.subpatternId;
772	unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
773	context->restoreOutput(output, firstSubpatternId, count);
774	}
775	JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
776	{
777	while (backTrack->matchAmount) {
778	ParenthesesDisjunctionContext* context = backTrack->lastContext;
779
780	JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
781	if (result == JSRegExpMatch)
782	return JSRegExpMatch;
783
784	resetMatches(term, context);
785	popParenthesesDisjunctionContext(backTrack);
786	freeParenthesesDisjunctionContext(context);
787
788	if (result != JSRegExpNoMatch)
789	return result;
790	}
791
792	return JSRegExpNoMatch;
793	}
794
795	bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
796	{
797	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
798	ASSERT(term.atom.quantityMaxCount == `1`);
799
800	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
801
802	switch (term.atom.quantityType) {
803	case QuantifierGreedy: {
804	// set this speculatively; if we get to the parens end this will be true.
805	backTrack->begin = input.getPos();
806	break;
807	}
808	case QuantifierNonGreedy: {
809	backTrack->begin = notFound;
810	context->term += term.atom.parenthesesWidth;
811	return true;
812	}
813	case QuantifierFixedCount:
814	break;
815	}
816
817	if (term.capture()) {
818	unsigned subpatternId = term.atom.subpatternId;
819	output[(subpatternId << `1`)] = input.getPos() - term.inputPosition;
820	}
821
822	return true;
823	}
824
825	bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
826	{
827	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
828	ASSERT(term.atom.quantityMaxCount == `1`);
829
830	if (term.capture()) {
831	unsigned subpatternId = term.atom.subpatternId;
832	output[(subpatternId << `1`) + `1`] = input.getPos() - term.inputPosition;
833	}
834
835	if (term.atom.quantityType == QuantifierFixedCount)
836	return true;
837
838	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
839	return backTrack->begin != input.getPos();
840	}
841
842	bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
843	{
844	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
845	ASSERT(term.atom.quantityMaxCount == `1`);
846
847	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
848
849	if (term.capture()) {
850	unsigned subpatternId = term.atom.subpatternId;
851	output[(subpatternId << `1`)] = offsetNoMatch;
852	output[(subpatternId << `1`) + `1`] = offsetNoMatch;
853	}
854
855	switch (term.atom.quantityType) {
856	case QuantifierGreedy:
857	// if we backtrack to this point, there is another chance - try matching nothing.
858	ASSERT(backTrack->begin != notFound);
859	backTrack->begin = notFound;
860	context->term += term.atom.parenthesesWidth;
861	return true;
862	case QuantifierNonGreedy:
863	ASSERT(backTrack->begin != notFound);
864	FALLTHROUGH;
865	case QuantifierFixedCount:
866	break;
867	}
868
869	return false;
870	}
871
872	bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
873	{
874	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
875	ASSERT(term.atom.quantityMaxCount == `1`);
876
877	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
878
879	switch (term.atom.quantityType) {
880	case QuantifierGreedy:
881	if (backTrack->begin == notFound) {
882	context->term -= term.atom.parenthesesWidth;
883	return false;
884	}
885	FALLTHROUGH;
886	case QuantifierNonGreedy:
887	if (backTrack->begin == notFound) {
888	backTrack->begin = input.getPos();
889	if (term.capture()) {
890	// Technically this access to inputPosition should be accessing the begin term's
891	// inputPosition, but for repeats other than fixed these values should be
892	// the same anyway! (We don't pre-check for greedy or non-greedy matches.)
893	ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
894	ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
895	unsigned subpatternId = term.atom.subpatternId;
896	output[subpatternId << `1`] = input.getPos() - term.inputPosition;
897	}
898	context->term -= term.atom.parenthesesWidth;
899	return true;
900	}
901	FALLTHROUGH;
902	case QuantifierFixedCount:
903	break;
904	}
905
906	return false;
907	}
908
909	bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
910	{
911	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
912	ASSERT(term.atom.quantityType == QuantifierGreedy);
913	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
914	ASSERT(!term.capture());
915
916	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
917	backTrack->begin = input.getPos();
918	return true;
919	}
920
921	bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
922	{
923	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
924
925	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
926	// Empty match is a failed match.
927	if (backTrack->begin == input.getPos())
928	return false;
929
930	// Successful match! Okay, what's next? - loop around and try to match more!
931	context->term -= (term.atom.parenthesesWidth + `1`);
932	return true;
933	}
934
935	bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
936	{
937	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
938	ASSERT(term.atom.quantityType == QuantifierGreedy);
939	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
940	ASSERT(!term.capture());
941
942	// If we backtrack to this point, we have failed to match this iteration of the parens.
943	// Since this is greedy / zero minimum a failed is also accepted as a match!
944	context->term += term.atom.parenthesesWidth;
945	return true;
946	}
947
948	bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
949	{
950	// 'Terminal' parentheses are at the end of the regex, and as such a match past end
951	// should always be returned as a successful match - we should never backtrack to here.
952	RELEASE_ASSERT_NOT_REACHED();
953	return false;
954	}
955
956	bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
957	{
958	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
959	ASSERT(term.atom.quantityMaxCount == `1`);
960
961	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
962
963	backTrack->begin = input.getPos();
964	return true;
965	}
966
967	bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
968	{
969	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
970	ASSERT(term.atom.quantityMaxCount == `1`);
971
972	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
973
974	input.setPos(backTrack->begin);
975
976	// We've reached the end of the parens; if they are inverted, this is failure.
977	if (term.invert()) {
978	context->term -= term.atom.parenthesesWidth;
979	return false;
980	}
981
982	return true;
983	}
984
985	bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
986	{
987	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
988	ASSERT(term.atom.quantityMaxCount == `1`);
989
990	// We've failed to match parens; if they are inverted, this is win!
991	if (term.invert()) {
992	context->term += term.atom.parenthesesWidth;
993	return true;
994	}
995
996	return false;
997	}
998
999	bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
1000	{
1001	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
1002	ASSERT(term.atom.quantityMaxCount == `1`);
1003
1004	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
1005
1006	input.setPos(backTrack->begin);
1007
1008	context->term -= term.atom.parenthesesWidth;
1009	return false;
1010	}
1011
1012	JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
1013	{
1014	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1015
1016	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1017	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1018
1019	backTrack->matchAmount = `0`;
1020	backTrack->lastContext = `0`;
1021
1022	ASSERT(term.atom.quantityType != QuantifierFixedCount \|\| term.atom.quantityMinCount == term.atom.quantityMaxCount);
1023
1024	unsigned minimumMatchCount = term.atom.quantityMinCount;
1025	JSRegExpResult fixedMatchResult;
1026
1027	// Handle fixed matches and the minimum part of a variable length match.
1028	if (minimumMatchCount) {
1029	// While we haven't yet reached our fixed limit,
1030	while (backTrack->matchAmount < minimumMatchCount) {
1031	// Try to do a match, and it it succeeds, add it to the list.
1032	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1033	fixedMatchResult = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1034	if (fixedMatchResult == JSRegExpMatch)
1035	appendParenthesesDisjunctionContext(backTrack, context);
1036	else {
1037	// The match failed; try to find an alternate point to carry on from.
1038	resetMatches(term, context);
1039	freeParenthesesDisjunctionContext(context);
1040
1041	if (fixedMatchResult != JSRegExpNoMatch)
1042	return fixedMatchResult;
1043	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1044	if (backtrackResult != JSRegExpMatch)
1045	return backtrackResult;
1046	}
1047	}
1048
1049	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1050	recordParenthesesMatch(term, context);
1051	}
1052
1053	switch (term.atom.quantityType) {
1054	case QuantifierFixedCount: {
1055	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1056	return JSRegExpMatch;
1057	}
1058
1059	case QuantifierGreedy: {
1060	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1061	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1062	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1063	if (result == JSRegExpMatch)
1064	appendParenthesesDisjunctionContext(backTrack, context);
1065	else {
1066	resetMatches(term, context);
1067	freeParenthesesDisjunctionContext(context);
1068
1069	if (result != JSRegExpNoMatch)
1070	return result;
1071
1072	break;
1073	}
1074	}
1075
1076	if (backTrack->matchAmount) {
1077	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1078	recordParenthesesMatch(term, context);
1079	}
1080	return JSRegExpMatch;
1081	}
1082
1083	case QuantifierNonGreedy:
1084	return JSRegExpMatch;
1085	}
1086
1087	RELEASE_ASSERT_NOT_REACHED();
1088	return JSRegExpErrorNoMatch;
1089	}
1090
1091	// Rules for backtracking differ depending on whether this is greedy or non-greedy.
1092	//
1093	// Greedy matches never should try just adding more - you should already have done
1094	// the 'more' cases. Always backtrack, at least a leetle bit. However cases where
1095	// you backtrack an item off the list needs checking, since we'll never have matched
1096	// the one less case. Tracking forwards, still add as much as possible.
1097	//
1098	// Non-greedy, we've already done the one less case, so don't match on popping.
1099	// We haven't done the one more case, so always try to add that.
1100	//
1101	JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1102	{
1103	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1104
1105	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1106	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1107
1108	switch (term.atom.quantityType) {
1109	case QuantifierFixedCount: {
1110	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1111
1112	ParenthesesDisjunctionContext* context = `0`;
1113	JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1114
1115	if (result != JSRegExpMatch)
1116	return result;
1117
1118	// While we haven't yet reached our fixed limit,
1119	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1120	// Try to do a match, and it it succeeds, add it to the list.
1121	context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1122	result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1123
1124	if (result == JSRegExpMatch)
1125	appendParenthesesDisjunctionContext(backTrack, context);
1126	else {
1127	// The match failed; try to find an alternate point to carry on from.
1128	resetMatches(term, context);
1129	freeParenthesesDisjunctionContext(context);
1130
1131	if (result != JSRegExpNoMatch)
1132	return result;
1133	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1134	if (backtrackResult != JSRegExpMatch)
1135	return backtrackResult;
1136	}
1137	}
1138
1139	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1140	context = backTrack->lastContext;
1141	recordParenthesesMatch(term, context);
1142	return JSRegExpMatch;
1143	}
1144
1145	case QuantifierGreedy: {
1146	if (!backTrack->matchAmount)
1147	return JSRegExpNoMatch;
1148
1149	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1150	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1151	if (result == JSRegExpMatch) {
1152	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1153	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1154	JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1155	if (parenthesesResult == JSRegExpMatch)
1156	appendParenthesesDisjunctionContext(backTrack, context);
1157	else {
1158	resetMatches(term, context);
1159	freeParenthesesDisjunctionContext(context);
1160
1161	if (parenthesesResult != JSRegExpNoMatch)
1162	return parenthesesResult;
1163
1164	break;
1165	}
1166	}
1167	} else {
1168	resetMatches(term, context);
1169	popParenthesesDisjunctionContext(backTrack);
1170	freeParenthesesDisjunctionContext(context);
1171
1172	if (result != JSRegExpNoMatch \|\| backTrack->matchAmount < term.atom.quantityMinCount)
1173	return result;
1174	}
1175
1176	if (backTrack->matchAmount) {
1177	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1178	recordParenthesesMatch(term, context);
1179	}
1180	return JSRegExpMatch;
1181	}
1182
1183	case QuantifierNonGreedy: {
1184	// If we've not reached the limit, try to add one more match.
1185	if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1186	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1187	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1188	if (result == JSRegExpMatch) {
1189	appendParenthesesDisjunctionContext(backTrack, context);
1190	recordParenthesesMatch(term, context);
1191	return JSRegExpMatch;
1192	}
1193
1194	resetMatches(term, context);
1195	freeParenthesesDisjunctionContext(context);
1196
1197	if (result != JSRegExpNoMatch)
1198	return result;
1199	}
1200
1201	// Nope - okay backtrack looking for an alternative.
1202	while (backTrack->matchAmount) {
1203	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1204	JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1205	if (result == JSRegExpMatch) {
1206	// successful backtrack! we're back in the game!
1207	if (backTrack->matchAmount) {
1208	context = backTrack->lastContext;
1209	recordParenthesesMatch(term, context);
1210	}
1211	return JSRegExpMatch;
1212	}
1213
1214	// pop a match off the stack
1215	resetMatches(term, context);
1216	popParenthesesDisjunctionContext(backTrack);
1217	freeParenthesesDisjunctionContext(context);
1218
1219	if (result != JSRegExpNoMatch)
1220	return result;
1221	}
1222
1223	return JSRegExpNoMatch;
1224	}
1225	}
1226
1227	RELEASE_ASSERT_NOT_REACHED();
1228	return JSRegExpErrorNoMatch;
1229	}
1230
1231	bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1232	{
1233	UNUSED_PARAM(term);
1234
1235	if (pattern->dotAll()) {
1236	context->matchBegin = startOffset;
1237	context->matchEnd = input.end();
1238	return true;
1239	}
1240
1241	unsigned matchBegin = context->matchBegin;
1242
1243	if (matchBegin > startOffset) {
1244	for (matchBegin--; true; matchBegin--) {
1245	if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1246	++matchBegin;
1247	break;
1248	}
1249
1250	if (matchBegin == startOffset)
1251	break;
1252	}
1253	}
1254
1255	unsigned matchEnd = input.getPos();
1256
1257	for (; (matchEnd != input.end())
1258	&& (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1259
1260	if (((matchBegin && term.anchors.m_bol)
1261	\|\| ((matchEnd != input.end()) && term.anchors.m_eol))
1262	&& !pattern->multiline())
1263	return false;
1264
1265	context->matchBegin = matchBegin;
1266	context->matchEnd = matchEnd;
1267	return true;
1268	}
1269
1270	#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1271	#define BACKTRACK() { --context->term; goto backtrack; }
1272	#define currentTerm() (disjunction->terms[context->term])
1273	JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1274	{
1275	if (!--remainingMatchCount)
1276	return JSRegExpErrorHitLimit;
1277
1278	if (btrack)
1279	BACKTRACK();
1280
1281	context->matchBegin = input.getPos();
1282	context->term = `0`;
1283
1284	matchAgain:
1285	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1286
1287	switch (currentTerm().type) {
1288	case ByteTerm::TypeSubpatternBegin:
1289	MATCH_NEXT();
1290	case ByteTerm::TypeSubpatternEnd:
1291	context->matchEnd = input.getPos();
1292	return JSRegExpMatch;
1293
1294	case ByteTerm::TypeBodyAlternativeBegin:
1295	MATCH_NEXT();
1296	case ByteTerm::TypeBodyAlternativeDisjunction:
1297	case ByteTerm::TypeBodyAlternativeEnd:
1298	context->matchEnd = input.getPos();
1299	return JSRegExpMatch;
1300
1301	case ByteTerm::TypeAlternativeBegin:
1302	MATCH_NEXT();
1303	case ByteTerm::TypeAlternativeDisjunction:
1304	case ByteTerm::TypeAlternativeEnd: {
1305	int offset = currentTerm().alternative.end;
1306	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1307	backTrack->offset = offset;
1308	context->term += offset;
1309	MATCH_NEXT();
1310	}
1311
1312	case ByteTerm::TypeAssertionBOL:
1313	if (matchAssertionBOL(currentTerm()))
1314	MATCH_NEXT();
1315	BACKTRACK();
1316	case ByteTerm::TypeAssertionEOL:
1317	if (matchAssertionEOL(currentTerm()))
1318	MATCH_NEXT();
1319	BACKTRACK();
1320	case ByteTerm::TypeAssertionWordBoundary:
1321	if (matchAssertionWordBoundary(currentTerm()))
1322	MATCH_NEXT();
1323	BACKTRACK();
1324
1325	case ByteTerm::TypePatternCharacterOnce:
1326	case ByteTerm::TypePatternCharacterFixed: {
1327	if (unicode) {
1328	if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1329	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1330	if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - `2` * matchAmount)) {
1331	BACKTRACK();
1332	}
1333	}
1334	MATCH_NEXT();
1335	}
1336	}
1337	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1338
1339	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1340	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1341	input.setPos(position);
1342	BACKTRACK();
1343	}
1344	}
1345	MATCH_NEXT();
1346	}
1347	case ByteTerm::TypePatternCharacterGreedy: {
1348	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1349	unsigned matchAmount = `0`;
1350	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1351	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1352	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + `1`)) {
1353	input.setPos(position);
1354	break;
1355	}
1356	++matchAmount;
1357	position = input.getPos();
1358	}
1359	backTrack->matchAmount = matchAmount;
1360
1361	MATCH_NEXT();
1362	}
1363	case ByteTerm::TypePatternCharacterNonGreedy: {
1364	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1365	backTrack->begin = input.getPos();
1366	backTrack->matchAmount = `0`;
1367	MATCH_NEXT();
1368	}
1369
1370	case ByteTerm::TypePatternCasedCharacterOnce:
1371	case ByteTerm::TypePatternCasedCharacterFixed: {
1372	if (unicode) {
1373	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1374	ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1375
1376	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1377
1378	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1379	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1380	input.setPos(position);
1381	BACKTRACK();
1382	}
1383	}
1384	MATCH_NEXT();
1385	}
1386
1387	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1388	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1389	BACKTRACK();
1390	}
1391	MATCH_NEXT();
1392	}
1393	case ByteTerm::TypePatternCasedCharacterGreedy: {
1394	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1395
1396	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1397	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1398
1399	unsigned matchAmount = `0`;
1400	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1401	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + `1`)) {
1402	input.uncheckInput(`1`);
1403	break;
1404	}
1405	++matchAmount;
1406	}
1407	backTrack->matchAmount = matchAmount;
1408
1409	MATCH_NEXT();
1410	}
1411	case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1412	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1413
1414	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1415	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1416
1417	backTrack->matchAmount = `0`;
1418	MATCH_NEXT();
1419	}
1420
1421	case ByteTerm::TypeCharacterClass:
1422	if (matchCharacterClass(currentTerm(), context))
1423	MATCH_NEXT();
1424	BACKTRACK();
1425	case ByteTerm::TypeBackReference:
1426	if (matchBackReference(currentTerm(), context))
1427	MATCH_NEXT();
1428	BACKTRACK();
1429	case ByteTerm::TypeParenthesesSubpattern: {
1430	JSRegExpResult result = matchParentheses(currentTerm(), context);
1431
1432	if (result == JSRegExpMatch) {
1433	MATCH_NEXT();
1434	} else if (result != JSRegExpNoMatch)
1435	return result;
1436
1437	BACKTRACK();
1438	}
1439	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1440	if (matchParenthesesOnceBegin(currentTerm(), context))
1441	MATCH_NEXT();
1442	BACKTRACK();
1443	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1444	if (matchParenthesesOnceEnd(currentTerm(), context))
1445	MATCH_NEXT();
1446	BACKTRACK();
1447	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1448	if (matchParenthesesTerminalBegin(currentTerm(), context))
1449	MATCH_NEXT();
1450	BACKTRACK();
1451	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1452	if (matchParenthesesTerminalEnd(currentTerm(), context))
1453	MATCH_NEXT();
1454	BACKTRACK();
1455	case ByteTerm::TypeParentheticalAssertionBegin:
1456	if (matchParentheticalAssertionBegin(currentTerm(), context))
1457	MATCH_NEXT();
1458	BACKTRACK();
1459	case ByteTerm::TypeParentheticalAssertionEnd:
1460	if (matchParentheticalAssertionEnd(currentTerm(), context))
1461	MATCH_NEXT();
1462	BACKTRACK();
1463
1464	case ByteTerm::TypeCheckInput:
1465	if (input.checkInput(currentTerm().checkInputCount))
1466	MATCH_NEXT();
1467	BACKTRACK();
1468
1469	case ByteTerm::TypeUncheckInput:
1470	input.uncheckInput(currentTerm().checkInputCount);
1471	MATCH_NEXT();
1472
1473	case ByteTerm::TypeDotStarEnclosure:
1474	if (matchDotStarEnclosure(currentTerm(), context))
1475	return JSRegExpMatch;
1476	BACKTRACK();
1477	}
1478
1479	// We should never fall-through to here.
1480	RELEASE_ASSERT_NOT_REACHED();
1481
1482	backtrack:
1483	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1484
1485	switch (currentTerm().type) {
1486	case ByteTerm::TypeSubpatternBegin:
1487	return JSRegExpNoMatch;
1488	case ByteTerm::TypeSubpatternEnd:
1489	RELEASE_ASSERT_NOT_REACHED();
1490
1491	case ByteTerm::TypeBodyAlternativeBegin:
1492	case ByteTerm::TypeBodyAlternativeDisjunction: {
1493	int offset = currentTerm().alternative.next;
1494	context->term += offset;
1495	if (offset > `0`)
1496	MATCH_NEXT();
1497
1498	if (input.atEnd() \|\| pattern->sticky())
1499	return JSRegExpNoMatch;
1500
1501	input.next();
1502
1503	context->matchBegin = input.getPos();
1504
1505	if (currentTerm().alternative.onceThrough)
1506	context->term += currentTerm().alternative.next;
1507
1508	MATCH_NEXT();
1509	}
1510	case ByteTerm::TypeBodyAlternativeEnd:
1511	RELEASE_ASSERT_NOT_REACHED();
1512
1513	case ByteTerm::TypeAlternativeBegin:
1514	case ByteTerm::TypeAlternativeDisjunction: {
1515	int offset = currentTerm().alternative.next;
1516	context->term += offset;
1517	if (offset > `0`)
1518	MATCH_NEXT();
1519	BACKTRACK();
1520	}
1521	case ByteTerm::TypeAlternativeEnd: {
1522	// We should never backtrack back into an alternative of the main body of the regex.
1523	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1524	unsigned offset = backTrack->offset;
1525	context->term -= offset;
1526	BACKTRACK();
1527	}
1528
1529	case ByteTerm::TypeAssertionBOL:
1530	case ByteTerm::TypeAssertionEOL:
1531	case ByteTerm::TypeAssertionWordBoundary:
1532	BACKTRACK();
1533
1534	case ByteTerm::TypePatternCharacterOnce:
1535	case ByteTerm::TypePatternCharacterFixed:
1536	case ByteTerm::TypePatternCharacterGreedy:
1537	case ByteTerm::TypePatternCharacterNonGreedy:
1538	if (backtrackPatternCharacter(currentTerm(), context))
1539	MATCH_NEXT();
1540	BACKTRACK();
1541	case ByteTerm::TypePatternCasedCharacterOnce:
1542	case ByteTerm::TypePatternCasedCharacterFixed:
1543	case ByteTerm::TypePatternCasedCharacterGreedy:
1544	case ByteTerm::TypePatternCasedCharacterNonGreedy:
1545	if (backtrackPatternCasedCharacter(currentTerm(), context))
1546	MATCH_NEXT();
1547	BACKTRACK();
1548	case ByteTerm::TypeCharacterClass:
1549	if (backtrackCharacterClass(currentTerm(), context))
1550	MATCH_NEXT();
1551	BACKTRACK();
1552	case ByteTerm::TypeBackReference:
1553	if (backtrackBackReference(currentTerm(), context))
1554	MATCH_NEXT();
1555	BACKTRACK();
1556	case ByteTerm::TypeParenthesesSubpattern: {
1557	JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1558
1559	if (result == JSRegExpMatch) {
1560	MATCH_NEXT();
1561	} else if (result != JSRegExpNoMatch)
1562	return result;
1563
1564	BACKTRACK();
1565	}
1566	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1567	if (backtrackParenthesesOnceBegin(currentTerm(), context))
1568	MATCH_NEXT();
1569	BACKTRACK();
1570	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1571	if (backtrackParenthesesOnceEnd(currentTerm(), context))
1572	MATCH_NEXT();
1573	BACKTRACK();
1574	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1575	if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1576	MATCH_NEXT();
1577	BACKTRACK();
1578	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1579	if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1580	MATCH_NEXT();
1581	BACKTRACK();
1582	case ByteTerm::TypeParentheticalAssertionBegin:
1583	if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1584	MATCH_NEXT();
1585	BACKTRACK();
1586	case ByteTerm::TypeParentheticalAssertionEnd:
1587	if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1588	MATCH_NEXT();
1589	BACKTRACK();
1590
1591	case ByteTerm::TypeCheckInput:
1592	input.uncheckInput(currentTerm().checkInputCount);
1593	BACKTRACK();
1594
1595	case ByteTerm::TypeUncheckInput:
1596	input.checkInput(currentTerm().checkInputCount);
1597	BACKTRACK();
1598
1599	case ByteTerm::TypeDotStarEnclosure:
1600	RELEASE_ASSERT_NOT_REACHED();
1601	}
1602
1603	RELEASE_ASSERT_NOT_REACHED();
1604	return JSRegExpErrorNoMatch;
1605	}
1606
1607	JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1608	{
1609	JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1610
1611	if (result == JSRegExpMatch) {
1612	while (context->matchBegin == context->matchEnd) {
1613	result = matchDisjunction(disjunction, context, true);
1614	if (result != JSRegExpMatch)
1615	return result;
1616	}
1617	return JSRegExpMatch;
1618	}
1619
1620	return result;
1621	}
1622
1623	unsigned interpret()
1624	{
1625	// FIXME: https://bugs.webkit.org/show_bug.cgi?id=195970
1626	// [Yarr Interpreter] The interpreter doesn't have checks for stack overflow due to deep recursion
1627	if (!input.isAvailableInput(`0`))
1628	return offsetNoMatch;
1629
1630	if (pattern->m_lock)
1631	pattern->m_lock->lock();
1632
1633	for (unsigned i = `0`; i < pattern->m_body ->m_numSubpatterns + `1`; ++i)
1634	output[i << `1`] = offsetNoMatch;
1635
1636	allocatorPool = pattern->m_allocator->startAllocator();
1637	RELEASE_ASSERT(allocatorPool);
1638
1639	DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1640
1641	JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1642	if (result == JSRegExpMatch) {
1643	output[`0`] = context->matchBegin;
1644	output[`1`] = context->matchEnd;
1645	}
1646
1647	freeDisjunctionContext(context);
1648
1649	pattern->m_allocator->stopAllocator();
1650
1651	ASSERT((result == JSRegExpMatch) == (output[`0`] != offsetNoMatch));
1652
1653	if (pattern->m_lock)
1654	pattern->m_lock->unlock();
1655
1656	return output[`0`];
1657	}
1658
1659	Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1660	: pattern(pattern)
1661	, unicode(pattern->unicode())
1662	, output(output)
1663	, input(input, start, length, pattern->unicode())
1664	, startOffset(start)
1665	, remainingMatchCount(matchLimit)
1666	{
1667	}
1668
1669	private:
1670	BytecodePattern* pattern;
1671	bool unicode;
1672	unsigned* output;
1673	InputStream input;
1674	WTF::BumpPointerPool* allocatorPool { nullptr };
1675	unsigned startOffset;
1676	unsigned remainingMatchCount;
1677	};
1678
1679	class ByteCompiler {
1680	struct ParenthesesStackEntry {
1681	unsigned beginTerm;
1682	unsigned savedAlternativeIndex;
1683	ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/, unsigned subpatternId, bool capture = false/)
1684	: beginTerm(beginTerm)
1685	, savedAlternativeIndex(savedAlternativeIndex)
1686	{
1687	}
1688	};
1689
1690	public:
1691	ByteCompiler(YarrPattern& pattern)
1692	: m_pattern(pattern)
1693	{
1694	}
1695
1696	std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock, ErrorCode& errorCode)
1697	{
1698	if (UNLIKELY(!isSafeToRecurse())) {
1699	errorCode = ErrorCode::TooManyDisjunctions;
1700	return nullptr;
1701	}
1702
1703	regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives [`0`]->onceThrough());
1704	if (auto error = emitDisjunction(m_pattern.m_body, `0`, `0`)) {
1705	errorCode = error.value();
1706	return nullptr;
1707	}
1708	regexEnd();
1709
1710	#ifndef NDEBUG
1711	if (Options::dumpCompiledRegExpPatterns())
1712	dumpDisjunction(m_bodyDisjunction.get());
1713	#endif
1714
1715	return makeUnique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
1716	}
1717
1718	void checkInput(unsigned count)
1719	{
1720	m_bodyDisjunction ->terms.append(ByteTerm::CheckInput(count));
1721	}
1722
1723	void uncheckInput(unsigned count)
1724	{
1725	m_bodyDisjunction ->terms.append(ByteTerm::UncheckInput(count));
1726	}
1727
1728	void assertionBOL(unsigned inputPosition)
1729	{
1730	m_bodyDisjunction ->terms.append(ByteTerm::BOL(inputPosition));
1731	}
1732
1733	void assertionEOL(unsigned inputPosition)
1734	{
1735	m_bodyDisjunction ->terms.append(ByteTerm::EOL(inputPosition));
1736	}
1737
1738	void assertionWordBoundary(bool invert, unsigned inputPosition)
1739	{
1740	m_bodyDisjunction ->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1741	}
1742
1743	void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1744	{
1745	if (m_pattern.ignoreCase()) {
1746	UChar32 lo = u_tolower(ch);
1747	UChar32 hi = u_toupper(ch);
1748
1749	if (lo != hi) {
1750	m_bodyDisjunction ->terms.append(ByteTerm (lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1751	return;
1752	}
1753	}
1754
1755	m_bodyDisjunction ->terms.append(ByteTerm (ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1756	}
1757
1758	void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1759	{
1760	m_bodyDisjunction ->terms.append(ByteTerm (characterClass, invert, inputPosition));
1761
1762	m_bodyDisjunction ->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1763	m_bodyDisjunction ->terms.last().atom.quantityType = quantityType;
1764	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1765	}
1766
1767	void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1768	{
1769	ASSERT(subpatternId);
1770
1771	m_bodyDisjunction ->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1772
1773	m_bodyDisjunction ->terms.last().atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1774	m_bodyDisjunction ->terms.last().atom.quantityType = quantityType;
1775	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1776	}
1777
1778	void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1779	{
1780	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1781
1782	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1783	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1784	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1785	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1786
1787	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1788	m_currentAlternativeIndex = beginTerm + `1`;
1789	}
1790
1791	void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1792	{
1793	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1794
1795	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1796	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1797	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1798	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1799
1800	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1801	m_currentAlternativeIndex = beginTerm + `1`;
1802	}
1803
1804	void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1805	{
1806	// Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1807	// then fix this up at the end! - simplifying this should make it much clearer.
1808	// https://bugs.webkit.org/show_bug.cgi?id=50136
1809
1810	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1811
1812	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1813	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1814	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1815	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1816
1817	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1818	m_currentAlternativeIndex = beginTerm + `1`;
1819	}
1820
1821	void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1822	{
1823	unsigned beginTerm = m_bodyDisjunction ->terms.size();
1824
1825	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, `0`));
1826	m_bodyDisjunction ->terms.last().frameLocation = frameLocation;
1827	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeBegin());
1828	m_bodyDisjunction ->terms.last().frameLocation = alternativeFrameLocation;
1829
1830	m_parenthesesStack.append(ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1831	m_currentAlternativeIndex = beginTerm + `1`;
1832	}
1833
1834	void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1835	{
1836	unsigned beginTerm = popParenthesesStack();
1837	closeAlternative(beginTerm + `1`);
1838	unsigned endTerm = m_bodyDisjunction ->terms.size();
1839
1840	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1841
1842	bool invert = m_bodyDisjunction ->terms [beginTerm].invert();
1843	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1844
1845	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1846	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1847	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1848	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1849
1850	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1851	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1852	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1853	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1854	}
1855
1856	void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1857	{
1858	m_bodyDisjunction ->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1859	}
1860
1861	unsigned popParenthesesStack()
1862	{
1863	ASSERT(m_parenthesesStack.size());
1864	unsigned beginTerm = m_parenthesesStack.last().beginTerm;
1865	m_currentAlternativeIndex = m_parenthesesStack.last().savedAlternativeIndex;
1866	m_parenthesesStack.removeLast();
1867
1868	ASSERT(beginTerm < m_bodyDisjunction ->terms.size());
1869	ASSERT(m_currentAlternativeIndex < m_bodyDisjunction ->terms.size());
1870
1871	return beginTerm;
1872	}
1873
1874	void closeAlternative(unsigned beginTerm)
1875	{
1876	unsigned origBeginTerm = beginTerm;
1877	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1878	unsigned endIndex = m_bodyDisjunction ->terms.size();
1879
1880	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1881
1882	if (!m_bodyDisjunction ->terms [beginTerm].alternative.next)
1883	m_bodyDisjunction ->terms.remove(beginTerm);
1884	else {
1885	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1886	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1887	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1888	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1889	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1890	}
1891
1892	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1893
1894	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeEnd());
1895	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1896	}
1897	}
1898
1899	void closeBodyAlternative()
1900	{
1901	unsigned beginTerm = `0`;
1902	unsigned origBeginTerm = `0`;
1903	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1904	unsigned endIndex = m_bodyDisjunction ->terms.size();
1905
1906	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1907
1908	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1909	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1910	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1911	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1912	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1913	}
1914
1915	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1916
1917	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeEnd());
1918	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1919	}
1920
1921	void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = `0`)
1922	{
1923	unsigned beginTerm = popParenthesesStack();
1924	closeAlternative(beginTerm + `1`);
1925	unsigned endTerm = m_bodyDisjunction ->terms.size();
1926
1927	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1928
1929	ByteTerm& parenthesesBegin = m_bodyDisjunction ->terms [beginTerm];
1930
1931	bool capture = parenthesesBegin.capture();
1932	unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1933
1934	unsigned numSubpatterns = lastSubpatternId - subpatternId + `1`;
1935	auto parenthesesDisjunction = makeUnique<ByteDisjunction>(numSubpatterns, callFrameSize);
1936
1937	unsigned firstTermInParentheses = beginTerm + `1`;
1938	parenthesesDisjunction ->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + `2`);
1939
1940	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternBegin());
1941	for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1942	parenthesesDisjunction ->terms.append(m_bodyDisjunction ->terms [termInParentheses]);
1943	parenthesesDisjunction ->terms.append(ByteTerm::SubpatternEnd());
1944
1945	m_bodyDisjunction ->terms.shrink(beginTerm);
1946
1947	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1948	m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1949
1950	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1951	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1952	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1953	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1954	}
1955
1956	void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1957	{
1958	unsigned beginTerm = popParenthesesStack();
1959	closeAlternative(beginTerm + `1`);
1960	unsigned endTerm = m_bodyDisjunction ->terms.size();
1961
1962	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1963
1964	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1965	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1966
1967	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1968	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1969	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1970	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1971
1972	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1973	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1974	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1975	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1976	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1977	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1978	}
1979
1980	void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1981	{
1982	unsigned beginTerm = popParenthesesStack();
1983	closeAlternative(beginTerm + `1`);
1984	unsigned endTerm = m_bodyDisjunction ->terms.size();
1985
1986	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1987
1988	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1989	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1990
1991	m_bodyDisjunction ->terms.append(ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1992	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1993	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1994	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1995
1996	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1997	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1998	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1999	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
2000	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
2001	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
2002	}
2003
2004	void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
2005	{
2006	m_bodyDisjunction = makeUnique<ByteDisjunction>(numSubpatterns, callFrameSize);
2007	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
2008	m_bodyDisjunction ->terms [`0`].frameLocation = `0`;
2009	m_currentAlternativeIndex = `0`;
2010	}
2011
2012	void regexEnd()
2013	{
2014	closeBodyAlternative();
2015	}
2016
2017	void alternativeBodyDisjunction(bool onceThrough)
2018	{
2019	unsigned newAlternativeIndex = m_bodyDisjunction ->terms.size();
2020	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2021	m_bodyDisjunction ->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
2022
2023	m_currentAlternativeIndex = newAlternativeIndex;
2024	}
2025
2026	void alternativeDisjunction()
2027	{
2028	unsigned newAlternativeIndex = m_bodyDisjunction ->terms.size();
2029	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2030	m_bodyDisjunction ->terms.append(ByteTerm::AlternativeDisjunction());
2031
2032	m_currentAlternativeIndex = newAlternativeIndex;
2033	}
2034
2035	Optional<ErrorCode> WARN_UNUSED_RETURN emitDisjunction(PatternDisjunction* disjunction, Checked<unsigned, RecordOverflow> inputCountAlreadyChecked, unsigned parenthesesInputCountAlreadyChecked)
2036	{
2037	if (UNLIKELY(!isSafeToRecurse()))
2038	return ErrorCode::TooManyDisjunctions;
2039
2040	for (unsigned alt = `0`; alt < disjunction->m_alternatives.size(); ++alt) {
2041	auto currentCountAlreadyChecked = inputCountAlreadyChecked;
2042
2043	PatternAlternative* alternative = disjunction->m_alternatives [alt].get();
2044
2045	if (alt) {
2046	if (disjunction == m_pattern.m_body)
2047	alternativeBodyDisjunction(alternative->onceThrough());
2048	else
2049	alternativeDisjunction();
2050	}
2051
2052	unsigned minimumSize = alternative->m_minimumSize;
2053	ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2054	unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2055
2056	if (countToCheck) {
2057	checkInput(countToCheck);
2058	currentCountAlreadyChecked += countToCheck;
2059	if (currentCountAlreadyChecked.hasOverflowed())
2060	return ErrorCode::OffsetTooLarge;
2061	}
2062
2063	for (auto& term : alternative->m_terms) {
2064	switch (term.type) {
2065	case PatternTerm::TypeAssertionBOL:
2066	assertionBOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2067	break;
2068
2069	case PatternTerm::TypeAssertionEOL:
2070	assertionEOL((currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2071	break;
2072
2073	case PatternTerm::TypeAssertionWordBoundary:
2074	assertionWordBoundary(term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet());
2075	break;
2076
2077	case PatternTerm::TypePatternCharacter:
2078	atomPatternCharacter(term.patternCharacter, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2079	break;
2080
2081	case PatternTerm::TypeCharacterClass:
2082	atomCharacterClass(term.characterClass, term.invert(), (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2083	break;
2084
2085	case PatternTerm::TypeBackReference:
2086	atomBackReference(term.backReferenceSubpatternId, (currentCountAlreadyChecked - term.inputPosition).unsafeGet(), term.frameLocation, term.quantityMaxCount, term.quantityType);
2087	break;
2088
2089	case PatternTerm::TypeForwardReference:
2090	break;
2091
2092	case PatternTerm::TypeParenthesesSubpattern: {
2093	unsigned disjunctionAlreadyCheckedCount = `0`;
2094	if (term.quantityMaxCount == `1` && !term.parentheses.isCopy) {
2095	unsigned alternativeFrameLocation = term.frameLocation;
2096	// For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2097	if (term.quantityType == QuantifierFixedCount)
2098	disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2099	else
2100	alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2101	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2102	atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
2103	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2104	return error;
2105	atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2106	} else if (term.parentheses.isTerminal) {
2107	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2108	atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
2109	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount))
2110	return error;
2111	atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType);
2112	} else {
2113	unsigned delegateEndInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2114	atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount + delegateEndInputOffset, term.frameLocation, `0`);
2115	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, `0`))
2116	return error;
2117	atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityMinCount, term.quantityMaxCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
2118	}
2119	break;
2120	}
2121
2122	case PatternTerm::TypeParentheticalAssertion: {
2123	unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2124	unsigned positiveInputOffset = (currentCountAlreadyChecked - term.inputPosition).unsafeGet();
2125	unsigned uncheckAmount = `0`;
2126	if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2127	uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2128	uncheckInput(uncheckAmount);
2129	currentCountAlreadyChecked -= uncheckAmount;
2130	if (currentCountAlreadyChecked.hasOverflowed())
2131	return ErrorCode::OffsetTooLarge;
2132	}
2133
2134	atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
2135	if (auto error = emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount))
2136	return error;
2137	atomParentheticalAssertionEnd(`0`, term.frameLocation, term.quantityMaxCount, term.quantityType);
2138	if (uncheckAmount) {
2139	checkInput(uncheckAmount);
2140	currentCountAlreadyChecked += uncheckAmount;
2141	if (currentCountAlreadyChecked.hasOverflowed())
2142	return ErrorCode::OffsetTooLarge;
2143	}
2144	break;
2145	}
2146
2147	case PatternTerm::TypeDotStarEnclosure:
2148	assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
2149	break;
2150	}
2151	}
2152	}
2153	return WTF::nullopt;
2154	}
2155	#ifndef NDEBUG
2156	void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = `0`)
2157	{
2158	PrintStream& out = WTF::dataFile();
2159
2160	unsigned termIndexNest = `0`;
2161
2162	if (!nesting) {
2163	out.printf("ByteDisjunction(%p):\n", disjunction);
2164	nesting = `1`;
2165	} else {
2166	termIndexNest = nesting - `1`;
2167	nesting = `2`;
2168	}
2169
2170	auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2171	for (unsigned nestingDepth = `0`; nestingDepth < termIndexNest; nestingDepth++)
2172	out.print(" ");
2173	out.printf("%4zu", index);
2174	for (unsigned nestingDepth = `0`; nestingDepth < termNesting; nestingDepth++)
2175	out.print(" ");
2176	};
2177
2178	auto dumpQuantity = [&](ByteTerm& term) {
2179	if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == `1` && term.atom.quantityMaxCount == `1`)
2180	return;
2181
2182	out.print(" {", term.atom.quantityMinCount);
2183	if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2184	if (term.atom.quantityMaxCount == UINT_MAX)
2185	out.print(",inf");
2186	else
2187	out.print(",", term.atom.quantityMaxCount);
2188	}
2189	out.print("}");
2190	if (term.atom.quantityType == QuantifierGreedy)
2191	out.print(" greedy");
2192	else if (term.atom.quantityType == QuantifierNonGreedy)
2193	out.print(" non-greedy");
2194	};
2195
2196	auto dumpCaptured = [&](ByteTerm& term) {
2197	if (term.capture())
2198	out.print(" captured (#", term.atom.subpatternId, ")");
2199	};
2200
2201	auto dumpInverted = [&](ByteTerm& term) {
2202	if (term.invert())
2203	out.print(" inverted");
2204	};
2205
2206	auto dumpInputPosition = [&](ByteTerm& term) {
2207	out.printf(" inputPosition %u", term.inputPosition);
2208	};
2209
2210	auto dumpFrameLocation = [&](ByteTerm& term) {
2211	out.printf(" frameLocation %u", term.frameLocation);
2212	};
2213
2214	auto dumpCharacter = [&](ByteTerm& term) {
2215	out.print(" ");
2216	dumpUChar32(out, term.atom.patternCharacter);
2217	};
2218
2219	auto dumpCharClass = [&](ByteTerm& term) {
2220	out.print(" ");
2221	dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2222	};
2223
2224	for (size_t idx = `0`; idx < disjunction->terms.size(); ++idx) {
2225	ByteTerm term = disjunction->terms [idx];
2226
2227	bool outputNewline = true;
2228
2229	switch (term.type) {
2230	case ByteTerm::TypeBodyAlternativeBegin:
2231	outputTermIndexAndNest(idx, nesting++);
2232	out.print("BodyAlternativeBegin");
2233	if (term.alternative.onceThrough)
2234	out.print(" onceThrough");
2235	dumpFrameLocation(term);
2236	break;
2237	case ByteTerm::TypeBodyAlternativeDisjunction:
2238	outputTermIndexAndNest(idx, nesting - `1`);
2239	out.print("BodyAlternativeDisjunction");
2240	dumpFrameLocation(term);
2241	break;
2242	case ByteTerm::TypeBodyAlternativeEnd:
2243	outputTermIndexAndNest(idx, --nesting);
2244	out.print("BodyAlternativeEnd");
2245	dumpFrameLocation(term);
2246	break;
2247	case ByteTerm::TypeAlternativeBegin:
2248	outputTermIndexAndNest(idx, nesting++);
2249	out.print("AlternativeBegin");
2250	dumpFrameLocation(term);
2251	break;
2252	case ByteTerm::TypeAlternativeDisjunction:
2253	outputTermIndexAndNest(idx, nesting - `1`);
2254	out.print("AlternativeDisjunction");
2255	dumpFrameLocation(term);
2256	break;
2257	case ByteTerm::TypeAlternativeEnd:
2258	outputTermIndexAndNest(idx, --nesting);
2259	out.print("AlternativeEnd");
2260	dumpFrameLocation(term);
2261	break;
2262	case ByteTerm::TypeSubpatternBegin:
2263	outputTermIndexAndNest(idx, nesting++);
2264	out.print("SubpatternBegin");
2265	break;
2266	case ByteTerm::TypeSubpatternEnd:
2267	outputTermIndexAndNest(idx, --nesting);
2268	out.print("SubpatternEnd");
2269	break;
2270	case ByteTerm::TypeAssertionBOL:
2271	outputTermIndexAndNest(idx, nesting);
2272	out.print("AssertionBOL");
2273	break;
2274	case ByteTerm::TypeAssertionEOL:
2275	outputTermIndexAndNest(idx, nesting);
2276	out.print("AssertionEOL");
2277	break;
2278	case ByteTerm::TypeAssertionWordBoundary:
2279	outputTermIndexAndNest(idx, nesting);
2280	out.print("AssertionWordBoundary");
2281	break;
2282	case ByteTerm::TypePatternCharacterOnce:
2283	outputTermIndexAndNest(idx, nesting);
2284	out.print("PatternCharacterOnce");
2285	dumpInverted(term);
2286	dumpInputPosition(term);
2287	dumpFrameLocation(term);
2288	dumpCharacter(term);
2289	dumpQuantity(term);
2290	break;
2291	case ByteTerm::TypePatternCharacterFixed:
2292	outputTermIndexAndNest(idx, nesting);
2293	out.print("PatternCharacterFixed");
2294	dumpInverted(term);
2295	dumpInputPosition(term);
2296	dumpFrameLocation(term);
2297	dumpCharacter(term);
2298	out.print(" {", term.atom.quantityMinCount, "}");
2299	break;
2300	case ByteTerm::TypePatternCharacterGreedy:
2301	outputTermIndexAndNest(idx, nesting);
2302	out.print("PatternCharacterGreedy");
2303	dumpInverted(term);
2304	dumpInputPosition(term);
2305	dumpFrameLocation(term);
2306	dumpCharacter(term);
2307	dumpQuantity(term);
2308	break;
2309	case ByteTerm::TypePatternCharacterNonGreedy:
2310	outputTermIndexAndNest(idx, nesting);
2311	out.print("PatternCharacterNonGreedy");
2312	dumpInverted(term);
2313	dumpInputPosition(term);
2314	dumpFrameLocation(term);
2315	dumpCharacter(term);
2316	dumpQuantity(term);
2317	break;
2318	case ByteTerm::TypePatternCasedCharacterOnce:
2319	outputTermIndexAndNest(idx, nesting);
2320	out.print("PatternCasedCharacterOnce");
2321	break;
2322	case ByteTerm::TypePatternCasedCharacterFixed:
2323	outputTermIndexAndNest(idx, nesting);
2324	out.print("PatternCasedCharacterFixed");
2325	break;
2326	case ByteTerm::TypePatternCasedCharacterGreedy:
2327	outputTermIndexAndNest(idx, nesting);
2328	out.print("PatternCasedCharacterGreedy");
2329	break;
2330	case ByteTerm::TypePatternCasedCharacterNonGreedy:
2331	outputTermIndexAndNest(idx, nesting);
2332	out.print("PatternCasedCharacterNonGreedy");
2333	break;
2334	case ByteTerm::TypeCharacterClass:
2335	outputTermIndexAndNest(idx, nesting);
2336	out.print("CharacterClass");
2337	dumpInverted(term);
2338	dumpInputPosition(term);
2339	dumpFrameLocation(term);
2340	dumpCharClass(term);
2341	dumpQuantity(term);
2342	break;
2343	case ByteTerm::TypeBackReference:
2344	outputTermIndexAndNest(idx, nesting);
2345	out.print("BackReference #", term.atom.subpatternId);
2346	dumpQuantity(term);
2347	break;
2348	case ByteTerm::TypeParenthesesSubpattern:
2349	outputTermIndexAndNest(idx, nesting);
2350	out.print("ParenthesesSubpattern");
2351	dumpCaptured(term);
2352	dumpInverted(term);
2353	dumpInputPosition(term);
2354	dumpFrameLocation(term);
2355	dumpQuantity(term);
2356	out.print("\n");
2357	outputNewline = false;
2358	dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
2359	break;
2360	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2361	outputTermIndexAndNest(idx, nesting++);
2362	out.print("ParenthesesSubpatternOnceBegin");
2363	dumpCaptured(term);
2364	dumpInverted(term);
2365	dumpInputPosition(term);
2366	dumpFrameLocation(term);
2367	break;
2368	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2369	outputTermIndexAndNest(idx, --nesting);
2370	out.print("ParenthesesSubpatternOnceEnd");
2371	dumpFrameLocation(term);
2372	break;
2373	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2374	outputTermIndexAndNest(idx, nesting++);
2375	out.print("ParenthesesSubpatternTerminalBegin");
2376	dumpInverted(term);
2377	dumpInputPosition(term);
2378	dumpFrameLocation(term);
2379	break;
2380	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2381	outputTermIndexAndNest(idx, --nesting);
2382	out.print("ParenthesesSubpatternTerminalEnd");
2383	dumpFrameLocation(term);
2384	break;
2385	case ByteTerm::TypeParentheticalAssertionBegin:
2386	outputTermIndexAndNest(idx, nesting++);
2387	out.print("ParentheticalAssertionBegin");
2388	dumpInverted(term);
2389	dumpInputPosition(term);
2390	dumpFrameLocation(term);
2391	break;
2392	case ByteTerm::TypeParentheticalAssertionEnd:
2393	outputTermIndexAndNest(idx, --nesting);
2394	out.print("ParentheticalAssertionEnd");
2395	dumpFrameLocation(term);
2396	break;
2397	case ByteTerm::TypeCheckInput:
2398	outputTermIndexAndNest(idx, nesting);
2399	out.print("CheckInput ", term.checkInputCount);
2400	break;
2401	case ByteTerm::TypeUncheckInput:
2402	outputTermIndexAndNest(idx, nesting);
2403	out.print("UncheckInput ", term.checkInputCount);
2404	break;
2405	case ByteTerm::TypeDotStarEnclosure:
2406	outputTermIndexAndNest(idx, nesting);
2407	out.print("DotStarEnclosure");
2408	break;
2409	}
2410	if (outputNewline)
2411	out.print("\n");
2412	}
2413	}
2414	#endif
2415
2416	private:
2417	inline bool isSafeToRecurse() { return m_stackCheck.isSafeToRecurse(); }
2418
2419	YarrPattern& m_pattern;
2420	std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2421	StackCheck m_stackCheck;
2422	unsigned m_currentAlternativeIndex { `0` };
2423	Vector<ParenthesesStackEntry> m_parenthesesStack;
2424	Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2425	};
2426
2427	std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ErrorCode& errorCode, ConcurrentJSLock* lock)
2428	{
2429	return ByteCompiler (pattern).compile(allocator, lock, errorCode);
2430	}
2431
2432	unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2433	{
2434	SuperSamplerScope superSamplerScope(false);
2435	if (input.is8Bit())
2436	return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
2437	return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
2438	}
2439
2440	unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2441	{
2442	SuperSamplerScope superSamplerScope(false);
2443	return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2444	}
2445
2446	unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2447	{
2448	SuperSamplerScope superSamplerScope(false);
2449	return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2450	}
2451
2452	// These should be the same for both UChar & LChar.
2453	COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2454	COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2455	COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2456	COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2457	COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2458	COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2459	COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2460
2461
2462	} }
2463

Browse the source code of jsc/Source/JavaScriptCore/yarr/YarrInterpreter.cpp