Lexer.cpp source code [webcore/Source/JavaScriptCore/parser/Lexer.cpp]

1	/*
2	* Copyright (C) 1999-2000 Harri Porten ([email protected])
3	* Copyright (C) 2006-2017 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich ([email protected])
5	* Copyright (C) 2010 Zoltan Herczeg ([email protected])
6	* Copyright (C) 2012 Mathias Bynens ([email protected])
7	*
8	* This library is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU Library General Public
10	* License as published by the Free Software Foundation; either
11	* version 2 of the License, or (at your option) any later version.
12	*
13	* This library is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	* Library General Public License for more details.
17	*
18	* You should have received a copy of the GNU Library General Public License
19	* along with this library; see the file COPYING.LIB. If not, write to
20	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21	* Boston, MA 02110-1301, USA.
22	*
23	*/
24
25	#include "config.h"
26	#include "Lexer.h"
27
28	#include "BuiltinNames.h"
29	#include "Identifier.h"
30	#include "JSCInlines.h"
31	#include "JSFunctionInlines.h"
32	#include "KeywordLookup.h"
33	#include "Lexer.lut.h"
34	#include "Nodes.h"
35	#include "ParseInt.h"
36	#include "Parser.h"
37	#include <ctype.h>
38	#include <limits.h>
39	#include <string.h>
40	#include <wtf/Assertions.h>
41	#include <wtf/HexNumber.h>
42	#include <wtf/Variant.h>
43	#include <wtf/dtoa.h>
44
45	namespace JSC {
46
47	bool isLexerKeyword(const Identifier& identifier)
48	{
49	return JSC::mainTable.entry(identifier);
50	}
51
// Classification of a Latin-1 character, used to drive the lexer's main switch and the
// identifier predicates (isIdentStart / isIdentPart).
enum CharacterType {
    // Types for the main switch

    // The first four types are fixed: isIdentStart tests for equality with
    // CharacterIdentifierStart and isIdentPart tests "<= CharacterOtherIdentifierPart",
    // so every type that may appear inside an identifier must sort before all others.
    CharacterIdentifierStart,
    CharacterZero,
    CharacterNumber,

    // For single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
    // (http://unicode.org/reports/tr31/#Backward_Compatibility)
    CharacterOtherIdentifierPart,

    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterBackQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Other types
    CharacterWhiteSpace,
    CharacterPrivateIdentifierStart
};

// Enforce the ordering contract described above so that inserting a new enumerator in the
// wrong place fails to compile instead of silently changing what counts as an identifier part.
static_assert(CharacterIdentifierStart == 0 && CharacterZero == 1 && CharacterNumber == 2 && CharacterOtherIdentifierPart == 3,
    "identifier-capable character types must be the first four enumerators");
static_assert(CharacterInvalid > CharacterOtherIdentifierPart,
    "non-identifier character types must sort after CharacterOtherIdentifierPart");
100
101	// 256 Latin-1 codes
102	static constexpr const unsigned short typesOfLatin1Characters[`256`] = {
103	/ 0 - Null / CharacterInvalid,
104	/ 1 - Start of Heading / CharacterInvalid,
105	/ 2 - Start of Text / CharacterInvalid,
106	/ 3 - End of Text / CharacterInvalid,
107	/ 4 - End of Transm. / CharacterInvalid,
108	/ 5 - Enquiry / CharacterInvalid,
109	/ 6 - Acknowledgment / CharacterInvalid,
110	/ 7 - Bell / CharacterInvalid,
111	/ 8 - Back Space / CharacterInvalid,
112	/ 9 - Horizontal Tab / CharacterWhiteSpace,
113	/ 10 - Line Feed / CharacterLineTerminator,
114	/ 11 - Vertical Tab / CharacterWhiteSpace,
115	/ 12 - Form Feed / CharacterWhiteSpace,
116	/ 13 - Carriage Return / CharacterLineTerminator,
117	/ 14 - Shift Out / CharacterInvalid,
118	/ 15 - Shift In / CharacterInvalid,
119	/ 16 - Data Line Escape / CharacterInvalid,
120	/ 17 - Device Control 1 / CharacterInvalid,
121	/ 18 - Device Control 2 / CharacterInvalid,
122	/ 19 - Device Control 3 / CharacterInvalid,
123	/ 20 - Device Control 4 / CharacterInvalid,
124	/ 21 - Negative Ack. / CharacterInvalid,
125	/ 22 - Synchronous Idle / CharacterInvalid,
126	/ 23 - End of Transmit / CharacterInvalid,
127	/ 24 - Cancel / CharacterInvalid,
128	/ 25 - End of Medium / CharacterInvalid,
129	/ 26 - Substitute / CharacterInvalid,
130	/ 27 - Escape / CharacterInvalid,
131	/ 28 - File Separator / CharacterInvalid,
132	/ 29 - Group Separator / CharacterInvalid,
133	/ 30 - Record Separator / CharacterInvalid,
134	/ 31 - Unit Separator / CharacterInvalid,
135	/ 32 - Space / CharacterWhiteSpace,
136	/ 33 - ! / CharacterExclamationMark,
137	/ 34 - " / CharacterQuote,
138	/ 35 - # / CharacterInvalid,
139	/ 36 - $ / CharacterIdentifierStart,
140	/ 37 - % / CharacterModulo,
141	/ 38 - & / CharacterAnd,
142	/ 39 - ' / CharacterQuote,
143	/ 40 - ( / CharacterOpenParen,
144	/ 41 - ) / CharacterCloseParen,
145	/ 42 - * / CharacterMultiply,
146	/ 43 - + / CharacterAdd,
147	/ 44 - , / CharacterComma,
148	/ 45 - - / CharacterSub,
149	/ 46 - . / CharacterDot,
150	/ 47 - / / CharacterSlash,
151	/ 48 - 0 / CharacterZero,
152	/ 49 - 1 / CharacterNumber,
153	/ 50 - 2 / CharacterNumber,
154	/ 51 - 3 / CharacterNumber,
155	/ 52 - 4 / CharacterNumber,
156	/ 53 - 5 / CharacterNumber,
157	/ 54 - 6 / CharacterNumber,
158	/ 55 - 7 / CharacterNumber,
159	/ 56 - 8 / CharacterNumber,
160	/ 57 - 9 / CharacterNumber,
161	/ 58 - : / CharacterColon,
162	/ 59 - ; / CharacterSemicolon,
163	/ 60 - < / CharacterLess,
164	/ 61 - = / CharacterEqual,
165	/ 62 - > / CharacterGreater,
166	/ 63 - ? / CharacterQuestion,
167	/ 64 - @ / CharacterPrivateIdentifierStart,
168	/ 65 - A / CharacterIdentifierStart,
169	/ 66 - B / CharacterIdentifierStart,
170	/ 67 - C / CharacterIdentifierStart,
171	/ 68 - D / CharacterIdentifierStart,
172	/ 69 - E / CharacterIdentifierStart,
173	/ 70 - F / CharacterIdentifierStart,
174	/ 71 - G / CharacterIdentifierStart,
175	/ 72 - H / CharacterIdentifierStart,
176	/ 73 - I / CharacterIdentifierStart,
177	/ 74 - J / CharacterIdentifierStart,
178	/ 75 - K / CharacterIdentifierStart,
179	/ 76 - L / CharacterIdentifierStart,
180	/ 77 - M / CharacterIdentifierStart,
181	/ 78 - N / CharacterIdentifierStart,
182	/ 79 - O / CharacterIdentifierStart,
183	/ 80 - P / CharacterIdentifierStart,
184	/ 81 - Q / CharacterIdentifierStart,
185	/ 82 - R / CharacterIdentifierStart,
186	/ 83 - S / CharacterIdentifierStart,
187	/ 84 - T / CharacterIdentifierStart,
188	/ 85 - U / CharacterIdentifierStart,
189	/ 86 - V / CharacterIdentifierStart,
190	/ 87 - W / CharacterIdentifierStart,
191	/ 88 - X / CharacterIdentifierStart,
192	/ 89 - Y / CharacterIdentifierStart,
193	/ 90 - Z / CharacterIdentifierStart,
194	/ 91 - [ / CharacterOpenBracket,
195	/ 92 - \ / CharacterBackSlash,
196	/ 93 - ] / CharacterCloseBracket,
197	/ 94 - ^ / CharacterXor,
198	/ 95 - _ / CharacterIdentifierStart,
199	/ 96 - ` / CharacterBackQuote,
200	/ 97 - a / CharacterIdentifierStart,
201	/ 98 - b / CharacterIdentifierStart,
202	/ 99 - c / CharacterIdentifierStart,
203	/ 100 - d / CharacterIdentifierStart,
204	/ 101 - e / CharacterIdentifierStart,
205	/ 102 - f / CharacterIdentifierStart,
206	/ 103 - g / CharacterIdentifierStart,
207	/ 104 - h / CharacterIdentifierStart,
208	/ 105 - i / CharacterIdentifierStart,
209	/ 106 - j / CharacterIdentifierStart,
210	/ 107 - k / CharacterIdentifierStart,
211	/ 108 - l / CharacterIdentifierStart,
212	/ 109 - m / CharacterIdentifierStart,
213	/ 110 - n / CharacterIdentifierStart,
214	/ 111 - o / CharacterIdentifierStart,
215	/ 112 - p / CharacterIdentifierStart,
216	/ 113 - q / CharacterIdentifierStart,
217	/ 114 - r / CharacterIdentifierStart,
218	/ 115 - s / CharacterIdentifierStart,
219	/ 116 - t / CharacterIdentifierStart,
220	/ 117 - u / CharacterIdentifierStart,
221	/ 118 - v / CharacterIdentifierStart,
222	/ 119 - w / CharacterIdentifierStart,
223	/ 120 - x / CharacterIdentifierStart,
224	/ 121 - y / CharacterIdentifierStart,
225	/ 122 - z / CharacterIdentifierStart,
226	/ 123 - { / CharacterOpenBrace,
227	/ 124 - \| / CharacterOr,
228	/ 125 - } / CharacterCloseBrace,
229	/ 126 - ~ / CharacterTilde,
230	/ 127 - Delete / CharacterInvalid,
231	/ 128 - Cc category / CharacterInvalid,
232	/ 129 - Cc category / CharacterInvalid,
233	/ 130 - Cc category / CharacterInvalid,
234	/ 131 - Cc category / CharacterInvalid,
235	/ 132 - Cc category / CharacterInvalid,
236	/ 133 - Cc category / CharacterInvalid,
237	/ 134 - Cc category / CharacterInvalid,
238	/ 135 - Cc category / CharacterInvalid,
239	/ 136 - Cc category / CharacterInvalid,
240	/ 137 - Cc category / CharacterInvalid,
241	/ 138 - Cc category / CharacterInvalid,
242	/ 139 - Cc category / CharacterInvalid,
243	/ 140 - Cc category / CharacterInvalid,
244	/ 141 - Cc category / CharacterInvalid,
245	/ 142 - Cc category / CharacterInvalid,
246	/ 143 - Cc category / CharacterInvalid,
247	/ 144 - Cc category / CharacterInvalid,
248	/ 145 - Cc category / CharacterInvalid,
249	/ 146 - Cc category / CharacterInvalid,
250	/ 147 - Cc category / CharacterInvalid,
251	/ 148 - Cc category / CharacterInvalid,
252	/ 149 - Cc category / CharacterInvalid,
253	/ 150 - Cc category / CharacterInvalid,
254	/ 151 - Cc category / CharacterInvalid,
255	/ 152 - Cc category / CharacterInvalid,
256	/ 153 - Cc category / CharacterInvalid,
257	/ 154 - Cc category / CharacterInvalid,
258	/ 155 - Cc category / CharacterInvalid,
259	/ 156 - Cc category / CharacterInvalid,
260	/ 157 - Cc category / CharacterInvalid,
261	/ 158 - Cc category / CharacterInvalid,
262	/ 159 - Cc category / CharacterInvalid,
263	/ 160 - Zs category (nbsp) / CharacterWhiteSpace,
264	/ 161 - Po category / CharacterInvalid,
265	/ 162 - Sc category / CharacterInvalid,
266	/ 163 - Sc category / CharacterInvalid,
267	/ 164 - Sc category / CharacterInvalid,
268	/ 165 - Sc category / CharacterInvalid,
269	/ 166 - So category / CharacterInvalid,
270	/ 167 - So category / CharacterInvalid,
271	/ 168 - Sk category / CharacterInvalid,
272	/ 169 - So category / CharacterInvalid,
273	/ 170 - Ll category / CharacterIdentifierStart,
274	/ 171 - Pi category / CharacterInvalid,
275	/ 172 - Sm category / CharacterInvalid,
276	/ 173 - Cf category / CharacterInvalid,
277	/ 174 - So category / CharacterInvalid,
278	/ 175 - Sk category / CharacterInvalid,
279	/ 176 - So category / CharacterInvalid,
280	/ 177 - Sm category / CharacterInvalid,
281	/ 178 - No category / CharacterInvalid,
282	/ 179 - No category / CharacterInvalid,
283	/ 180 - Sk category / CharacterInvalid,
284	/ 181 - Ll category / CharacterIdentifierStart,
285	/ 182 - So category / CharacterInvalid,
286	/ 183 - Po category / CharacterOtherIdentifierPart,
287	/ 184 - Sk category / CharacterInvalid,
288	/ 185 - No category / CharacterInvalid,
289	/ 186 - Ll category / CharacterIdentifierStart,
290	/ 187 - Pf category / CharacterInvalid,
291	/ 188 - No category / CharacterInvalid,
292	/ 189 - No category / CharacterInvalid,
293	/ 190 - No category / CharacterInvalid,
294	/ 191 - Po category / CharacterInvalid,
295	/ 192 - Lu category / CharacterIdentifierStart,
296	/ 193 - Lu category / CharacterIdentifierStart,
297	/ 194 - Lu category / CharacterIdentifierStart,
298	/ 195 - Lu category / CharacterIdentifierStart,
299	/ 196 - Lu category / CharacterIdentifierStart,
300	/ 197 - Lu category / CharacterIdentifierStart,
301	/ 198 - Lu category / CharacterIdentifierStart,
302	/ 199 - Lu category / CharacterIdentifierStart,
303	/ 200 - Lu category / CharacterIdentifierStart,
304	/ 201 - Lu category / CharacterIdentifierStart,
305	/ 202 - Lu category / CharacterIdentifierStart,
306	/ 203 - Lu category / CharacterIdentifierStart,
307	/ 204 - Lu category / CharacterIdentifierStart,
308	/ 205 - Lu category / CharacterIdentifierStart,
309	/ 206 - Lu category / CharacterIdentifierStart,
310	/ 207 - Lu category / CharacterIdentifierStart,
311	/ 208 - Lu category / CharacterIdentifierStart,
312	/ 209 - Lu category / CharacterIdentifierStart,
313	/ 210 - Lu category / CharacterIdentifierStart,
314	/ 211 - Lu category / CharacterIdentifierStart,
315	/ 212 - Lu category / CharacterIdentifierStart,
316	/ 213 - Lu category / CharacterIdentifierStart,
317	/ 214 - Lu category / CharacterIdentifierStart,
318	/ 215 - Sm category / CharacterInvalid,
319	/ 216 - Lu category / CharacterIdentifierStart,
320	/ 217 - Lu category / CharacterIdentifierStart,
321	/ 218 - Lu category / CharacterIdentifierStart,
322	/ 219 - Lu category / CharacterIdentifierStart,
323	/ 220 - Lu category / CharacterIdentifierStart,
324	/ 221 - Lu category / CharacterIdentifierStart,
325	/ 222 - Lu category / CharacterIdentifierStart,
326	/ 223 - Ll category / CharacterIdentifierStart,
327	/ 224 - Ll category / CharacterIdentifierStart,
328	/ 225 - Ll category / CharacterIdentifierStart,
329	/ 226 - Ll category / CharacterIdentifierStart,
330	/ 227 - Ll category / CharacterIdentifierStart,
331	/ 228 - Ll category / CharacterIdentifierStart,
332	/ 229 - Ll category / CharacterIdentifierStart,
333	/ 230 - Ll category / CharacterIdentifierStart,
334	/ 231 - Ll category / CharacterIdentifierStart,
335	/ 232 - Ll category / CharacterIdentifierStart,
336	/ 233 - Ll category / CharacterIdentifierStart,
337	/ 234 - Ll category / CharacterIdentifierStart,
338	/ 235 - Ll category / CharacterIdentifierStart,
339	/ 236 - Ll category / CharacterIdentifierStart,
340	/ 237 - Ll category / CharacterIdentifierStart,
341	/ 238 - Ll category / CharacterIdentifierStart,
342	/ 239 - Ll category / CharacterIdentifierStart,
343	/ 240 - Ll category / CharacterIdentifierStart,
344	/ 241 - Ll category / CharacterIdentifierStart,
345	/ 242 - Ll category / CharacterIdentifierStart,
346	/ 243 - Ll category / CharacterIdentifierStart,
347	/ 244 - Ll category / CharacterIdentifierStart,
348	/ 245 - Ll category / CharacterIdentifierStart,
349	/ 246 - Ll category / CharacterIdentifierStart,
350	/ 247 - Sm category / CharacterInvalid,
351	/ 248 - Ll category / CharacterIdentifierStart,
352	/ 249 - Ll category / CharacterIdentifierStart,
353	/ 250 - Ll category / CharacterIdentifierStart,
354	/ 251 - Ll category / CharacterIdentifierStart,
355	/ 252 - Ll category / CharacterIdentifierStart,
356	/ 253 - Ll category / CharacterIdentifierStart,
357	/ 254 - Ll category / CharacterIdentifierStart,
358	/ 255 - Ll category / CharacterIdentifierStart
359	};
360
361	// This table provides the character that results from \X where X is the index in the table beginning
362	// with SPACE. A table value of 0 means that more processing needs to be done.
363	static constexpr const LChar singleCharacterEscapeValuesForASCII[`128`] = {
364	/ 0 - Null / `0`,
365	/ 1 - Start of Heading / `0`,
366	/ 2 - Start of Text / `0`,
367	/ 3 - End of Text / `0`,
368	/ 4 - End of Transm. / `0`,
369	/ 5 - Enquiry / `0`,
370	/ 6 - Acknowledgment / `0`,
371	/ 7 - Bell / `0`,
372	/ 8 - Back Space / `0`,
373	/ 9 - Horizontal Tab / `0`,
374	/ 10 - Line Feed / `0`,
375	/ 11 - Vertical Tab / `0`,
376	/ 12 - Form Feed / `0`,
377	/ 13 - Carriage Return / `0`,
378	/ 14 - Shift Out / `0`,
379	/ 15 - Shift In / `0`,
380	/ 16 - Data Line Escape / `0`,
381	/ 17 - Device Control 1 / `0`,
382	/ 18 - Device Control 2 / `0`,
383	/ 19 - Device Control 3 / `0`,
384	/ 20 - Device Control 4 / `0`,
385	/ 21 - Negative Ack. / `0`,
386	/ 22 - Synchronous Idle / `0`,
387	/ 23 - End of Transmit / `0`,
388	/ 24 - Cancel / `0`,
389	/ 25 - End of Medium / `0`,
390	/ 26 - Substitute / `0`,
391	/ 27 - Escape / `0`,
392	/ 28 - File Separator / `0`,
393	/ 29 - Group Separator / `0`,
394	/ 30 - Record Separator / `0`,
395	/ 31 - Unit Separator / `0`,
396	/ 32 - Space / `' '`,
397	/ 33 - ! / `'!'`,
398	/ 34 - " / `'"'`,
399	/ 35 - # / `'#'`,
400	/ 36 - $ / `'$'`,
401	/ 37 - % / `'%'`,
402	/ 38 - & / `'&'`,
403	/ 39 - ' / `'\''`,
404	/ 40 - ( / `'('`,
405	/ 41 - ) / `')'`,
406	/ 42 - * / `'*'`,
407	/ 43 - + / `'+'`,
408	/ 44 - , / `','`,
409	/ 45 - - / `'-'`,
410	/ 46 - . / `'.'`,
411	/ 47 - / / `'/'`,
412	/ 48 - 0 / `0`,
413	/ 49 - 1 / `0`,
414	/ 50 - 2 / `0`,
415	/ 51 - 3 / `0`,
416	/ 52 - 4 / `0`,
417	/ 53 - 5 / `0`,
418	/ 54 - 6 / `0`,
419	/ 55 - 7 / `0`,
420	/ 56 - 8 / `0`,
421	/ 57 - 9 / `0`,
422	/ 58 - : / `':'`,
423	/ 59 - ; / `';'`,
424	/ 60 - < / `'<'`,
425	/ 61 - = / `'='`,
426	/ 62 - > / `'>'`,
427	/ 63 - ? / `'?'`,
428	/ 64 - @ / `'@'`,
429	/ 65 - A / `'A'`,
430	/ 66 - B / `'B'`,
431	/ 67 - C / `'C'`,
432	/ 68 - D / `'D'`,
433	/ 69 - E / `'E'`,
434	/ 70 - F / `'F'`,
435	/ 71 - G / `'G'`,
436	/ 72 - H / `'H'`,
437	/ 73 - I / `'I'`,
438	/ 74 - J / `'J'`,
439	/ 75 - K / `'K'`,
440	/ 76 - L / `'L'`,
441	/ 77 - M / `'M'`,
442	/ 78 - N / `'N'`,
443	/ 79 - O / `'O'`,
444	/ 80 - P / `'P'`,
445	/ 81 - Q / `'Q'`,
446	/ 82 - R / `'R'`,
447	/ 83 - S / `'S'`,
448	/ 84 - T / `'T'`,
449	/ 85 - U / `'U'`,
450	/ 86 - V / `'V'`,
451	/ 87 - W / `'W'`,
452	/ 88 - X / `'X'`,
453	/ 89 - Y / `'Y'`,
454	/ 90 - Z / `'Z'`,
455	/ 91 - [ / `'['`,
456	/ 92 - \ / `'\\'`,
457	/ 93 - ] / `']'`,
458	/ 94 - ^ / `'^'`,
459	/ 95 - _ / `'_'`,
460	/ 96 - ` / '`',
461	/ 97 - a / `'a'`,
462	/ 98 - b / `0x08`,
463	/ 99 - c / `'c'`,
464	/ 100 - d / `'d'`,
465	/ 101 - e / `'e'`,
466	/ 102 - f / `0x0C`,
467	/ 103 - g / `'g'`,
468	/ 104 - h / `'h'`,
469	/ 105 - i / `'i'`,
470	/ 106 - j / `'j'`,
471	/ 107 - k / `'k'`,
472	/ 108 - l / `'l'`,
473	/ 109 - m / `'m'`,
474	/ 110 - n / `0x0A`,
475	/ 111 - o / `'o'`,
476	/ 112 - p / `'p'`,
477	/ 113 - q / `'q'`,
478	/ 114 - r / `0x0D`,
479	/ 115 - s / `'s'`,
480	/ 116 - t / `0x09`,
481	/ 117 - u / `0`,
482	/ 118 - v / `0x0B`,
483	/ 119 - w / `'w'`,
484	/ 120 - x / `0`,
485	/ 121 - y / `'y'`,
486	/ 122 - z / `'z'`,
487	/ 123 - { / `'{'`,
488	/ 124 - \| / `'\|'`,
489	/ 125 - } / `'}'`,
490	/ 126 - ~ / `'~'`,
491	/ 127 - Delete / `0`
492	};
493
494	template <typename T>
495	Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
496	: m_isReparsingFunction(false)
497	, m_vm(vm)
498	, m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
499	, m_scriptMode(scriptMode)
500	{
501	}
502
503	static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
504	{
505	if ((doubleValue \|\| !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
506	return INTEGER;
507	return DOUBLE;
508	}
509
510	template <typename T>
511	Lexer<T>::~Lexer()
512	{
513	}
514
515	template <typename T>
516	String Lexer<T>::invalidCharacterMessage() const
517	{
518	switch (m_current) {
519	case `0`:
520	return "Invalid character: '\\0'"_s;
521	case `10`:
522	return "Invalid character: '\\n'"_s;
523	case `11`:
524	return "Invalid character: '\\v'"_s;
525	case `13`:
526	return "Invalid character: '\\r'"_s;
527	case `35`:
528	return "Invalid character: '#'"_s;
529	case `64`:
530	return "Invalid character: '@'"_s;
531	case `96`:
532	return "Invalid character: '`'"_s;
533	default:
534	return makeString("Invalid character '\\u", hex(m_current, `4`, Lowercase), `'\''`);
535	}
536	}
537
538	template <typename T>
539	ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
540	{
541	ASSERT(m_code <= m_codeEnd);
542	return m_code;
543	}
544
545	template <typename T>
546	void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
547	{
548	m_arena = &arena->identifierArena();
549
550	m_lineNumber = source.firstLine().oneBasedInt();
551	m_lastToken = -`1`;
552
553	StringView sourceString = source.provider()->source();
554
555	if (!sourceString.isNull())
556	setCodeStart(sourceString);
557	else
558	m_codeStart = `0`;
559
560	m_source = &source;
561	m_sourceOffset = source.startOffset();
562	m_codeStartPlusOffset = m_codeStart + source.startOffset();
563	m_code = m_codeStartPlusOffset;
564	m_codeEnd = m_codeStart + source.endOffset();
565	m_error = false;
566	m_atLineStart = true;
567	m_lineStart = m_code;
568	m_lexErrorMessage = String ();
569	m_sourceURLDirective = String ();
570	m_sourceMappingURLDirective = String ();
571
572	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
573	m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
574	m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
575
576	if (LIKELY(m_code < m_codeEnd))
577	m_current = *m_code;
578	else
579	m_current = `0`;
580	ASSERT(currentOffset() == source.startOffset());
581	}
582
583	template <typename T>
584	template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
585	{
586	m_code += shiftAmount;
587	ASSERT(currentOffset() >= currentLineStartOffset());
588	m_current = *m_code;
589	}
590
591	template <typename T>
592	ALWAYS_INLINE void Lexer<T>::shift()
593	{
594	// At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
595	m_current = `0`;
596	++m_code;
597	if (LIKELY(m_code < m_codeEnd))
598	m_current = *m_code;
599	}
600
601	template <typename T>
602	ALWAYS_INLINE bool Lexer<T>::atEnd() const
603	{
604	ASSERT(!m_current \|\| m_code < m_codeEnd);
605	return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
606	}
607
608	template <typename T>
609	ALWAYS_INLINE T Lexer<T>::peek(int offset) const
610	{
611	ASSERT(offset > `0` && offset < `5`);
612	const T* code = m_code + offset;
613	return (code < m_codeEnd) ? *code : `0`;
614	}
615
616	struct ParsedUnicodeEscapeValue {
617	ParsedUnicodeEscapeValue(UChar32 value)
618	: m_value(value)
619	{
620	ASSERT(isValid());
621	}
622
623	enum SpecialValueType { Incomplete = -`2`, Invalid = -`1` };
624	ParsedUnicodeEscapeValue(SpecialValueType type)
625	: m_value(type)
626	{
627	}
628
629	bool isValid() const { return m_value >= `0`; }
630	bool isIncomplete() const { return m_value == Incomplete; }
631
632	UChar32 value() const
633	{
634	ASSERT(isValid());
635	return m_value;
636	}
637
638	private:
639	UChar32 m_value;
640	};
641
642	template<typename CharacterType>
643	ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
644	{
645	if (m_current == `'{'`) {
646	shift();
647	UChar32 codePoint = `0`;
648	do {
649	if (!isASCIIHexDigit(m_current))
650	return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
651	codePoint = (codePoint << `4`) \| toASCIIHexValue(m_current);
652	if (codePoint > UCHAR_MAX_VALUE) {
653	// For raw template literal syntax, we consume `NotEscapeSequence`.
654	// Here, we consume NotCodePoint's HexDigits.
655	//
656	// NotEscapeSequence ::
657	// u { [lookahread not one of HexDigit]
658	// u { NotCodePoint
659	// u { CodePoint [lookahead != }]
660	//
661	// NotCodePoint ::
662	// HexDigits but not if MV of HexDigits <= 0x10FFFF
663	//
664	// CodePoint ::
665	// HexDigits but not if MV of HexDigits > 0x10FFFF
666	shift();
667	while (isASCIIHexDigit(m_current))
668	shift();
669
670	return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
671	}
672	shift();
673	} while (m_current != `'}'`);
674	shift();
675	return codePoint;
676	}
677
678	auto character2 = peek(`1`);
679	auto character3 = peek(`2`);
680	auto character4 = peek(`3`);
681	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(character2) \|\| !isASCIIHexDigit(character3) \|\| !isASCIIHexDigit(character4))) {
682	auto result = (m_code + `4`) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
683
684	// For raw template literal syntax, we consume `NotEscapeSequence`.
685	//
686	// NotEscapeSequence ::
687	// u [lookahead not one of HexDigit][lookahead != {]
688	// u HexDigit [lookahead not one of HexDigit]
689	// u HexDigit HexDigit [lookahead not one of HexDigit]
690	// u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
691	while (isASCIIHexDigit(m_current))
692	shift();
693
694	return result;
695	}
696
697	auto result = convertUnicode(m_current, character2, character3, character4);
698	shift();
699	shift();
700	shift();
701	shift();
702	return result;
703	}
704
705	template <typename T>
706	void Lexer<T>::shiftLineTerminator()
707	{
708	ASSERT(isLineTerminator(m_current));
709
710	m_positionBeforeLastNewline = currentPosition();
711	T prev = m_current;
712	shift();
713
714	if (prev == `'\r'` && m_current == `'\n'`)
715	shift();
716
717	++m_lineNumber;
718	}
719
720	template <typename T>
721	ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
722	{
723	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
724	}
725
726	template <typename T>
727	ALWAYS_INLINE void Lexer<T>::skipWhitespace()
728	{
729	while (isWhiteSpace(m_current))
730	shift();
731	}
732
733	static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
734	{
735	return u_hasBinaryProperty(c, UCHAR_ID_START);
736	}
737
738	static inline bool isIdentStart(LChar c)
739	{
740	return typesOfLatin1Characters[c] == CharacterIdentifierStart;
741	}
742
743	static inline bool isIdentStart(UChar32 c)
744	{
745	return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
746	}
747
748	static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
749	{
750	return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) \|\| c == `0x200C` \|\| c == `0x200D`;
751	}
752
753	static ALWAYS_INLINE bool isIdentPart(LChar c)
754	{
755	// Character types are divided into two groups depending on whether they can be part of an
756	// identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
757	// part of an identifier. (See the CharacterType definition for more details.)
758	return typesOfLatin1Characters[c] <= CharacterOtherIdentifierPart;
759	}
760
761	static ALWAYS_INLINE bool isIdentPart(UChar32 c)
762	{
763	return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
764	}
765
766	static ALWAYS_INLINE bool isIdentPart(UChar c)
767	{
768	return isIdentPart(static_cast<UChar32>(c));
769	}
770
771	template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
772	{
773	if (isIdentPart(code[`0`]))
774	return true;
775
776	// Shortest sequence handled below is \u{0}, which is 5 characters.
777	if (!(code[`0`] == `'\\'` && codeEnd - code >= `5` && code[`1`] == `'u'`))
778	return false;
779
780	if (code[`2`] == `'{'`) {
781	UChar32 codePoint = `0`;
782	const CharacterType* pointer;
783	for (pointer = &code[`3`]; pointer < codeEnd; ++pointer) {
784	auto digit = *pointer;
785	if (!isASCIIHexDigit(digit))
786	break;
787	codePoint = (codePoint << `4`) \| toASCIIHexValue(digit);
788	if (codePoint > UCHAR_MAX_VALUE)
789	return false;
790	}
791	return isIdentPart(codePoint) && pointer < codeEnd && *pointer == `'}'`;
792	}
793
794	// Shortest sequence handled below is \uXXXX, which is 6 characters.
795	if (codeEnd - code < `6`)
796	return false;
797
798	auto character1 = code[`2`];
799	auto character2 = code[`3`];
800	auto character3 = code[`4`];
801	auto character4 = code[`5`];
802	return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
803	&& isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
804	}
805
806	static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
807	{
808	return isIdentPartIncludingEscapeTemplate(code, codeEnd);
809	}
810
811	static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
812	{
813	return isIdentPartIncludingEscapeTemplate(code, codeEnd);
814	}
815
// True for a decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (isASCIIDigit(character))
        return true;
    return character == '_';
}
821
// True for a hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (isASCIIHexDigit(character))
        return true;
    return character == '_';
}
827
// True for a binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (isASCIIBinaryDigit(character))
        return true;
    return character == '_';
}
833
// True for an octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (isASCIIOctalDigit(character))
        return true;
    return character == '_';
}
839
840	static inline LChar singleEscape(int c)
841	{
842	if (c < `128`) {
843	ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
844	return singleCharacterEscapeValuesForASCII[c];
845	}
846	return `0`;
847	}
848
849	template <typename T>
850	inline void Lexer<T>::record8(int c)
851	{
852	ASSERT(c >= `0`);
853	ASSERT(c <= `0xFF`);
854	m_buffer8.append(static_cast<LChar>(c));
855	}
856
// Debug-only check that |c| is representable as an 8-bit character (generic version).
template <typename T>
inline void assertCharIsIn8BitRange(T c)
{
    // |c| is otherwise unused when assertions are compiled out.
    UNUSED_PARAM(c);
    ASSERT(c >= 0);
    ASSERT(c <= 0xFF);
}
864
865	template <>
866	inline void assertCharIsIn8BitRange(UChar c)
867	{
868	UNUSED_PARAM(c);
869	ASSERT(c <= `0xFF`);
870	}
871
872	template <>
873	inline void assertCharIsIn8BitRange(LChar)
874	{
875	}
876
877	template <typename T>
878	inline void Lexer<T>::append8(const T* p, size_t length)
879	{
880	size_t currentSize = m_buffer8.size();
881	m_buffer8.grow(currentSize + length);
882	LChar* rawBuffer = m_buffer8.data() + currentSize;
883
884	for (size_t i = `0`; i < length; i++) {
885	T c = p[i];
886	assertCharIsIn8BitRange(c);
887	rawBuffer[i] = c;
888	}
889	}
890
891	template <typename T>
892	inline void Lexer<T>::append16(const LChar* p, size_t length)
893	{
894	size_t currentSize = m_buffer16.size();
895	m_buffer16.grow(currentSize + length);
896	UChar* rawBuffer = m_buffer16.data() + currentSize;
897
898	for (size_t i = `0`; i < length; i++)
899	rawBuffer[i] = p[i];
900	}
901
902	template <typename T>
903	inline void Lexer<T>::record16(T c)
904	{
905	m_buffer16.append(c);
906	}
907
908	template <typename T>
909	inline void Lexer<T>::record16(int c)
910	{
911	ASSERT(c >= `0`);
912	ASSERT(c <= static_cast<int>(USHRT_MAX));
913	m_buffer16.append(static_cast<UChar>(c));
914	}
915
916	template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
917	{
918	ASSERT(codePoint >= `0`);
919	ASSERT(codePoint <= UCHAR_MAX_VALUE);
920	if (U_IS_BMP(codePoint))
921	record16(codePoint);
922	else {
923	UChar codeUnits[`2`] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
924	append16(codeUnits, `2`);
925	}
926	}
927
928	#if !ASSERT_DISABLED
929	bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
930	{
931	if (!ident)
932	return true;
933	/ Just block any use of suspicious identifiers. This is intended to*
934	* be used as a safety net while implementing builtins.
935	*/
936	// FIXME: How can a debug-only assertion be a safety net?
937	if (*ident == vm.propertyNames->builtinNames().callPublicName())
938	return false;
939	if (*ident == vm.propertyNames->builtinNames().applyPublicName())
940	return false;
941	if (*ident == vm.propertyNames->eval)
942	return false;
943	if (*ident == vm.propertyNames->Function)
944	return false;
945	return true;
946	}
947	#endif
948
949	template <>
950	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
951	{
952	tokenData->escaped = false;
953	const ptrdiff_t remaining = m_codeEnd - m_code;
954	if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
955	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
956	if (keyword != IDENT) {
957	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
958	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
959	}
960	}
961
962	bool isPrivateName = m_current == `'@'` && m_parsingBuiltinFunction;
963	if (isPrivateName)
964	shift();
965
966	const LChar* identifierStart = currentSourcePtr();
967	unsigned identifierLineStart = currentLineStartOffset();
968
969	while (isIdentPart(m_current))
970	shift();
971
972	if (UNLIKELY(m_current == `'\\'`)) {
973	setOffsetFromSourcePtr(identifierStart, identifierLineStart);
974	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
975	}
976
977	const Identifier* ident = nullptr;
978
979	if (shouldCreateIdentifier \|\| m_parsingBuiltinFunction) {
980	int identifierLength = currentSourcePtr() - identifierStart;
981	ident = makeIdentifier(identifierStart, identifierLength);
982	if (m_parsingBuiltinFunction) {
983	if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
984	m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
985	return ERRORTOK;
986	}
987	if (isPrivateName)
988	ident = &m_arena->makeIdentifier(m_vm, m_vm->propertyNames->lookUpPrivateName(*ident));
989	else if (*ident == m_vm->propertyNames->undefinedKeyword)
990	tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
991	if (!ident)
992	return INVALID_PRIVATE_NAME_ERRORTOK;
993	}
994	tokenData->ident = ident;
995	} else
996	tokenData->ident = nullptr;
997
998	if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
999	ASSERT(shouldCreateIdentifier);
1000	if (remaining < maxTokenLength) {
1001	const HashTableValue* entry = JSC::mainTable.entry(*ident);
1002	ASSERT((remaining < maxTokenLength) \|\| !entry);
1003	if (!entry)
1004	return IDENT;
1005	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1006	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
1007	}
1008	return IDENT;
1009	}
1010
1011	return IDENT;
1012	}
1013
1014	template <>
1015	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1016	{
1017	tokenData->escaped = false;
1018	const ptrdiff_t remaining = m_codeEnd - m_code;
1019	if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
1020	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1021	if (keyword != IDENT) {
1022	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
1023	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1024	}
1025	}
1026
1027	bool isPrivateName = m_current == `'@'` && m_parsingBuiltinFunction;
1028	if (isPrivateName)
1029	shift();
1030
1031	const UChar* identifierStart = currentSourcePtr();
1032	int identifierLineStart = currentLineStartOffset();
1033
1034	UChar orAllChars = `0`;
1035
1036	while (isIdentPart(m_current)) {
1037	orAllChars \|= m_current;
1038	shift();
1039	}
1040
1041	if (UNLIKELY(m_current == `'\\'`)) {
1042	ASSERT(!isPrivateName);
1043	setOffsetFromSourcePtr(identifierStart, identifierLineStart);
1044	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
1045	}
1046
1047	bool isAll8Bit = false;
1048
1049	if (!(orAllChars & ~`0xff`))
1050	isAll8Bit = true;
1051
1052	const Identifier* ident = nullptr;
1053
1054	if (shouldCreateIdentifier \|\| m_parsingBuiltinFunction) {
1055	int identifierLength = currentSourcePtr() - identifierStart;
1056	if (isAll8Bit)
1057	ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1058	else
1059	ident = makeIdentifier(identifierStart, identifierLength);
1060	if (m_parsingBuiltinFunction) {
1061	if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1062	m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1063	return ERRORTOK;
1064	}
1065	if (isPrivateName)
1066	ident = &m_arena->makeIdentifier(m_vm, m_vm->propertyNames->lookUpPrivateName(*ident));
1067	else if (*ident == m_vm->propertyNames->undefinedKeyword)
1068	tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
1069	if (!ident)
1070	return INVALID_PRIVATE_NAME_ERRORTOK;
1071	}
1072	tokenData->ident = ident;
1073	} else
1074	tokenData->ident = nullptr;
1075
1076	if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
1077	ASSERT(shouldCreateIdentifier);
1078	if (remaining < maxTokenLength) {
1079	const HashTableValue* entry = JSC::mainTable.entry(*ident);
1080	ASSERT((remaining < maxTokenLength) \|\| !entry);
1081	if (!entry)
1082	return IDENT;
1083	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1084	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
1085	}
1086	return IDENT;
1087	}
1088
1089	return IDENT;
1090	}
1091
1092	template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1093	{
1094	tokenData->escaped = true;
1095	auto identifierStart = currentSourcePtr();
1096	bool bufferRequired = false;
1097
1098	while (true) {
1099	if (LIKELY(isIdentPart(m_current))) {
1100	shift();
1101	continue;
1102	}
1103	if (LIKELY(m_current != `'\\'`))
1104	break;
1105
1106	// \uXXXX unicode characters.
1107	bufferRequired = true;
1108	if (identifierStart != currentSourcePtr())
1109	m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1110	shift();
1111	if (UNLIKELY(m_current != `'u'`))
1112	return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1113	shift();
1114	auto character = parseUnicodeEscape();
1115	if (UNLIKELY(!character.isValid()))
1116	return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1117	if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1118	return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1119	if (shouldCreateIdentifier)
1120	recordUnicodeCodePoint(character.value());
1121	identifierStart = currentSourcePtr();
1122	}
1123
1124	int identifierLength;
1125	const Identifier* ident = nullptr;
1126	if (shouldCreateIdentifier) {
1127	if (!bufferRequired) {
1128	identifierLength = currentSourcePtr() - identifierStart;
1129	ident = makeIdentifier(identifierStart, identifierLength);
1130	} else {
1131	if (identifierStart != currentSourcePtr())
1132	m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1133	ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1134	}
1135
1136	tokenData->ident = ident;
1137	} else
1138	tokenData->ident = nullptr;
1139
1140	m_buffer16.shrink(`0`);
1141
1142	if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1143	ASSERT(shouldCreateIdentifier);
1144	const HashTableValue* entry = JSC::mainTable.entry(*ident);
1145	if (!entry)
1146	return IDENT;
1147	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1148	if ((token != RESERVED_IF_STRICT) \|\| strictMode)
1149	return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
1150	}
1151
1152	return IDENT;
1153	}
1154
1155	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1156	{
1157	return character < `0xE`;
1158	}
1159
1160	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1161	{
1162	return character < `0xE` \|\| character > `0xFF`;
1163	}
1164
1165	template <typename T>
1166	template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1167	{
1168	int startingOffset = currentOffset();
1169	int startingLineStartOffset = currentLineStartOffset();
1170	int startingLineNumber = lineNumber();
1171	T stringQuoteCharacter = m_current;
1172	shift();
1173
1174	const T* stringStart = currentSourcePtr();
1175
1176	while (m_current != stringQuoteCharacter) {
1177	if (UNLIKELY(m_current == `'\\'`)) {
1178	if (stringStart != currentSourcePtr() && shouldBuildStrings)
1179	append8(stringStart, currentSourcePtr() - stringStart);
1180	shift();
1181
1182	LChar escape = singleEscape(m_current);
1183
1184	// Most common escape sequences first.
1185	if (escape) {
1186	if (shouldBuildStrings)
1187	record8(escape);
1188	shift();
1189	} else if (UNLIKELY(isLineTerminator(m_current)))
1190	shiftLineTerminator();
1191	else if (m_current == `'x'`) {
1192	shift();
1193	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(`1`))) {
1194	m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1195	return (atEnd() \|\| (isASCIIHexDigit(m_current) && (m_code + `1` == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1196	}
1197	T prev = m_current;
1198	shift();
1199	if (shouldBuildStrings)
1200	record8(convertHex(prev, m_current));
1201	shift();
1202	} else {
1203	setOffset(startingOffset, startingLineStartOffset);
1204	setLineNumber(startingLineNumber);
1205	m_buffer8.shrink(`0`);
1206	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1207	}
1208	stringStart = currentSourcePtr();
1209	continue;
1210	}
1211
1212	if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1213	setOffset(startingOffset, startingLineStartOffset);
1214	setLineNumber(startingLineNumber);
1215	m_buffer8.shrink(`0`);
1216	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1217	}
1218
1219	shift();
1220	}
1221
1222	if (currentSourcePtr() != stringStart && shouldBuildStrings)
1223	append8(stringStart, currentSourcePtr() - stringStart);
1224	if (shouldBuildStrings) {
1225	tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1226	m_buffer8.shrink(`0`);
1227	} else
1228	tokenData->ident = `0`;
1229
1230	return StringParsedSuccessfully;
1231	}
1232
1233	template <typename T>
1234	template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
1235	{
1236	if (m_current == `'x'`) {
1237	shift();
1238	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(`1`))) {
1239	// For raw template literal syntax, we consume `NotEscapeSequence`.
1240	//
1241	// NotEscapeSequence ::
1242	// x [lookahread not one of HexDigit]
1243	// x HexDigit [lookahread not one of HexDigit]
1244	if (isASCIIHexDigit(m_current))
1245	shift();
1246	ASSERT(!isASCIIHexDigit(m_current));
1247
1248	m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1249	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1250	}
1251
1252	T prev = m_current;
1253	shift();
1254	if (shouldBuildStrings)
1255	record16(convertHex(prev, m_current));
1256	shift();
1257
1258	return StringParsedSuccessfully;
1259	}
1260
1261	if (m_current == `'u'`) {
1262	shift();
1263
1264	if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
1265	if (shouldBuildStrings)
1266	record16(`'u'`);
1267	return StringParsedSuccessfully;
1268	}
1269
1270	auto character = parseUnicodeEscape();
1271	if (character.isValid()) {
1272	if (shouldBuildStrings)
1273	recordUnicodeCodePoint(character.value());
1274	return StringParsedSuccessfully;
1275	}
1276
1277	m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1278	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1279	}
1280
1281	if (strictMode) {
1282	if (isASCIIDigit(m_current)) {
1283	// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1284	int character1 = m_current;
1285	shift();
1286	if (character1 != `'0'` \|\| isASCIIDigit(m_current)) {
1287	// For raw template literal syntax, we consume `NotEscapeSequence`.
1288	//
1289	// NotEscapeSequence ::
1290	// 0 DecimalDigit
1291	// DecimalDigit but not 0
1292	if (character1 == `'0'`)
1293	shift();
1294
1295	m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1296	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1297	}
1298	if (shouldBuildStrings)
1299	record16(`0`);
1300	return StringParsedSuccessfully;
1301	}
1302	} else {
1303	if (isASCIIOctalDigit(m_current)) {
1304	// Octal character sequences
1305	T character1 = m_current;
1306	shift();
1307	if (isASCIIOctalDigit(m_current)) {
1308	// Two octal characters
1309	T character2 = m_current;
1310	shift();
1311	if (character1 >= `'0'` && character1 <= `'3'` && isASCIIOctalDigit(m_current)) {
1312	if (shouldBuildStrings)
1313	record16((character1 - `'0'`) * `64` + (character2 - `'0'`) * `8` + m_current - `'0'`);
1314	shift();
1315	} else {
1316	if (shouldBuildStrings)
1317	record16((character1 - `'0'`) * `8` + character2 - `'0'`);
1318	}
1319	} else {
1320	if (shouldBuildStrings)
1321	record16(character1 - `'0'`);
1322	}
1323	return StringParsedSuccessfully;
1324	}
1325	}
1326
1327	if (!atEnd()) {
1328	if (shouldBuildStrings)
1329	record16(m_current);
1330	shift();
1331	return StringParsedSuccessfully;
1332	}
1333
1334	m_lexErrorMessage = "Unterminated string constant"_s;
1335	return StringUnterminated;
1336	}
1337
1338	template <typename T>
1339	template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1340	{
1341	T stringQuoteCharacter = m_current;
1342	shift();
1343
1344	const T* stringStart = currentSourcePtr();
1345
1346	while (m_current != stringQuoteCharacter) {
1347	if (UNLIKELY(m_current == `'\\'`)) {
1348	if (stringStart != currentSourcePtr() && shouldBuildStrings)
1349	append16(stringStart, currentSourcePtr() - stringStart);
1350	shift();
1351
1352	LChar escape = singleEscape(m_current);
1353
1354	// Most common escape sequences first
1355	if (escape) {
1356	if (shouldBuildStrings)
1357	record16(escape);
1358	shift();
1359	} else if (UNLIKELY(isLineTerminator(m_current)))
1360	shiftLineTerminator();
1361	else {
1362	StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
1363	if (result != StringParsedSuccessfully)
1364	return result;
1365	}
1366
1367	stringStart = currentSourcePtr();
1368	continue;
1369	}
1370	// Fast check for characters that require special handling.
1371	// Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1372	static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1373	if (UNLIKELY(m_current < `0xE`)) {
1374	// New-line or end of input is not allowed
1375	if (atEnd() \|\| m_current == `'\r'` \|\| m_current == `'\n'`) {
1376	m_lexErrorMessage = "Unexpected EOF"_s;
1377	return atEnd() ? StringUnterminated : StringCannotBeParsed;
1378	}
1379	// Anything else is just a normal character
1380	}
1381	shift();
1382	}
1383
1384	if (currentSourcePtr() != stringStart && shouldBuildStrings)
1385	append16(stringStart, currentSourcePtr() - stringStart);
1386	if (shouldBuildStrings)
1387	tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1388	else
1389	tokenData->ident = `0`;
1390
1391	m_buffer16.shrink(`0`);
1392	return StringParsedSuccessfully;
1393	}
1394
1395	template <typename T>
1396	typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1397	{
1398	bool parseCookedFailed = false;
1399	const T* stringStart = currentSourcePtr();
1400	const T* rawStringStart = currentSourcePtr();
1401
1402	while (m_current != '`') {
1403	if (UNLIKELY(m_current == `'\\'`)) {
1404	if (stringStart != currentSourcePtr())
1405	append16(stringStart, currentSourcePtr() - stringStart);
1406	shift();
1407
1408	LChar escape = singleEscape(m_current);
1409
1410	// Most common escape sequences first.
1411	if (escape) {
1412	record16(escape);
1413	shift();
1414	} else if (UNLIKELY(isLineTerminator(m_current))) {
1415	// Normalize <CR>, <CR><LF> to <LF>.
1416	if (m_current == `'\r'`) {
1417	ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1418
1419	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1420	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1421	m_bufferForRawTemplateString16.append(`'\n'`);
1422	}
1423
1424	shiftLineTerminator();
1425	rawStringStart = currentSourcePtr();
1426	} else
1427	shiftLineTerminator();
1428	} else {
1429	bool strictMode = true;
1430	StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
1431	if (result != StringParsedSuccessfully) {
1432	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1433	parseCookedFailed = true;
1434	else
1435	return result;
1436	}
1437	}
1438
1439	stringStart = currentSourcePtr();
1440	continue;
1441	}
1442
1443	if (m_current == `'$'` && peek(`1`) == `'{'`)
1444	break;
1445
1446	// Fast check for characters that require special handling.
1447	// Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1448	// as possible, and lets through all common ASCII characters.
1449	if (UNLIKELY(((static_cast<unsigned>(m_current) - `0xE`) & `0x2000`))) {
1450	// End of input is not allowed.
1451	// Unlike String, line terminator is allowed.
1452	if (atEnd()) {
1453	m_lexErrorMessage = "Unexpected EOF"_s;
1454	return StringUnterminated;
1455	}
1456
1457	if (isLineTerminator(m_current)) {
1458	if (m_current == `'\r'`) {
1459	// Normalize <CR>, <CR><LF> to <LF>.
1460	if (stringStart != currentSourcePtr())
1461	append16(stringStart, currentSourcePtr() - stringStart);
1462	if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1463	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1464
1465	record16(`'\n'`);
1466	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1467	m_bufferForRawTemplateString16.append(`'\n'`);
1468	shiftLineTerminator();
1469	stringStart = currentSourcePtr();
1470	rawStringStart = currentSourcePtr();
1471	} else
1472	shiftLineTerminator();
1473	continue;
1474	}
1475	// Anything else is just a normal character
1476	}
1477
1478	shift();
1479	}
1480
1481	bool isTail = m_current == '`';
1482
1483	if (currentSourcePtr() != stringStart)
1484	append16(stringStart, currentSourcePtr() - stringStart);
1485	if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1486	m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1487
1488	if (!parseCookedFailed)
1489	tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1490	else
1491	tokenData->cooked = nullptr;
1492
1493	// Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1494	if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1495	tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1496	else
1497	tokenData->raw = nullptr;
1498
1499	tokenData->isTail = isTail;
1500
1501	m_buffer16.shrink(`0`);
1502	m_bufferForRawTemplateString16.shrink(`0`);
1503
1504	if (isTail) {
1505	// Skip `
1506	shift();
1507	} else {
1508	// Skip $ and {
1509	shift();
1510	shift();
1511	}
1512
1513	return StringParsedSuccessfully;
1514	}
1515
1516	template <typename T>
1517	ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
1518	{
1519	ASSERT(isASCIIHexDigit(m_current));
1520
1521	// Optimization: most hexadecimal values fit into 4 bytes.
1522	uint32_t hexValue = `0`;
1523	int maximumDigits = `7`;
1524
1525	do {
1526	if (m_current == `'_'`) {
1527	if (UNLIKELY(!isASCIIHexDigit(peek(`1`))))
1528	return WTF::nullopt;
1529
1530	shift();
1531	}
1532
1533	hexValue = (hexValue << `4`) + toASCIIHexValue(m_current);
1534	shift();
1535	--maximumDigits;
1536	} while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= `0`);
1537
1538	if (LIKELY(maximumDigits >= `0` && m_current != `'n'`))
1539	return NumberParseResult { hexValue };
1540
1541	// No more place in the hexValue buffer.
1542	// The values are shifted out and placed into the m_buffer8 vector.
1543	for (int i = `0`; i < `8`; ++i) {
1544	int digit = hexValue >> `28`;
1545	if (digit < `10`)
1546	record8(digit + `'0'`);
1547	else
1548	record8(digit - `10` + `'a'`);
1549	hexValue <<= `4`;
1550	}
1551
1552	while (isASCIIHexDigitOrSeparator(m_current)) {
1553	if (m_current == `'_'`) {
1554	if (UNLIKELY(!isASCIIHexDigit(peek(`1`))))
1555	return WTF::nullopt;
1556
1557	shift();
1558	}
1559
1560	record8(m_current);
1561	shift();
1562	}
1563
1564	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1565	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1566
1567	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `16`) };
1568	}
1569
1570	template <typename T>
1571	ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
1572	{
1573	ASSERT(isASCIIBinaryDigit(m_current));
1574
1575	// Optimization: most binary values fit into 4 bytes.
1576	uint32_t binaryValue = `0`;
1577	const unsigned maximumDigits = `32`;
1578	int digit = maximumDigits - `1`;
1579	// Temporary buffer for the digits. Makes easier
1580	// to reconstruct the input characters when needed.
1581	LChar digits[maximumDigits];
1582
1583	do {
1584	if (m_current == `'_'`) {
1585	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`))))
1586	return WTF::nullopt;
1587
1588	shift();
1589	}
1590
1591	binaryValue = (binaryValue << `1`) + (m_current - `'0'`);
1592	digits[digit] = m_current;
1593	shift();
1594	--digit;
1595	} while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= `0`);
1596
1597	if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= `0` && m_current != `'n'`))
1598	return NumberParseResult { binaryValue };
1599
1600	for (int i = maximumDigits - `1`; i > digit; --i)
1601	record8(digits[i]);
1602
1603	while (isASCIIBinaryDigitOrSeparator(m_current)) {
1604	if (m_current == `'_'`) {
1605	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`))))
1606	return WTF::nullopt;
1607
1608	shift();
1609	}
1610
1611	record8(m_current);
1612	shift();
1613	}
1614
1615	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1616	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1617
1618	if (isASCIIDigit(m_current))
1619	return WTF::nullopt;
1620
1621	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `2`) };
1622	}
1623
1624	template <typename T>
1625	ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
1626	{
1627	ASSERT(isASCIIOctalDigit(m_current));
1628
1629	// Optimization: most octal values fit into 4 bytes.
1630	uint32_t octalValue = `0`;
1631	const unsigned maximumDigits = `10`;
1632	int digit = maximumDigits - `1`;
1633	// Temporary buffer for the digits. Makes easier
1634	// to reconstruct the input characters when needed.
1635	LChar digits[maximumDigits];
1636
1637	do {
1638	if (m_current == `'_'`) {
1639	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`))))
1640	return WTF::nullopt;
1641
1642	shift();
1643	}
1644
1645	octalValue = octalValue * `8` + (m_current - `'0'`);
1646	digits[digit] = m_current;
1647	shift();
1648	--digit;
1649	} while (isASCIIOctalDigitOrSeparator(m_current) && digit >= `0`);
1650
1651	if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= `0` && m_current != `'n'`))
1652	return NumberParseResult { octalValue };
1653
1654	for (int i = maximumDigits - `1`; i > digit; --i)
1655	record8(digits[i]);
1656
1657	while (isASCIIOctalDigitOrSeparator(m_current)) {
1658	if (m_current == `'_'`) {
1659	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`))))
1660	return WTF::nullopt;
1661
1662	shift();
1663	}
1664
1665	record8(m_current);
1666	shift();
1667	}
1668
1669	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1670	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1671
1672	if (isASCIIDigit(m_current))
1673	return WTF::nullopt;
1674
1675	return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), `8`) };
1676	}
1677
1678	template <typename T>
1679	ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
1680	{
1681	ASSERT(isASCIIDigit(m_current) \|\| m_buffer8.size());
1682
1683	// Optimization: most decimal values fit into 4 bytes.
1684	uint32_t decimalValue = `0`;
1685
1686	// Since parseOctal may be executed before parseDecimal,
1687	// the m_buffer8 may hold ascii digits.
1688	if (!m_buffer8.size()) {
1689	const unsigned maximumDigits = `10`;
1690	int digit = maximumDigits - `1`;
1691	// Temporary buffer for the digits. Makes easier
1692	// to reconstruct the input characters when needed.
1693	LChar digits[maximumDigits];
1694
1695	do {
1696	if (m_current == `'_'`) {
1697	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1698	return WTF::nullopt;
1699
1700	shift();
1701	}
1702
1703	decimalValue = decimalValue * `10` + (m_current - `'0'`);
1704	digits[digit] = m_current;
1705	shift();
1706	--digit;
1707	} while (isASCIIDigitOrSeparator(m_current) && digit >= `0`);
1708
1709	if (digit >= `0` && m_current != `'.'` && !isASCIIAlphaCaselessEqual(m_current, `'e'`) && m_current != `'n'`)
1710	return NumberParseResult { decimalValue };
1711
1712	for (int i = maximumDigits - `1`; i > digit; --i)
1713	record8(digits[i]);
1714	}
1715
1716	while (isASCIIDigitOrSeparator(m_current)) {
1717	if (m_current == `'_'`) {
1718	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1719	return WTF::nullopt;
1720
1721	shift();
1722	}
1723
1724	record8(m_current);
1725	shift();
1726	}
1727
1728	if (UNLIKELY(Options::useBigInt() && m_current == `'n'`))
1729	return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1730
1731	return WTF::nullopt;
1732	}
1733
1734	template <typename T>
1735	ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
1736	{
1737	ASSERT(isASCIIDigit(m_current));
1738	record8(`'.'`);
1739
1740	do {
1741	if (m_current == `'_'`) {
1742	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1743	return false;
1744
1745	shift();
1746	}
1747
1748	record8(m_current);
1749	shift();
1750	} while (isASCIIDigitOrSeparator(m_current));
1751
1752	return true;
1753	}
1754
1755	template <typename T>
1756	ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1757	{
1758	record8(`'e'`);
1759	shift();
1760	if (m_current == `'+'` \|\| m_current == `'-'`) {
1761	record8(m_current);
1762	shift();
1763	}
1764
1765	if (!isASCIIDigit(m_current))
1766	return false;
1767
1768	do {
1769	if (m_current == `'_'`) {
1770	if (UNLIKELY(!isASCIIDigit(peek(`1`))))
1771	return false;
1772
1773	shift();
1774	}
1775
1776	record8(m_current);
1777	shift();
1778	} while (isASCIIDigitOrSeparator(m_current));
1779
1780	return true;
1781	}
1782
1783	template <typename T>
1784	ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1785	{
1786	while (true) {
1787	while (UNLIKELY(m_current == `'*'`)) {
1788	shift();
1789	if (m_current == `'/'`) {
1790	shift();
1791	return true;
1792	}
1793	}
1794
1795	if (atEnd())
1796	return false;
1797
1798	if (isLineTerminator(m_current)) {
1799	shiftLineTerminator();
1800	m_hasLineTerminatorBeforeToken = true;
1801	} else
1802	shift();
1803	}
1804	}
1805
1806	template <typename T>
1807	ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1808	{
1809	// sourceURL and sourceMappingURL directives.
1810	if (!consume("source"))
1811	return;
1812
1813	if (consume("URL=")) {
1814	m_sourceURLDirective = parseCommentDirectiveValue();
1815	return;
1816	}
1817
1818	if (consume("MappingURL=")) {
1819	m_sourceMappingURLDirective = parseCommentDirectiveValue();
1820	return;
1821	}
1822	}
1823
1824	template <typename T>
1825	ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1826	{
1827	skipWhitespace();
1828	const T* stringStart = currentSourcePtr();
1829	while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != `'"'` && m_current != `'\''` && !atEnd())
1830	shift();
1831	const T* stringEnd = currentSourcePtr();
1832	skipWhitespace();
1833
1834	if (!isLineTerminator(m_current) && !atEnd())
1835	return String ();
1836
1837	append8(stringStart, stringEnd - stringStart);
1838	String result = String(m_buffer8.data(), m_buffer8.size());
1839	m_buffer8.shrink(`0`);
1840	return result;
1841	}
1842
1843	template <typename T>
1844	template <unsigned length>
1845	ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1846	{
1847	unsigned lengthToCheck = length - `1`; // Ignore the ending NULL byte in the string literal.
1848
1849	unsigned i = `0`;
1850	for (; i < lengthToCheck && m_current == input[i]; i++)
1851	shift();
1852
1853	return i == lengthToCheck;
1854	}
1855
1856	template <typename T>
1857	bool Lexer<T>::nextTokenIsColon()
1858	{
1859	const T* code = m_code;
1860	while (code < m_codeEnd && (isWhiteSpace(code) \|\| isLineTerminator(code)))
1861	code++;
1862
1863	return code < m_codeEnd && *code == `':'`;
1864	}
1865
1866	template <typename T>
1867	void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1868	{
1869	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1870	tokenLocation->line = lineNumber;
1871	tokenLocation->endOffset = endOffset;
1872	tokenLocation->lineStartOffset = lineStartOffset;
1873	ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1874	tokenRecord->m_endPosition = endPosition;
1875	m_lastToken = token;
1876	}
1877
1878	template <typename T>
1879	JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1880	{
1881	JSTokenData* tokenData = &tokenRecord->m_data;
1882	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1883	m_lastTokenLocation = JSTokenLocation (tokenRecord->m_location);
1884
1885	ASSERT(!m_error);
1886	ASSERT(m_buffer8.isEmpty());
1887	ASSERT(m_buffer16.isEmpty());
1888
1889	JSTokenType token = ERRORTOK;
1890
1891	start:
1892	skipWhitespace();
1893
1894	tokenLocation->startOffset = currentOffset();
1895	ASSERT(currentOffset() >= currentLineStartOffset());
1896	tokenRecord->m_startPosition = currentPosition();
1897
1898	if (atEnd()) {
1899	token = EOFTOK;
1900	goto returnToken;
1901	}
1902
1903	CharacterType type;
1904	if (LIKELY(isLatin1(m_current)))
1905	type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1906	else if (isNonLatin1IdentStart(m_current))
1907	type = CharacterIdentifierStart;
1908	else if (isLineTerminator(m_current))
1909	type = CharacterLineTerminator;
1910	else
1911	type = CharacterInvalid;
1912
1913	switch (type) {
1914	case CharacterGreater:
1915	shift();
1916	if (m_current == `'>'`) {
1917	shift();
1918	if (m_current == `'>'`) {
1919	shift();
1920	if (m_current == `'='`) {
1921	shift();
1922	token = URSHIFTEQUAL;
1923	break;
1924	}
1925	token = URSHIFT;
1926	break;
1927	}
1928	if (m_current == `'='`) {
1929	shift();
1930	token = RSHIFTEQUAL;
1931	break;
1932	}
1933	token = RSHIFT;
1934	break;
1935	}
1936	if (m_current == `'='`) {
1937	shift();
1938	token = GE;
1939	break;
1940	}
1941	token = GT;
1942	break;
1943	case CharacterEqual: {
1944	if (peek(`1`) == `'>'`) {
1945	token = ARROWFUNCTION;
1946	tokenData->line = lineNumber();
1947	tokenData->offset = currentOffset();
1948	tokenData->lineStartOffset = currentLineStartOffset();
1949	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1950	shift();
1951	shift();
1952	break;
1953	}
1954
1955	shift();
1956	if (m_current == `'='`) {
1957	shift();
1958	if (m_current == `'='`) {
1959	shift();
1960	token = STREQ;
1961	break;
1962	}
1963	token = EQEQ;
1964	break;
1965	}
1966	token = EQUAL;
1967	break;
1968	}
1969	case CharacterLess:
1970	shift();
1971	if (m_current == `'!'` && peek(`1`) == `'-'` && peek(`2`) == `'-'`) {
1972	if (m_scriptMode == JSParserScriptMode::Classic) {
1973	// <!-- marks the beginning of a line comment (for www usage)
1974	goto inSingleLineComment;
1975	}
1976	}
1977	if (m_current == `'<'`) {
1978	shift();
1979	if (m_current == `'='`) {
1980	shift();
1981	token = LSHIFTEQUAL;
1982	break;
1983	}
1984	token = LSHIFT;
1985	break;
1986	}
1987	if (m_current == `'='`) {
1988	shift();
1989	token = LE;
1990	break;
1991	}
1992	token = LT;
1993	break;
1994	case CharacterExclamationMark:
1995	shift();
1996	if (m_current == `'='`) {
1997	shift();
1998	if (m_current == `'='`) {
1999	shift();
2000	token = STRNEQ;
2001	break;
2002	}
2003	token = NE;
2004	break;
2005	}
2006	token = EXCLAMATION;
2007	break;
2008	case CharacterAdd:
2009	shift();
2010	if (m_current == `'+'`) {
2011	shift();
2012	token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
2013	break;
2014	}
2015	if (m_current == `'='`) {
2016	shift();
2017	token = PLUSEQUAL;
2018	break;
2019	}
2020	token = PLUS;
2021	break;
2022	case CharacterSub:
2023	shift();
2024	if (m_current == `'-'`) {
2025	shift();
2026	if ((m_atLineStart \|\| m_hasLineTerminatorBeforeToken) && m_current == `'>'`) {
2027	if (m_scriptMode == JSParserScriptMode::Classic) {
2028	shift();
2029	goto inSingleLineComment;
2030	}
2031	}
2032	token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
2033	break;
2034	}
2035	if (m_current == `'='`) {
2036	shift();
2037	token = MINUSEQUAL;
2038	break;
2039	}
2040	token = MINUS;
2041	break;
2042	case CharacterMultiply:
2043	shift();
2044	if (m_current == `'='`) {
2045	shift();
2046	token = MULTEQUAL;
2047	break;
2048	}
2049	if (m_current == `'*'`) {
2050	shift();
2051	if (m_current == `'='`) {
2052	shift();
2053	token = POWEQUAL;
2054	break;
2055	}
2056	token = POW;
2057	break;
2058	}
2059	token = TIMES;
2060	break;
2061	case CharacterSlash:
2062	shift();
2063	if (m_current == `'/'`) {
2064	shift();
2065	goto inSingleLineCommentCheckForDirectives;
2066	}
2067	if (m_current == `'*'`) {
2068	shift();
2069	if (parseMultilineComment())
2070	goto start;
2071	m_lexErrorMessage = "Multiline comment was not closed properly"_s;
2072	token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
2073	goto returnError;
2074	}
2075	if (m_current == `'='`) {
2076	shift();
2077	token = DIVEQUAL;
2078	break;
2079	}
2080	token = DIVIDE;
2081	break;
2082	case CharacterAnd:
2083	shift();
2084	if (m_current == `'&'`) {
2085	shift();
2086	token = AND;
2087	break;
2088	}
2089	if (m_current == `'='`) {
2090	shift();
2091	token = ANDEQUAL;
2092	break;
2093	}
2094	token = BITAND;
2095	break;
2096	case CharacterXor:
2097	shift();
2098	if (m_current == `'='`) {
2099	shift();
2100	token = XOREQUAL;
2101	break;
2102	}
2103	token = BITXOR;
2104	break;
2105	case CharacterModulo:
2106	shift();
2107	if (m_current == `'='`) {
2108	shift();
2109	token = MODEQUAL;
2110	break;
2111	}
2112	token = MOD;
2113	break;
2114	case CharacterOr:
2115	shift();
2116	if (m_current == `'='`) {
2117	shift();
2118	token = OREQUAL;
2119	break;
2120	}
2121	if (m_current == `'\|'`) {
2122	shift();
2123	token = OR;
2124	break;
2125	}
2126	token = BITOR;
2127	break;
2128	case CharacterOpenParen:
2129	token = OPENPAREN;
2130	tokenData->line = lineNumber();
2131	tokenData->offset = currentOffset();
2132	tokenData->lineStartOffset = currentLineStartOffset();
2133	shift();
2134	break;
2135	case CharacterCloseParen:
2136	token = CLOSEPAREN;
2137	shift();
2138	break;
2139	case CharacterOpenBracket:
2140	token = OPENBRACKET;
2141	shift();
2142	break;
2143	case CharacterCloseBracket:
2144	token = CLOSEBRACKET;
2145	shift();
2146	break;
2147	case CharacterComma:
2148	token = COMMA;
2149	shift();
2150	break;
2151	case CharacterColon:
2152	token = COLON;
2153	shift();
2154	break;
2155	case CharacterQuestion:
2156	token = QUESTION;
2157	shift();
2158	break;
2159	case CharacterTilde:
2160	token = TILDE;
2161	shift();
2162	break;
2163	case CharacterSemicolon:
2164	shift();
2165	token = SEMICOLON;
2166	break;
2167	case CharacterBackQuote:
2168	shift();
2169	token = BACKQUOTE;
2170	break;
2171	case CharacterOpenBrace:
2172	tokenData->line = lineNumber();
2173	tokenData->offset = currentOffset();
2174	tokenData->lineStartOffset = currentLineStartOffset();
2175	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2176	shift();
2177	token = OPENBRACE;
2178	break;
2179	case CharacterCloseBrace:
2180	tokenData->line = lineNumber();
2181	tokenData->offset = currentOffset();
2182	tokenData->lineStartOffset = currentLineStartOffset();
2183	ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2184	shift();
2185	token = CLOSEBRACE;
2186	break;
2187	case CharacterDot:
2188	shift();
2189	if (!isASCIIDigit(m_current)) {
2190	if (UNLIKELY((m_current == `'.'`) && (peek(`1`) == `'.'`))) {
2191	shift();
2192	shift();
2193	token = DOTDOTDOT;
2194	break;
2195	}
2196	token = DOT;
2197	break;
2198	}
2199	if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
2200	m_lexErrorMessage = "Non-number found after decimal point"_s;
2201	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2202	goto returnError;
2203	}
2204	token = DOUBLE;
2205	if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, `'e'`) && !parseNumberAfterExponentIndicator())) {
2206	m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2207	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2208	goto returnError;
2209	}
2210	size_t parsedLength;
2211	tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2212	if (token == INTEGER)
2213	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2214
2215	if (UNLIKELY(isIdentStart(m_current))) {
2216	m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2217	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2218	goto returnError;
2219	}
2220	m_buffer8.shrink(`0`);
2221	break;
2222	case CharacterZero:
2223	shift();
2224	if (isASCIIAlphaCaselessEqual(m_current, `'x'`)) {
2225	if (UNLIKELY(!isASCIIHexDigit(peek(`1`)))) {
2226	m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2227	token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2228	goto returnError;
2229	}
2230
2231	// Shift out the 'x' prefix.
2232	shift();
2233
2234	auto parseNumberResult = parseHex();
2235	if (!parseNumberResult)
2236	tokenData->doubleValue = `0`;
2237	else if (WTF::holds_alternative<double>(*parseNumberResult))
2238	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2239	else {
2240	token = BIGINT;
2241	shift();
2242	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2243	tokenData->radix = `16`;
2244	}
2245
2246	if (UNLIKELY(isIdentStart(m_current))) {
2247	m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2248	token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2249	goto returnError;
2250	}
2251	if (LIKELY(token != BIGINT))
2252	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2253	m_buffer8.shrink(`0`);
2254	break;
2255	}
2256	if (isASCIIAlphaCaselessEqual(m_current, `'b'`)) {
2257	if (UNLIKELY(!isASCIIBinaryDigit(peek(`1`)))) {
2258	m_lexErrorMessage = "No binary digits after '0b'"_s;
2259	token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2260	goto returnError;
2261	}
2262
2263	// Shift out the 'b' prefix.
2264	shift();
2265
2266	auto parseNumberResult = parseBinary();
2267	if (!parseNumberResult)
2268	tokenData->doubleValue = `0`;
2269	else if (WTF::holds_alternative<double>(*parseNumberResult))
2270	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2271	else {
2272	token = BIGINT;
2273	shift();
2274	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2275	tokenData->radix = `2`;
2276	}
2277
2278	if (UNLIKELY(isIdentStart(m_current))) {
2279	m_lexErrorMessage = "No space between binary literal and identifier"_s;
2280	token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2281	goto returnError;
2282	}
2283	if (LIKELY(token != BIGINT))
2284	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2285	m_buffer8.shrink(`0`);
2286	break;
2287	}
2288
2289	if (isASCIIAlphaCaselessEqual(m_current, `'o'`)) {
2290	if (UNLIKELY(!isASCIIOctalDigit(peek(`1`)))) {
2291	m_lexErrorMessage = "No octal digits after '0o'"_s;
2292	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2293	goto returnError;
2294	}
2295
2296	// Shift out the 'o' prefix.
2297	shift();
2298
2299	auto parseNumberResult = parseOctal();
2300	if (!parseNumberResult)
2301	tokenData->doubleValue = `0`;
2302	else if (WTF::holds_alternative<double>(*parseNumberResult))
2303	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2304	else {
2305	token = BIGINT;
2306	shift();
2307	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2308	tokenData->radix = `8`;
2309	}
2310
2311	if (UNLIKELY(isIdentStart(m_current))) {
2312	m_lexErrorMessage = "No space between octal literal and identifier"_s;
2313	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2314	goto returnError;
2315	}
2316	if (LIKELY(token != BIGINT))
2317	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2318	m_buffer8.shrink(`0`);
2319	break;
2320	}
2321
2322	if (UNLIKELY(m_current == `'_'`)) {
2323	m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
2324	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2325	goto returnError;
2326	}
2327
2328	record8(`'0'`);
2329	if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
2330	m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2331	token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2332	goto returnError;
2333	}
2334	if (isASCIIOctalDigit(m_current)) {
2335	auto parseNumberResult = parseOctal();
2336	if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2337	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2338	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2339	}
2340	}
2341	FALLTHROUGH;
2342	case CharacterNumber:
2343	if (LIKELY(token != INTEGER && token != DOUBLE)) {
2344	auto parseNumberResult = parseDecimal();
2345	if (parseNumberResult) {
2346	if (WTF::holds_alternative<double>(*parseNumberResult)) {
2347	tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2348	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2349	} else {
2350	token = BIGINT;
2351	shift();
2352	tokenData->bigIntString = WTF::get<const Identifier>(parseNumberResult);
2353	tokenData->radix = `10`;
2354	}
2355	} else {
2356	token = INTEGER;
2357	if (m_current == `'.'`) {
2358	shift();
2359	if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
2360	m_lexErrorMessage = "Non-number found after decimal point"_s;
2361	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2362	goto returnError;
2363	}
2364	token = DOUBLE;
2365	}
2366	if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, `'e'`) && !parseNumberAfterExponentIndicator())) {
2367	m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2368	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2369	goto returnError;
2370	}
2371	size_t parsedLength;
2372	tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2373	if (token == INTEGER)
2374	token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2375	}
2376	}
2377
2378	if (UNLIKELY(isIdentStart(m_current))) {
2379	m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2380	token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2381	goto returnError;
2382	}
2383	m_buffer8.shrink(`0`);
2384	break;
2385	case CharacterQuote: {
2386	StringParseResult result = StringCannotBeParsed;
2387	if (lexerFlags & LexerFlagsDontBuildStrings)
2388	result = parseString<false>(tokenData, strictMode);
2389	else
2390	result = parseString<true>(tokenData, strictMode);
2391
2392	if (UNLIKELY(result != StringParsedSuccessfully)) {
2393	token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2394	goto returnError;
2395	}
2396	shift();
2397	token = STRING;
2398	break;
2399	}
2400	case CharacterIdentifierStart:
2401	ASSERT(isIdentStart(m_current));
2402	FALLTHROUGH;
2403	case CharacterBackSlash:
2404	parseIdent:
2405	if (lexerFlags & LexexFlagsDontBuildKeywords)
2406	token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2407	else
2408	token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2409	break;
2410	case CharacterLineTerminator:
2411	ASSERT(isLineTerminator(m_current));
2412	shiftLineTerminator();
2413	m_atLineStart = true;
2414	m_hasLineTerminatorBeforeToken = true;
2415	m_lineStart = m_code;
2416	goto start;
2417	case CharacterPrivateIdentifierStart:
2418	if (m_parsingBuiltinFunction)
2419	goto parseIdent;
2420
2421	FALLTHROUGH;
2422	case CharacterOtherIdentifierPart:
2423	case CharacterInvalid:
2424	m_lexErrorMessage = invalidCharacterMessage();
2425	token = ERRORTOK;
2426	goto returnError;
2427	default:
2428	RELEASE_ASSERT_NOT_REACHED();
2429	m_lexErrorMessage = "Internal Error"_s;
2430	token = ERRORTOK;
2431	goto returnError;
2432	}
2433
2434	m_atLineStart = false;
2435	goto returnToken;
2436
2437	inSingleLineCommentCheckForDirectives:
2438	// Script comment directives like "//# sourceURL=test.js".
2439	if (UNLIKELY((m_current == `'#'` \|\| m_current == `'@'`) && isWhiteSpace(peek(`1`)))) {
2440	shift();
2441	shift();
2442	parseCommentDirective();
2443	}
2444	// Fall through to complete single line comment parsing.
2445
2446	inSingleLineComment:
2447	{
2448	auto lineNumber = m_lineNumber;
2449	auto endOffset = currentOffset();
2450	auto lineStartOffset = currentLineStartOffset();
2451	auto endPosition = currentPosition();
2452
2453	while (!isLineTerminator(m_current)) {
2454	if (atEnd()) {
2455	token = EOFTOK;
2456	fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2457	return token;
2458	}
2459	shift();
2460	}
2461	shiftLineTerminator();
2462	m_atLineStart = true;
2463	m_hasLineTerminatorBeforeToken = true;
2464	m_lineStart = m_code;
2465	if (!lastTokenWasRestrKeyword())
2466	goto start;
2467
2468	token = SEMICOLON;
2469	fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2470	return token;
2471	}
2472
2473	returnToken:
2474	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2475	return token;
2476
2477	returnError:
2478	m_error = true;
2479	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2480	RELEASE_ASSERT(token & ErrorTokenFlag);
2481	return token;
2482	}
2483
2484	template <typename T>
2485	static inline void orCharacter(UChar&, UChar);
2486
2487	template <>
2488	inline void orCharacter<LChar>(UChar&, UChar) { }
2489
2490	template <>
2491	inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2492	{
2493	orAccumulator \|= character;
2494	}
2495
2496	template <typename T>
2497	JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2498	{
2499	JSTokenData* tokenData = &tokenRecord->m_data;
2500	ASSERT(m_buffer16.isEmpty());
2501
2502	bool lastWasEscape = false;
2503	bool inBrackets = false;
2504	UChar charactersOredTogether = `0`;
2505
2506	if (patternPrefix) {
2507	ASSERT(!isLineTerminator(patternPrefix));
2508	ASSERT(patternPrefix != `'/'`);
2509	ASSERT(patternPrefix != `'['`);
2510	record16(patternPrefix);
2511	}
2512
2513	while (true) {
2514	if (isLineTerminator(m_current) \|\| atEnd()) {
2515	m_buffer16.shrink(`0`);
2516	JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2517	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2518	m_error = true;
2519	m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2520	return token;
2521	}
2522
2523	T prev = m_current;
2524
2525	shift();
2526
2527	if (prev == `'/'` && !lastWasEscape && !inBrackets)
2528	break;
2529
2530	record16(prev);
2531	orCharacter<T>(charactersOredTogether, prev);
2532
2533	if (lastWasEscape) {
2534	lastWasEscape = false;
2535	continue;
2536	}
2537
2538	switch (prev) {
2539	case `'['`:
2540	inBrackets = true;
2541	break;
2542	case `']'`:
2543	inBrackets = false;
2544	break;
2545	case `'\\'`:
2546	lastWasEscape = true;
2547	break;
2548	}
2549	}
2550
2551	tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2552
2553	m_buffer16.shrink(`0`);
2554	charactersOredTogether = `0`;
2555
2556	while (isIdentPart(m_current)) {
2557	record16(m_current);
2558	orCharacter<T>(charactersOredTogether, m_current);
2559	shift();
2560	}
2561
2562	tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2563	m_buffer16.shrink(`0`);
2564
2565	// Since RegExp always ends with /, m_atLineStart always becomes false.
2566	m_atLineStart = false;
2567
2568	JSTokenType token = REGEXP;
2569	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2570	return token;
2571	}
2572
2573	template <typename T>
2574	JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2575	{
2576	JSTokenData* tokenData = &tokenRecord->m_data;
2577	ASSERT(!m_error);
2578	ASSERT(m_buffer16.isEmpty());
2579
2580	// Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2581	// So in this re-scan phase, shift() is not needed here.
2582	StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2583	JSTokenType token = ERRORTOK;
2584	if (UNLIKELY(result != StringParsedSuccessfully)) {
2585	token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2586	m_error = true;
2587	} else
2588	token = TEMPLATE;
2589
2590	// Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2591	m_atLineStart = false;
2592	fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2593	return token;
2594	}
2595
2596	template <typename T>
2597	void Lexer<T>::clear()
2598	{
2599	m_arena = `0`;
2600
2601	Vector<LChar> newBuffer8;
2602	m_buffer8.swap(newBuffer8);
2603
2604	Vector<UChar> newBuffer16;
2605	m_buffer16.swap(newBuffer16);
2606
2607	Vector<UChar> newBufferForRawTemplateString16;
2608	m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2609
2610	m_isReparsingFunction = false;
2611	}
2612
2613	// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2614	template class Lexer<LChar>;
2615	template class Lexer<UChar>;
2616
2617	} // namespace JSC
2618

Browse the source code of webcore/Source/JavaScriptCore/parser/Lexer.cpp