1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006-2019 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2010 Zoltan Herczeg ([email protected])
6 * Copyright (C) 2012 Mathias Bynens ([email protected])
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "Lexer.h"
27
28#include "BuiltinNames.h"
29#include "Identifier.h"
30#include "JSCInlines.h"
31#include "JSFunctionInlines.h"
32#include "KeywordLookup.h"
33#include "Lexer.lut.h"
34#include "Nodes.h"
35#include "ParseInt.h"
36#include "Parser.h"
37#include <ctype.h>
38#include <limits.h>
39#include <string.h>
40#include <wtf/Assertions.h>
41#include <wtf/HexNumber.h>
42#include <wtf/Variant.h>
43#include <wtf/dtoa.h>
44
45namespace JSC {
46
47bool isLexerKeyword(const Identifier& identifier)
48{
49 return JSC::mainTable.entry(identifier);
50}
51
// Classification of a Latin-1 source character. The values are stored in
// typesOfLatin1Characters and drive the lexer's main switch.
enum CharacterType {
    // Identifier-capable classes. These must stay first and keep this order:
    // isIdentStart compares against CharacterIdentifierStart, and isIdentPart
    // tests "<= CharacterOtherIdentifierPart".
    CharacterIdentifierStart = 0,
    CharacterZero,
    CharacterNumber,

    // Single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
    // (http://unicode.org/reports/tr31/#Backward_Compatibility)
    CharacterOtherIdentifierPart,

    // Everything from here on can never be part of an identifier.
    CharacterInvalid,
    CharacterLineTerminator,
    CharacterExclamationMark,
    CharacterOpenParen,
    CharacterCloseParen,
    CharacterOpenBracket,
    CharacterCloseBracket,
    CharacterComma,
    CharacterColon,
    CharacterQuestion,
    CharacterTilde,
    CharacterQuote,
    CharacterBackQuote,
    CharacterDot,
    CharacterSlash,
    CharacterBackSlash,
    CharacterSemicolon,
    CharacterOpenBrace,
    CharacterCloseBrace,

    // Operator characters.
    CharacterAdd,
    CharacterSub,
    CharacterMultiply,
    CharacterModulo,
    CharacterAnd,
    CharacterXor,
    CharacterOr,
    CharacterLess,
    CharacterGreater,
    CharacterEqual,

    // Remaining types.
    CharacterWhiteSpace,
    CharacterHash,
    CharacterPrivateIdentifierStart
};
101
// Maps each of the 256 Latin-1 code units to its CharacterType.
// The table is indexed directly by the character value, so the order of the entries is significant:
// entry N describes character code N (the leading comment on each row gives the code and its name).
// isIdentStart and isIdentPart below read this table to classify Latin-1 characters.
static constexpr const unsigned short typesOfLatin1Characters[256] = {
/* 0 - Null */ CharacterInvalid,
/* 1 - Start of Heading */ CharacterInvalid,
/* 2 - Start of Text */ CharacterInvalid,
/* 3 - End of Text */ CharacterInvalid,
/* 4 - End of Transm. */ CharacterInvalid,
/* 5 - Enquiry */ CharacterInvalid,
/* 6 - Acknowledgment */ CharacterInvalid,
/* 7 - Bell */ CharacterInvalid,
/* 8 - Back Space */ CharacterInvalid,
/* 9 - Horizontal Tab */ CharacterWhiteSpace,
/* 10 - Line Feed */ CharacterLineTerminator,
/* 11 - Vertical Tab */ CharacterWhiteSpace,
/* 12 - Form Feed */ CharacterWhiteSpace,
/* 13 - Carriage Return */ CharacterLineTerminator,
/* 14 - Shift Out */ CharacterInvalid,
/* 15 - Shift In */ CharacterInvalid,
/* 16 - Data Line Escape */ CharacterInvalid,
/* 17 - Device Control 1 */ CharacterInvalid,
/* 18 - Device Control 2 */ CharacterInvalid,
/* 19 - Device Control 3 */ CharacterInvalid,
/* 20 - Device Control 4 */ CharacterInvalid,
/* 21 - Negative Ack. */ CharacterInvalid,
/* 22 - Synchronous Idle */ CharacterInvalid,
/* 23 - End of Transmit */ CharacterInvalid,
/* 24 - Cancel */ CharacterInvalid,
/* 25 - End of Medium */ CharacterInvalid,
/* 26 - Substitute */ CharacterInvalid,
/* 27 - Escape */ CharacterInvalid,
/* 28 - File Separator */ CharacterInvalid,
/* 29 - Group Separator */ CharacterInvalid,
/* 30 - Record Separator */ CharacterInvalid,
/* 31 - Unit Separator */ CharacterInvalid,
/* 32 - Space */ CharacterWhiteSpace,
/* 33 - ! */ CharacterExclamationMark,
/* 34 - " */ CharacterQuote,
/* 35 - # */ CharacterHash,
/* 36 - $ */ CharacterIdentifierStart,
/* 37 - % */ CharacterModulo,
/* 38 - & */ CharacterAnd,
/* 39 - ' */ CharacterQuote,
/* 40 - ( */ CharacterOpenParen,
/* 41 - ) */ CharacterCloseParen,
/* 42 - * */ CharacterMultiply,
/* 43 - + */ CharacterAdd,
/* 44 - , */ CharacterComma,
/* 45 - - */ CharacterSub,
/* 46 - . */ CharacterDot,
/* 47 - / */ CharacterSlash,
/* 48 - 0 */ CharacterZero,
/* 49 - 1 */ CharacterNumber,
/* 50 - 2 */ CharacterNumber,
/* 51 - 3 */ CharacterNumber,
/* 52 - 4 */ CharacterNumber,
/* 53 - 5 */ CharacterNumber,
/* 54 - 6 */ CharacterNumber,
/* 55 - 7 */ CharacterNumber,
/* 56 - 8 */ CharacterNumber,
/* 57 - 9 */ CharacterNumber,
/* 58 - : */ CharacterColon,
/* 59 - ; */ CharacterSemicolon,
/* 60 - < */ CharacterLess,
/* 61 - = */ CharacterEqual,
/* 62 - > */ CharacterGreater,
/* 63 - ? */ CharacterQuestion,
/* 64 - @ */ CharacterPrivateIdentifierStart,
/* 65 - A */ CharacterIdentifierStart,
/* 66 - B */ CharacterIdentifierStart,
/* 67 - C */ CharacterIdentifierStart,
/* 68 - D */ CharacterIdentifierStart,
/* 69 - E */ CharacterIdentifierStart,
/* 70 - F */ CharacterIdentifierStart,
/* 71 - G */ CharacterIdentifierStart,
/* 72 - H */ CharacterIdentifierStart,
/* 73 - I */ CharacterIdentifierStart,
/* 74 - J */ CharacterIdentifierStart,
/* 75 - K */ CharacterIdentifierStart,
/* 76 - L */ CharacterIdentifierStart,
/* 77 - M */ CharacterIdentifierStart,
/* 78 - N */ CharacterIdentifierStart,
/* 79 - O */ CharacterIdentifierStart,
/* 80 - P */ CharacterIdentifierStart,
/* 81 - Q */ CharacterIdentifierStart,
/* 82 - R */ CharacterIdentifierStart,
/* 83 - S */ CharacterIdentifierStart,
/* 84 - T */ CharacterIdentifierStart,
/* 85 - U */ CharacterIdentifierStart,
/* 86 - V */ CharacterIdentifierStart,
/* 87 - W */ CharacterIdentifierStart,
/* 88 - X */ CharacterIdentifierStart,
/* 89 - Y */ CharacterIdentifierStart,
/* 90 - Z */ CharacterIdentifierStart,
/* 91 - [ */ CharacterOpenBracket,
/* 92 - \ */ CharacterBackSlash,
/* 93 - ] */ CharacterCloseBracket,
/* 94 - ^ */ CharacterXor,
/* 95 - _ */ CharacterIdentifierStart,
/* 96 - ` */ CharacterBackQuote,
/* 97 - a */ CharacterIdentifierStart,
/* 98 - b */ CharacterIdentifierStart,
/* 99 - c */ CharacterIdentifierStart,
/* 100 - d */ CharacterIdentifierStart,
/* 101 - e */ CharacterIdentifierStart,
/* 102 - f */ CharacterIdentifierStart,
/* 103 - g */ CharacterIdentifierStart,
/* 104 - h */ CharacterIdentifierStart,
/* 105 - i */ CharacterIdentifierStart,
/* 106 - j */ CharacterIdentifierStart,
/* 107 - k */ CharacterIdentifierStart,
/* 108 - l */ CharacterIdentifierStart,
/* 109 - m */ CharacterIdentifierStart,
/* 110 - n */ CharacterIdentifierStart,
/* 111 - o */ CharacterIdentifierStart,
/* 112 - p */ CharacterIdentifierStart,
/* 113 - q */ CharacterIdentifierStart,
/* 114 - r */ CharacterIdentifierStart,
/* 115 - s */ CharacterIdentifierStart,
/* 116 - t */ CharacterIdentifierStart,
/* 117 - u */ CharacterIdentifierStart,
/* 118 - v */ CharacterIdentifierStart,
/* 119 - w */ CharacterIdentifierStart,
/* 120 - x */ CharacterIdentifierStart,
/* 121 - y */ CharacterIdentifierStart,
/* 122 - z */ CharacterIdentifierStart,
/* 123 - { */ CharacterOpenBrace,
/* 124 - | */ CharacterOr,
/* 125 - } */ CharacterCloseBrace,
/* 126 - ~ */ CharacterTilde,
/* 127 - Delete */ CharacterInvalid,
/* 128 - Cc category */ CharacterInvalid,
/* 129 - Cc category */ CharacterInvalid,
/* 130 - Cc category */ CharacterInvalid,
/* 131 - Cc category */ CharacterInvalid,
/* 132 - Cc category */ CharacterInvalid,
/* 133 - Cc category */ CharacterInvalid,
/* 134 - Cc category */ CharacterInvalid,
/* 135 - Cc category */ CharacterInvalid,
/* 136 - Cc category */ CharacterInvalid,
/* 137 - Cc category */ CharacterInvalid,
/* 138 - Cc category */ CharacterInvalid,
/* 139 - Cc category */ CharacterInvalid,
/* 140 - Cc category */ CharacterInvalid,
/* 141 - Cc category */ CharacterInvalid,
/* 142 - Cc category */ CharacterInvalid,
/* 143 - Cc category */ CharacterInvalid,
/* 144 - Cc category */ CharacterInvalid,
/* 145 - Cc category */ CharacterInvalid,
/* 146 - Cc category */ CharacterInvalid,
/* 147 - Cc category */ CharacterInvalid,
/* 148 - Cc category */ CharacterInvalid,
/* 149 - Cc category */ CharacterInvalid,
/* 150 - Cc category */ CharacterInvalid,
/* 151 - Cc category */ CharacterInvalid,
/* 152 - Cc category */ CharacterInvalid,
/* 153 - Cc category */ CharacterInvalid,
/* 154 - Cc category */ CharacterInvalid,
/* 155 - Cc category */ CharacterInvalid,
/* 156 - Cc category */ CharacterInvalid,
/* 157 - Cc category */ CharacterInvalid,
/* 158 - Cc category */ CharacterInvalid,
/* 159 - Cc category */ CharacterInvalid,
/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
/* 161 - Po category */ CharacterInvalid,
/* 162 - Sc category */ CharacterInvalid,
/* 163 - Sc category */ CharacterInvalid,
/* 164 - Sc category */ CharacterInvalid,
/* 165 - Sc category */ CharacterInvalid,
/* 166 - So category */ CharacterInvalid,
/* 167 - So category */ CharacterInvalid,
/* 168 - Sk category */ CharacterInvalid,
/* 169 - So category */ CharacterInvalid,
/* 170 - Ll category */ CharacterIdentifierStart,
/* 171 - Pi category */ CharacterInvalid,
/* 172 - Sm category */ CharacterInvalid,
/* 173 - Cf category */ CharacterInvalid,
/* 174 - So category */ CharacterInvalid,
/* 175 - Sk category */ CharacterInvalid,
/* 176 - So category */ CharacterInvalid,
/* 177 - Sm category */ CharacterInvalid,
/* 178 - No category */ CharacterInvalid,
/* 179 - No category */ CharacterInvalid,
/* 180 - Sk category */ CharacterInvalid,
/* 181 - Ll category */ CharacterIdentifierStart,
/* 182 - So category */ CharacterInvalid,
/* 183 - Po category */ CharacterOtherIdentifierPart,
/* 184 - Sk category */ CharacterInvalid,
/* 185 - No category */ CharacterInvalid,
/* 186 - Ll category */ CharacterIdentifierStart,
/* 187 - Pf category */ CharacterInvalid,
/* 188 - No category */ CharacterInvalid,
/* 189 - No category */ CharacterInvalid,
/* 190 - No category */ CharacterInvalid,
/* 191 - Po category */ CharacterInvalid,
/* 192 - Lu category */ CharacterIdentifierStart,
/* 193 - Lu category */ CharacterIdentifierStart,
/* 194 - Lu category */ CharacterIdentifierStart,
/* 195 - Lu category */ CharacterIdentifierStart,
/* 196 - Lu category */ CharacterIdentifierStart,
/* 197 - Lu category */ CharacterIdentifierStart,
/* 198 - Lu category */ CharacterIdentifierStart,
/* 199 - Lu category */ CharacterIdentifierStart,
/* 200 - Lu category */ CharacterIdentifierStart,
/* 201 - Lu category */ CharacterIdentifierStart,
/* 202 - Lu category */ CharacterIdentifierStart,
/* 203 - Lu category */ CharacterIdentifierStart,
/* 204 - Lu category */ CharacterIdentifierStart,
/* 205 - Lu category */ CharacterIdentifierStart,
/* 206 - Lu category */ CharacterIdentifierStart,
/* 207 - Lu category */ CharacterIdentifierStart,
/* 208 - Lu category */ CharacterIdentifierStart,
/* 209 - Lu category */ CharacterIdentifierStart,
/* 210 - Lu category */ CharacterIdentifierStart,
/* 211 - Lu category */ CharacterIdentifierStart,
/* 212 - Lu category */ CharacterIdentifierStart,
/* 213 - Lu category */ CharacterIdentifierStart,
/* 214 - Lu category */ CharacterIdentifierStart,
/* 215 - Sm category */ CharacterInvalid,
/* 216 - Lu category */ CharacterIdentifierStart,
/* 217 - Lu category */ CharacterIdentifierStart,
/* 218 - Lu category */ CharacterIdentifierStart,
/* 219 - Lu category */ CharacterIdentifierStart,
/* 220 - Lu category */ CharacterIdentifierStart,
/* 221 - Lu category */ CharacterIdentifierStart,
/* 222 - Lu category */ CharacterIdentifierStart,
/* 223 - Ll category */ CharacterIdentifierStart,
/* 224 - Ll category */ CharacterIdentifierStart,
/* 225 - Ll category */ CharacterIdentifierStart,
/* 226 - Ll category */ CharacterIdentifierStart,
/* 227 - Ll category */ CharacterIdentifierStart,
/* 228 - Ll category */ CharacterIdentifierStart,
/* 229 - Ll category */ CharacterIdentifierStart,
/* 230 - Ll category */ CharacterIdentifierStart,
/* 231 - Ll category */ CharacterIdentifierStart,
/* 232 - Ll category */ CharacterIdentifierStart,
/* 233 - Ll category */ CharacterIdentifierStart,
/* 234 - Ll category */ CharacterIdentifierStart,
/* 235 - Ll category */ CharacterIdentifierStart,
/* 236 - Ll category */ CharacterIdentifierStart,
/* 237 - Ll category */ CharacterIdentifierStart,
/* 238 - Ll category */ CharacterIdentifierStart,
/* 239 - Ll category */ CharacterIdentifierStart,
/* 240 - Ll category */ CharacterIdentifierStart,
/* 241 - Ll category */ CharacterIdentifierStart,
/* 242 - Ll category */ CharacterIdentifierStart,
/* 243 - Ll category */ CharacterIdentifierStart,
/* 244 - Ll category */ CharacterIdentifierStart,
/* 245 - Ll category */ CharacterIdentifierStart,
/* 246 - Ll category */ CharacterIdentifierStart,
/* 247 - Sm category */ CharacterInvalid,
/* 248 - Ll category */ CharacterIdentifierStart,
/* 249 - Ll category */ CharacterIdentifierStart,
/* 250 - Ll category */ CharacterIdentifierStart,
/* 251 - Ll category */ CharacterIdentifierStart,
/* 252 - Ll category */ CharacterIdentifierStart,
/* 253 - Ll category */ CharacterIdentifierStart,
/* 254 - Ll category */ CharacterIdentifierStart,
/* 255 - Ll category */ CharacterIdentifierStart
};
361
// This table provides the character that results from the escape \X, where X is an ASCII character
// and its code (0-127, starting at NUL) is the index into the table.
// A table value of 0 means that more processing needs to be done: this covers the control
// characters (including line terminators), the digits, 'u', 'x' and DEL.
// Characters with no special escape meaning simply map to themselves.
static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
/* 0 - Null */ 0,
/* 1 - Start of Heading */ 0,
/* 2 - Start of Text */ 0,
/* 3 - End of Text */ 0,
/* 4 - End of Transm. */ 0,
/* 5 - Enquiry */ 0,
/* 6 - Acknowledgment */ 0,
/* 7 - Bell */ 0,
/* 8 - Back Space */ 0,
/* 9 - Horizontal Tab */ 0,
/* 10 - Line Feed */ 0,
/* 11 - Vertical Tab */ 0,
/* 12 - Form Feed */ 0,
/* 13 - Carriage Return */ 0,
/* 14 - Shift Out */ 0,
/* 15 - Shift In */ 0,
/* 16 - Data Line Escape */ 0,
/* 17 - Device Control 1 */ 0,
/* 18 - Device Control 2 */ 0,
/* 19 - Device Control 3 */ 0,
/* 20 - Device Control 4 */ 0,
/* 21 - Negative Ack. */ 0,
/* 22 - Synchronous Idle */ 0,
/* 23 - End of Transmit */ 0,
/* 24 - Cancel */ 0,
/* 25 - End of Medium */ 0,
/* 26 - Substitute */ 0,
/* 27 - Escape */ 0,
/* 28 - File Separator */ 0,
/* 29 - Group Separator */ 0,
/* 30 - Record Separator */ 0,
/* 31 - Unit Separator */ 0,
/* 32 - Space */ ' ',
/* 33 - ! */ '!',
/* 34 - " */ '"',
/* 35 - # */ '#',
/* 36 - $ */ '$',
/* 37 - % */ '%',
/* 38 - & */ '&',
/* 39 - ' */ '\'',
/* 40 - ( */ '(',
/* 41 - ) */ ')',
/* 42 - * */ '*',
/* 43 - + */ '+',
/* 44 - , */ ',',
/* 45 - - */ '-',
/* 46 - . */ '.',
/* 47 - / */ '/',
/* 48 - 0 */ 0,
/* 49 - 1 */ 0,
/* 50 - 2 */ 0,
/* 51 - 3 */ 0,
/* 52 - 4 */ 0,
/* 53 - 5 */ 0,
/* 54 - 6 */ 0,
/* 55 - 7 */ 0,
/* 56 - 8 */ 0,
/* 57 - 9 */ 0,
/* 58 - : */ ':',
/* 59 - ; */ ';',
/* 60 - < */ '<',
/* 61 - = */ '=',
/* 62 - > */ '>',
/* 63 - ? */ '?',
/* 64 - @ */ '@',
/* 65 - A */ 'A',
/* 66 - B */ 'B',
/* 67 - C */ 'C',
/* 68 - D */ 'D',
/* 69 - E */ 'E',
/* 70 - F */ 'F',
/* 71 - G */ 'G',
/* 72 - H */ 'H',
/* 73 - I */ 'I',
/* 74 - J */ 'J',
/* 75 - K */ 'K',
/* 76 - L */ 'L',
/* 77 - M */ 'M',
/* 78 - N */ 'N',
/* 79 - O */ 'O',
/* 80 - P */ 'P',
/* 81 - Q */ 'Q',
/* 82 - R */ 'R',
/* 83 - S */ 'S',
/* 84 - T */ 'T',
/* 85 - U */ 'U',
/* 86 - V */ 'V',
/* 87 - W */ 'W',
/* 88 - X */ 'X',
/* 89 - Y */ 'Y',
/* 90 - Z */ 'Z',
/* 91 - [ */ '[',
/* 92 - \ */ '\\',
/* 93 - ] */ ']',
/* 94 - ^ */ '^',
/* 95 - _ */ '_',
/* 96 - ` */ '`',
/* 97 - a */ 'a',
/* 98 - b */ 0x08,
/* 99 - c */ 'c',
/* 100 - d */ 'd',
/* 101 - e */ 'e',
/* 102 - f */ 0x0C,
/* 103 - g */ 'g',
/* 104 - h */ 'h',
/* 105 - i */ 'i',
/* 106 - j */ 'j',
/* 107 - k */ 'k',
/* 108 - l */ 'l',
/* 109 - m */ 'm',
/* 110 - n */ 0x0A,
/* 111 - o */ 'o',
/* 112 - p */ 'p',
/* 113 - q */ 'q',
/* 114 - r */ 0x0D,
/* 115 - s */ 's',
/* 116 - t */ 0x09,
/* 117 - u */ 0,
/* 118 - v */ 0x0B,
/* 119 - w */ 'w',
/* 120 - x */ 0,
/* 121 - y */ 'y',
/* 122 - z */ 'z',
/* 123 - { */ '{',
/* 124 - | */ '|',
/* 125 - } */ '}',
/* 126 - ~ */ '~',
/* 127 - Delete */ 0
};
494
// Constructs a lexer bound to the given VM. No source is attached yet; setCode() does that.
// builtinMode decides whether the lexer treats its input as a builtin function
// (m_parsingBuiltinFunction), and scriptMode is stored unchanged in m_scriptMode.
template <typename T>
Lexer<T>::Lexer(VM& vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
    : m_isReparsingFunction(false)
    , m_vm(vm)
    , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
    , m_scriptMode(scriptMode)
{
}
503
504static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
505{
506 if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
507 return INTEGER;
508 return DOUBLE;
509}
510
511template <typename T>
512Lexer<T>::~Lexer()
513{
514}
515
516template <typename T>
517String Lexer<T>::invalidCharacterMessage() const
518{
519 switch (m_current) {
520 case 0:
521 return "Invalid character: '\\0'"_s;
522 case 10:
523 return "Invalid character: '\\n'"_s;
524 case 11:
525 return "Invalid character: '\\v'"_s;
526 case 13:
527 return "Invalid character: '\\r'"_s;
528 case 35:
529 return "Invalid character: '#'"_s;
530 case 64:
531 return "Invalid character: '@'"_s;
532 case 96:
533 return "Invalid character: '`'"_s;
534 default:
535 return makeString("Invalid character '\\u", hex(m_current, 4, Lowercase), '\'');
536 }
537}
538
539template <typename T>
540ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
541{
542 ASSERT(m_code <= m_codeEnd);
543 return m_code;
544}
545
546template <typename T>
547void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
548{
549 m_arena = &arena->identifierArena();
550
551 m_lineNumber = source.firstLine().oneBasedInt();
552 m_lastToken = -1;
553
554 StringView sourceString = source.provider()->source();
555
556 if (!sourceString.isNull())
557 setCodeStart(sourceString);
558 else
559 m_codeStart = 0;
560
561 m_source = &source;
562 m_sourceOffset = source.startOffset();
563 m_codeStartPlusOffset = m_codeStart + source.startOffset();
564 m_code = m_codeStartPlusOffset;
565 m_codeEnd = m_codeStart + source.endOffset();
566 m_error = false;
567 m_atLineStart = true;
568 m_lineStart = m_code;
569 m_lexErrorMessage = String();
570 m_sourceURLDirective = String();
571 m_sourceMappingURLDirective = String();
572
573 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
574 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
575 m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
576
577 if (LIKELY(m_code < m_codeEnd))
578 m_current = *m_code;
579 else
580 m_current = 0;
581 ASSERT(currentOffset() == source.startOffset());
582}
583
584template <typename T>
585template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
586{
587 m_code += shiftAmount;
588 ASSERT(currentOffset() >= currentLineStartOffset());
589 m_current = *m_code;
590}
591
// Advances the cursor by one character and reloads m_current.
// Once the cursor reaches m_codeEnd, m_current is left at 0 instead of reading past the buffer.
template <typename T>
ALWAYS_INLINE void Lexer<T>::shift()
{
    // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
    m_current = 0;
    ++m_code;
    // Only dereference while the cursor is still inside the source range.
    if (LIKELY(m_code < m_codeEnd))
        m_current = *m_code;
}
601
602template <typename T>
603ALWAYS_INLINE bool Lexer<T>::atEnd() const
604{
605 ASSERT(!m_current || m_code < m_codeEnd);
606 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
607}
608
609template <typename T>
610ALWAYS_INLINE T Lexer<T>::peek(int offset) const
611{
612 ASSERT(offset > 0 && offset < 5);
613 const T* code = m_code + offset;
614 return (code < m_codeEnd) ? *code : 0;
615}
616
617struct ParsedUnicodeEscapeValue {
618 ParsedUnicodeEscapeValue(UChar32 value)
619 : m_value(value)
620 {
621 ASSERT(isValid());
622 }
623
624 enum SpecialValueType { Incomplete = -2, Invalid = -1 };
625 ParsedUnicodeEscapeValue(SpecialValueType type)
626 : m_value(type)
627 {
628 }
629
630 bool isValid() const { return m_value >= 0; }
631 bool isIncomplete() const { return m_value == Incomplete; }
632
633 UChar32 value() const
634 {
635 ASSERT(isValid());
636 return m_value;
637 }
638
639private:
640 UChar32 m_value;
641};
642
643template<typename CharacterType>
644ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
645{
646 if (m_current == '{') {
647 shift();
648 UChar32 codePoint = 0;
649 do {
650 if (!isASCIIHexDigit(m_current))
651 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
652 codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
653 if (codePoint > UCHAR_MAX_VALUE) {
654 // For raw template literal syntax, we consume `NotEscapeSequence`.
655 // Here, we consume NotCodePoint's HexDigits.
656 //
657 // NotEscapeSequence ::
658 // u { [lookahread not one of HexDigit]
659 // u { NotCodePoint
660 // u { CodePoint [lookahead != }]
661 //
662 // NotCodePoint ::
663 // HexDigits but not if MV of HexDigits <= 0x10FFFF
664 //
665 // CodePoint ::
666 // HexDigits but not if MV of HexDigits > 0x10FFFF
667 shift();
668 while (isASCIIHexDigit(m_current))
669 shift();
670
671 return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
672 }
673 shift();
674 } while (m_current != '}');
675 shift();
676 return codePoint;
677 }
678
679 auto character2 = peek(1);
680 auto character3 = peek(2);
681 auto character4 = peek(3);
682 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
683 auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
684
685 // For raw template literal syntax, we consume `NotEscapeSequence`.
686 //
687 // NotEscapeSequence ::
688 // u [lookahead not one of HexDigit][lookahead != {]
689 // u HexDigit [lookahead not one of HexDigit]
690 // u HexDigit HexDigit [lookahead not one of HexDigit]
691 // u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
692 while (isASCIIHexDigit(m_current))
693 shift();
694
695 return result;
696 }
697
698 auto result = convertUnicode(m_current, character2, character3, character4);
699 shift();
700 shift();
701 shift();
702 shift();
703 return result;
704}
705
706template <typename T>
707void Lexer<T>::shiftLineTerminator()
708{
709 ASSERT(isLineTerminator(m_current));
710
711 m_positionBeforeLastNewline = currentPosition();
712 T prev = m_current;
713 shift();
714
715 if (prev == '\r' && m_current == '\n')
716 shift();
717
718 ++m_lineNumber;
719}
720
721template <typename T>
722ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
723{
724 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
725}
726
727template <typename T>
728ALWAYS_INLINE void Lexer<T>::skipWhitespace()
729{
730 while (isWhiteSpace(m_current))
731 shift();
732}
733
734static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
735{
736 return u_hasBinaryProperty(c, UCHAR_ID_START);
737}
738
739static inline bool isIdentStart(LChar c)
740{
741 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
742}
743
744static inline bool isIdentStart(UChar32 c)
745{
746 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
747}
748
749static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
750{
751 return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D;
752}
753
754static ALWAYS_INLINE bool isIdentPart(LChar c)
755{
756 // Character types are divided into two groups depending on whether they can be part of an
757 // identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
758 // part of an identifier. (See the CharacterType definition for more details.)
759 return typesOfLatin1Characters[c] <= CharacterOtherIdentifierPart;
760}
761
762static ALWAYS_INLINE bool isIdentPart(UChar32 c)
763{
764 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
765}
766
767static ALWAYS_INLINE bool isIdentPart(UChar c)
768{
769 return isIdentPart(static_cast<UChar32>(c));
770}
771
772template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
773{
774 if (isIdentPart(code[0]))
775 return true;
776
777 // Shortest sequence handled below is \u{0}, which is 5 characters.
778 if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
779 return false;
780
781 if (code[2] == '{') {
782 UChar32 codePoint = 0;
783 const CharacterType* pointer;
784 for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
785 auto digit = *pointer;
786 if (!isASCIIHexDigit(digit))
787 break;
788 codePoint = (codePoint << 4) | toASCIIHexValue(digit);
789 if (codePoint > UCHAR_MAX_VALUE)
790 return false;
791 }
792 return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
793 }
794
795 // Shortest sequence handled below is \uXXXX, which is 6 characters.
796 if (codeEnd - code < 6)
797 return false;
798
799 auto character1 = code[2];
800 auto character2 = code[3];
801 auto character3 = code[4];
802 auto character4 = code[5];
803 return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
804 && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
805}
806
807static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
808{
809 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
810}
811
812static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
813{
814 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
815}
816
// Accepts a decimal digit or the numeric separator '_'.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIDigit(character);
}
822
// Accepts a hexadecimal digit or the numeric separator '_'.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIHexDigit(character);
}
828
// Accepts a binary digit or the numeric separator '_'.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIBinaryDigit(character);
}
834
// Accepts an octal digit or the numeric separator '_'.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIOctalDigit(character);
}
840
841static inline LChar singleEscape(int c)
842{
843 if (c < 128) {
844 ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
845 return singleCharacterEscapeValuesForASCII[c];
846 }
847 return 0;
848}
849
850template <typename T>
851inline void Lexer<T>::record8(int c)
852{
853 ASSERT(isLatin1(c));
854 m_buffer8.append(static_cast<LChar>(c));
855}
856
857template <typename T>
858inline void Lexer<T>::append8(const T* p, size_t length)
859{
860 size_t currentSize = m_buffer8.size();
861 m_buffer8.grow(currentSize + length);
862 LChar* rawBuffer = m_buffer8.data() + currentSize;
863
864 for (size_t i = 0; i < length; i++) {
865 T c = p[i];
866 ASSERT(isLatin1(c));
867 rawBuffer[i] = c;
868 }
869}
870
871template <typename T>
872inline void Lexer<T>::append16(const LChar* p, size_t length)
873{
874 size_t currentSize = m_buffer16.size();
875 m_buffer16.grow(currentSize + length);
876 UChar* rawBuffer = m_buffer16.data() + currentSize;
877
878 for (size_t i = 0; i < length; i++)
879 rawBuffer[i] = p[i];
880}
881
882template <typename T>
883inline void Lexer<T>::record16(T c)
884{
885 m_buffer16.append(c);
886}
887
888template <typename T>
889inline void Lexer<T>::record16(int c)
890{
891 ASSERT(c >= 0);
892 ASSERT(c <= static_cast<int>(USHRT_MAX));
893 m_buffer16.append(static_cast<UChar>(c));
894}
895
896template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
897{
898 ASSERT(codePoint >= 0);
899 ASSERT(codePoint <= UCHAR_MAX_VALUE);
900 if (U_IS_BMP(codePoint))
901 record16(codePoint);
902 else {
903 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
904 append16(codeUnits, 2);
905 }
906}
907
908#if !ASSERT_DISABLED
909bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
910{
911 if (!ident)
912 return true;
913 /* Just block any use of suspicious identifiers. This is intended to
914 * be used as a safety net while implementing builtins.
915 */
916 // FIXME: How can a debug-only assertion be a safety net?
917 if (*ident == vm.propertyNames->builtinNames().callPublicName())
918 return false;
919 if (*ident == vm.propertyNames->builtinNames().applyPublicName())
920 return false;
921 if (*ident == vm.propertyNames->eval)
922 return false;
923 if (*ident == vm.propertyNames->Function)
924 return false;
925 return true;
926}
927#endif
928
929template <>
930template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
931{
932 tokenData->escaped = false;
933 const ptrdiff_t remaining = m_codeEnd - m_code;
934 if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
935 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
936 if (keyword != IDENT) {
937 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
938 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
939 }
940 }
941
942 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
943 if (isPrivateName)
944 shift();
945
946 const LChar* identifierStart = currentSourcePtr();
947 unsigned identifierLineStart = currentLineStartOffset();
948
949 while (isIdentPart(m_current))
950 shift();
951
952 if (UNLIKELY(m_current == '\\')) {
953 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
954 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
955 }
956
957 const Identifier* ident = nullptr;
958
959 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
960 int identifierLength = currentSourcePtr() - identifierStart;
961 ident = makeIdentifier(identifierStart, identifierLength);
962 if (m_parsingBuiltinFunction) {
963 if (!isSafeBuiltinIdentifier(m_vm, ident) && !isPrivateName) {
964 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
965 return ERRORTOK;
966 }
967 if (isPrivateName)
968 ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->lookUpPrivateName(*ident));
969 else if (*ident == m_vm.propertyNames->undefinedKeyword)
970 tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
971 if (!ident)
972 return INVALID_PRIVATE_NAME_ERRORTOK;
973 }
974 tokenData->ident = ident;
975 } else
976 tokenData->ident = nullptr;
977
978 if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isPrivateName) {
979 ASSERT(shouldCreateIdentifier);
980 if (remaining < maxTokenLength) {
981 const HashTableValue* entry = JSC::mainTable.entry(*ident);
982 ASSERT((remaining < maxTokenLength) || !entry);
983 if (!entry)
984 return IDENT;
985 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
986 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
987 }
988 return IDENT;
989 }
990
991 return IDENT;
992}
993
994template <>
995template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
996{
997 tokenData->escaped = false;
998 const ptrdiff_t remaining = m_codeEnd - m_code;
999 if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
1000 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1001 if (keyword != IDENT) {
1002 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
1003 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1004 }
1005 }
1006
1007 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
1008 if (isPrivateName)
1009 shift();
1010
1011 const UChar* identifierStart = currentSourcePtr();
1012 int identifierLineStart = currentLineStartOffset();
1013
1014 UChar orAllChars = 0;
1015
1016 while (isIdentPart(m_current)) {
1017 orAllChars |= m_current;
1018 shift();
1019 }
1020
1021 if (UNLIKELY(m_current == '\\')) {
1022 ASSERT(!isPrivateName);
1023 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
1024 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
1025 }
1026
1027 bool isAll8Bit = false;
1028
1029 if (!(orAllChars & ~0xff))
1030 isAll8Bit = true;
1031
1032 const Identifier* ident = nullptr;
1033
1034 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
1035 int identifierLength = currentSourcePtr() - identifierStart;
1036 if (isAll8Bit)
1037 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1038 else
1039 ident = makeIdentifier(identifierStart, identifierLength);
1040 if (m_parsingBuiltinFunction) {
1041 if (!isSafeBuiltinIdentifier(m_vm, ident) && !isPrivateName) {
1042 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1043 return ERRORTOK;
1044 }
1045 if (isPrivateName)
1046 ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->lookUpPrivateName(*ident));
1047 else if (*ident == m_vm.propertyNames->undefinedKeyword)
1048 tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
1049 if (!ident)
1050 return INVALID_PRIVATE_NAME_ERRORTOK;
1051 }
1052 tokenData->ident = ident;
1053 } else
1054 tokenData->ident = nullptr;
1055
1056 if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isPrivateName) {
1057 ASSERT(shouldCreateIdentifier);
1058 if (remaining < maxTokenLength) {
1059 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1060 ASSERT((remaining < maxTokenLength) || !entry);
1061 if (!entry)
1062 return IDENT;
1063 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1064 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1065 }
1066 return IDENT;
1067 }
1068
1069 return IDENT;
1070}
1071
1072template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1073{
1074 tokenData->escaped = true;
1075 auto identifierStart = currentSourcePtr();
1076 bool bufferRequired = false;
1077
1078 while (true) {
1079 if (LIKELY(isIdentPart(m_current))) {
1080 shift();
1081 continue;
1082 }
1083 if (LIKELY(m_current != '\\'))
1084 break;
1085
1086 // \uXXXX unicode characters.
1087 bufferRequired = true;
1088 if (identifierStart != currentSourcePtr())
1089 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1090 shift();
1091 if (UNLIKELY(m_current != 'u'))
1092 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1093 shift();
1094 auto character = parseUnicodeEscape();
1095 if (UNLIKELY(!character.isValid()))
1096 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1097 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1098 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1099 if (shouldCreateIdentifier)
1100 recordUnicodeCodePoint(character.value());
1101 identifierStart = currentSourcePtr();
1102 }
1103
1104 int identifierLength;
1105 const Identifier* ident = nullptr;
1106 if (shouldCreateIdentifier) {
1107 if (!bufferRequired) {
1108 identifierLength = currentSourcePtr() - identifierStart;
1109 ident = makeIdentifier(identifierStart, identifierLength);
1110 } else {
1111 if (identifierStart != currentSourcePtr())
1112 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1113 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1114 }
1115
1116 tokenData->ident = ident;
1117 } else
1118 tokenData->ident = nullptr;
1119
1120 m_buffer16.shrink(0);
1121
1122 if (LIKELY(!lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
1123 ASSERT(shouldCreateIdentifier);
1124 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1125 if (!entry)
1126 return IDENT;
1127 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1128 if ((token != RESERVED_IF_STRICT) || strictMode)
1129 return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
1130 }
1131
1132 return IDENT;
1133}
1134
1135static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1136{
1137 return character < 0xE;
1138}
1139
1140static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1141{
1142 return character < 0xE || !isLatin1(character);
1143}
1144
1145template <typename T>
1146template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1147{
1148 int startingOffset = currentOffset();
1149 int startingLineStartOffset = currentLineStartOffset();
1150 int startingLineNumber = lineNumber();
1151 T stringQuoteCharacter = m_current;
1152 shift();
1153
1154 const T* stringStart = currentSourcePtr();
1155
1156 while (m_current != stringQuoteCharacter) {
1157 if (UNLIKELY(m_current == '\\')) {
1158 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1159 append8(stringStart, currentSourcePtr() - stringStart);
1160 shift();
1161
1162 LChar escape = singleEscape(m_current);
1163
1164 // Most common escape sequences first.
1165 if (escape) {
1166 if (shouldBuildStrings)
1167 record8(escape);
1168 shift();
1169 } else if (UNLIKELY(isLineTerminator(m_current)))
1170 shiftLineTerminator();
1171 else if (m_current == 'x') {
1172 shift();
1173 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1174 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1175 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1176 }
1177 T prev = m_current;
1178 shift();
1179 if (shouldBuildStrings)
1180 record8(convertHex(prev, m_current));
1181 shift();
1182 } else {
1183 setOffset(startingOffset, startingLineStartOffset);
1184 setLineNumber(startingLineNumber);
1185 m_buffer8.shrink(0);
1186 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1187 }
1188 stringStart = currentSourcePtr();
1189 continue;
1190 }
1191
1192 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1193 setOffset(startingOffset, startingLineStartOffset);
1194 setLineNumber(startingLineNumber);
1195 m_buffer8.shrink(0);
1196 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1197 }
1198
1199 shift();
1200 }
1201
1202 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1203 append8(stringStart, currentSourcePtr() - stringStart);
1204 if (shouldBuildStrings) {
1205 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1206 m_buffer8.shrink(0);
1207 } else
1208 tokenData->ident = 0;
1209
1210 return StringParsedSuccessfully;
1211}
1212
1213template <typename T>
1214template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
1215{
1216 if (m_current == 'x') {
1217 shift();
1218 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1219 // For raw template literal syntax, we consume `NotEscapeSequence`.
1220 //
1221 // NotEscapeSequence ::
1222 // x [lookahread not one of HexDigit]
1223 // x HexDigit [lookahread not one of HexDigit]
1224 if (isASCIIHexDigit(m_current))
1225 shift();
1226 ASSERT(!isASCIIHexDigit(m_current));
1227
1228 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1229 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1230 }
1231
1232 T prev = m_current;
1233 shift();
1234 if (shouldBuildStrings)
1235 record16(convertHex(prev, m_current));
1236 shift();
1237
1238 return StringParsedSuccessfully;
1239 }
1240
1241 if (m_current == 'u') {
1242 shift();
1243
1244 if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
1245 if (shouldBuildStrings)
1246 record16('u');
1247 return StringParsedSuccessfully;
1248 }
1249
1250 auto character = parseUnicodeEscape();
1251 if (character.isValid()) {
1252 if (shouldBuildStrings)
1253 recordUnicodeCodePoint(character.value());
1254 return StringParsedSuccessfully;
1255 }
1256
1257 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1258 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1259 }
1260
1261 if (strictMode) {
1262 if (isASCIIDigit(m_current)) {
1263 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1264 int character1 = m_current;
1265 shift();
1266 if (character1 != '0' || isASCIIDigit(m_current)) {
1267 // For raw template literal syntax, we consume `NotEscapeSequence`.
1268 //
1269 // NotEscapeSequence ::
1270 // 0 DecimalDigit
1271 // DecimalDigit but not 0
1272 if (character1 == '0')
1273 shift();
1274
1275 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1276 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1277 }
1278 if (shouldBuildStrings)
1279 record16(0);
1280 return StringParsedSuccessfully;
1281 }
1282 } else {
1283 if (isASCIIOctalDigit(m_current)) {
1284 // Octal character sequences
1285 T character1 = m_current;
1286 shift();
1287 if (isASCIIOctalDigit(m_current)) {
1288 // Two octal characters
1289 T character2 = m_current;
1290 shift();
1291 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1292 if (shouldBuildStrings)
1293 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1294 shift();
1295 } else {
1296 if (shouldBuildStrings)
1297 record16((character1 - '0') * 8 + character2 - '0');
1298 }
1299 } else {
1300 if (shouldBuildStrings)
1301 record16(character1 - '0');
1302 }
1303 return StringParsedSuccessfully;
1304 }
1305 }
1306
1307 if (!atEnd()) {
1308 if (shouldBuildStrings)
1309 record16(m_current);
1310 shift();
1311 return StringParsedSuccessfully;
1312 }
1313
1314 m_lexErrorMessage = "Unterminated string constant"_s;
1315 return StringUnterminated;
1316}
1317
1318template <typename T>
1319template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1320{
1321 T stringQuoteCharacter = m_current;
1322 shift();
1323
1324 const T* stringStart = currentSourcePtr();
1325
1326 while (m_current != stringQuoteCharacter) {
1327 if (UNLIKELY(m_current == '\\')) {
1328 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1329 append16(stringStart, currentSourcePtr() - stringStart);
1330 shift();
1331
1332 LChar escape = singleEscape(m_current);
1333
1334 // Most common escape sequences first
1335 if (escape) {
1336 if (shouldBuildStrings)
1337 record16(escape);
1338 shift();
1339 } else if (UNLIKELY(isLineTerminator(m_current)))
1340 shiftLineTerminator();
1341 else {
1342 StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
1343 if (result != StringParsedSuccessfully)
1344 return result;
1345 }
1346
1347 stringStart = currentSourcePtr();
1348 continue;
1349 }
1350 // Fast check for characters that require special handling.
1351 // Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1352 static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1353 if (UNLIKELY(m_current < 0xE)) {
1354 // New-line or end of input is not allowed
1355 if (atEnd() || m_current == '\r' || m_current == '\n') {
1356 m_lexErrorMessage = "Unexpected EOF"_s;
1357 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1358 }
1359 // Anything else is just a normal character
1360 }
1361 shift();
1362 }
1363
1364 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1365 append16(stringStart, currentSourcePtr() - stringStart);
1366 if (shouldBuildStrings)
1367 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1368 else
1369 tokenData->ident = 0;
1370
1371 m_buffer16.shrink(0);
1372 return StringParsedSuccessfully;
1373}
1374
1375template <typename T>
1376typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1377{
1378 bool parseCookedFailed = false;
1379 const T* stringStart = currentSourcePtr();
1380 const T* rawStringStart = currentSourcePtr();
1381
1382 while (m_current != '`') {
1383 if (UNLIKELY(m_current == '\\')) {
1384 if (stringStart != currentSourcePtr())
1385 append16(stringStart, currentSourcePtr() - stringStart);
1386 shift();
1387
1388 LChar escape = singleEscape(m_current);
1389
1390 // Most common escape sequences first.
1391 if (escape) {
1392 record16(escape);
1393 shift();
1394 } else if (UNLIKELY(isLineTerminator(m_current))) {
1395 // Normalize <CR>, <CR><LF> to <LF>.
1396 if (m_current == '\r') {
1397 ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1398
1399 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1400 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1401 m_bufferForRawTemplateString16.append('\n');
1402 }
1403
1404 shiftLineTerminator();
1405 rawStringStart = currentSourcePtr();
1406 } else
1407 shiftLineTerminator();
1408 } else {
1409 bool strictMode = true;
1410 StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
1411 if (result != StringParsedSuccessfully) {
1412 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1413 parseCookedFailed = true;
1414 else
1415 return result;
1416 }
1417 }
1418
1419 stringStart = currentSourcePtr();
1420 continue;
1421 }
1422
1423 if (m_current == '$' && peek(1) == '{')
1424 break;
1425
1426 // Fast check for characters that require special handling.
1427 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1428 // as possible, and lets through all common ASCII characters.
1429 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1430 // End of input is not allowed.
1431 // Unlike String, line terminator is allowed.
1432 if (atEnd()) {
1433 m_lexErrorMessage = "Unexpected EOF"_s;
1434 return StringUnterminated;
1435 }
1436
1437 if (isLineTerminator(m_current)) {
1438 if (m_current == '\r') {
1439 // Normalize <CR>, <CR><LF> to <LF>.
1440 if (stringStart != currentSourcePtr())
1441 append16(stringStart, currentSourcePtr() - stringStart);
1442 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1443 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1444
1445 record16('\n');
1446 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1447 m_bufferForRawTemplateString16.append('\n');
1448 shiftLineTerminator();
1449 stringStart = currentSourcePtr();
1450 rawStringStart = currentSourcePtr();
1451 } else
1452 shiftLineTerminator();
1453 continue;
1454 }
1455 // Anything else is just a normal character
1456 }
1457
1458 shift();
1459 }
1460
1461 bool isTail = m_current == '`';
1462
1463 if (currentSourcePtr() != stringStart)
1464 append16(stringStart, currentSourcePtr() - stringStart);
1465 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1466 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1467
1468 if (!parseCookedFailed)
1469 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1470 else
1471 tokenData->cooked = nullptr;
1472
1473 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1474 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1475 tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1476 else
1477 tokenData->raw = nullptr;
1478
1479 tokenData->isTail = isTail;
1480
1481 m_buffer16.shrink(0);
1482 m_bufferForRawTemplateString16.shrink(0);
1483
1484 if (isTail) {
1485 // Skip `
1486 shift();
1487 } else {
1488 // Skip $ and {
1489 shift();
1490 shift();
1491 }
1492
1493 return StringParsedSuccessfully;
1494}
1495
1496template <typename T>
1497ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
1498{
1499 ASSERT(isASCIIHexDigit(m_current));
1500
1501 // Optimization: most hexadecimal values fit into 4 bytes.
1502 uint32_t hexValue = 0;
1503 int maximumDigits = 7;
1504
1505 do {
1506 if (m_current == '_') {
1507 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1508 return WTF::nullopt;
1509
1510 shift();
1511 }
1512
1513 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1514 shift();
1515 --maximumDigits;
1516 } while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
1517
1518 if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
1519 return NumberParseResult { hexValue };
1520
1521 // No more place in the hexValue buffer.
1522 // The values are shifted out and placed into the m_buffer8 vector.
1523 for (int i = 0; i < 8; ++i) {
1524 int digit = hexValue >> 28;
1525 if (digit < 10)
1526 record8(digit + '0');
1527 else
1528 record8(digit - 10 + 'a');
1529 hexValue <<= 4;
1530 }
1531
1532 while (isASCIIHexDigitOrSeparator(m_current)) {
1533 if (m_current == '_') {
1534 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1535 return WTF::nullopt;
1536
1537 shift();
1538 }
1539
1540 record8(m_current);
1541 shift();
1542 }
1543
1544 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1545 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1546
1547 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
1548}
1549
1550template <typename T>
1551ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
1552{
1553 ASSERT(isASCIIBinaryDigit(m_current));
1554
1555 // Optimization: most binary values fit into 4 bytes.
1556 uint32_t binaryValue = 0;
1557 const unsigned maximumDigits = 32;
1558 int digit = maximumDigits - 1;
1559 // Temporary buffer for the digits. Makes easier
1560 // to reconstruct the input characters when needed.
1561 LChar digits[maximumDigits];
1562
1563 do {
1564 if (m_current == '_') {
1565 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1566 return WTF::nullopt;
1567
1568 shift();
1569 }
1570
1571 binaryValue = (binaryValue << 1) + (m_current - '0');
1572 digits[digit] = m_current;
1573 shift();
1574 --digit;
1575 } while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
1576
1577 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1578 return NumberParseResult { binaryValue };
1579
1580 for (int i = maximumDigits - 1; i > digit; --i)
1581 record8(digits[i]);
1582
1583 while (isASCIIBinaryDigitOrSeparator(m_current)) {
1584 if (m_current == '_') {
1585 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1586 return WTF::nullopt;
1587
1588 shift();
1589 }
1590
1591 record8(m_current);
1592 shift();
1593 }
1594
1595 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1596 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1597
1598 if (isASCIIDigit(m_current))
1599 return WTF::nullopt;
1600
1601 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
1602}
1603
1604template <typename T>
1605ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
1606{
1607 ASSERT(isASCIIOctalDigit(m_current));
1608 ASSERT(!m_buffer8.size() || (m_buffer8.size() == 1 && m_buffer8[0] == '0'));
1609 bool isLegacyLiteral = m_buffer8.size();
1610
1611 // Optimization: most octal values fit into 4 bytes.
1612 uint32_t octalValue = 0;
1613 const unsigned maximumDigits = 10;
1614 int digit = maximumDigits - 1;
1615 // Temporary buffer for the digits. Makes easier
1616 // to reconstruct the input characters when needed.
1617 LChar digits[maximumDigits];
1618
1619 do {
1620 if (m_current == '_') {
1621 if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
1622 return WTF::nullopt;
1623
1624 shift();
1625 }
1626
1627 octalValue = octalValue * 8 + (m_current - '0');
1628 digits[digit] = m_current;
1629 shift();
1630 --digit;
1631 } while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
1632
1633 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1634 return NumberParseResult { octalValue };
1635
1636 for (int i = maximumDigits - 1; i > digit; --i)
1637 record8(digits[i]);
1638
1639 while (isASCIIOctalDigitOrSeparator(m_current)) {
1640 if (m_current == '_') {
1641 if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
1642 return WTF::nullopt;
1643
1644 shift();
1645 }
1646
1647 record8(m_current);
1648 shift();
1649 }
1650
1651 if (UNLIKELY(Options::useBigInt() && m_current == 'n') && !isLegacyLiteral)
1652 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1653
1654 if (isASCIIDigit(m_current))
1655 return WTF::nullopt;
1656
1657 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
1658}
1659
1660template <typename T>
1661ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
1662{
1663 ASSERT(isASCIIDigit(m_current) || m_buffer8.size());
1664 bool isLegacyLiteral = m_buffer8.size() && isASCIIDigitOrSeparator(m_current);
1665
1666 // Optimization: most decimal values fit into 4 bytes.
1667 uint32_t decimalValue = 0;
1668
1669 // Since parseOctal may be executed before parseDecimal,
1670 // the m_buffer8 may hold ascii digits.
1671 if (!m_buffer8.size()) {
1672 const unsigned maximumDigits = 10;
1673 int digit = maximumDigits - 1;
1674 // Temporary buffer for the digits. Makes easier
1675 // to reconstruct the input characters when needed.
1676 LChar digits[maximumDigits];
1677
1678 do {
1679 if (m_current == '_') {
1680 if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
1681 return WTF::nullopt;
1682
1683 shift();
1684 }
1685
1686 decimalValue = decimalValue * 10 + (m_current - '0');
1687 digits[digit] = m_current;
1688 shift();
1689 --digit;
1690 } while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
1691
1692 if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
1693 return NumberParseResult { decimalValue };
1694
1695 for (int i = maximumDigits - 1; i > digit; --i)
1696 record8(digits[i]);
1697 }
1698
1699 while (isASCIIDigitOrSeparator(m_current)) {
1700 if (m_current == '_') {
1701 if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
1702 return WTF::nullopt;
1703
1704 shift();
1705 }
1706
1707 record8(m_current);
1708 shift();
1709 }
1710
1711 if (UNLIKELY(Options::useBigInt() && m_current == 'n' && !isLegacyLiteral))
1712 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1713
1714 return WTF::nullopt;
1715}
1716
1717template <typename T>
1718ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
1719{
1720 ASSERT(isASCIIDigit(m_current));
1721 record8('.');
1722
1723 do {
1724 if (m_current == '_') {
1725 if (UNLIKELY(!isASCIIDigit(peek(1))))
1726 return false;
1727
1728 shift();
1729 }
1730
1731 record8(m_current);
1732 shift();
1733 } while (isASCIIDigitOrSeparator(m_current));
1734
1735 return true;
1736}
1737
1738template <typename T>
1739ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1740{
1741 record8('e');
1742 shift();
1743 if (m_current == '+' || m_current == '-') {
1744 record8(m_current);
1745 shift();
1746 }
1747
1748 if (!isASCIIDigit(m_current))
1749 return false;
1750
1751 do {
1752 if (m_current == '_') {
1753 if (UNLIKELY(!isASCIIDigit(peek(1))))
1754 return false;
1755
1756 shift();
1757 }
1758
1759 record8(m_current);
1760 shift();
1761 } while (isASCIIDigitOrSeparator(m_current));
1762
1763 return true;
1764}
1765
1766template <typename T>
1767ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1768{
1769 while (true) {
1770 while (UNLIKELY(m_current == '*')) {
1771 shift();
1772 if (m_current == '/') {
1773 shift();
1774 return true;
1775 }
1776 }
1777
1778 if (atEnd())
1779 return false;
1780
1781 if (isLineTerminator(m_current)) {
1782 shiftLineTerminator();
1783 m_hasLineTerminatorBeforeToken = true;
1784 } else
1785 shift();
1786 }
1787}
1788
1789template <typename T>
1790ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1791{
1792 // sourceURL and sourceMappingURL directives.
1793 if (!consume("source"))
1794 return;
1795
1796 if (consume("URL=")) {
1797 m_sourceURLDirective = parseCommentDirectiveValue();
1798 return;
1799 }
1800
1801 if (consume("MappingURL=")) {
1802 m_sourceMappingURLDirective = parseCommentDirectiveValue();
1803 return;
1804 }
1805}
1806
1807template <typename T>
1808ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1809{
1810 skipWhitespace();
1811 const T* stringStart = currentSourcePtr();
1812 while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd())
1813 shift();
1814 const T* stringEnd = currentSourcePtr();
1815 skipWhitespace();
1816
1817 if (!isLineTerminator(m_current) && !atEnd())
1818 return String();
1819
1820 append8(stringStart, stringEnd - stringStart);
1821 String result = String(m_buffer8.data(), m_buffer8.size());
1822 m_buffer8.shrink(0);
1823 return result;
1824}
1825
1826template <typename T>
1827template <unsigned length>
1828ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1829{
1830 unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
1831
1832 unsigned i = 0;
1833 for (; i < lengthToCheck && m_current == input[i]; i++)
1834 shift();
1835
1836 return i == lengthToCheck;
1837}
1838
1839template <typename T>
1840bool Lexer<T>::nextTokenIsColon()
1841{
1842 const T* code = m_code;
1843 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1844 code++;
1845
1846 return code < m_codeEnd && *code == ':';
1847}
1848
1849template <typename T>
1850void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1851{
1852 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1853 tokenLocation->line = lineNumber;
1854 tokenLocation->endOffset = endOffset;
1855 tokenLocation->lineStartOffset = lineStartOffset;
1856 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1857 tokenRecord->m_endPosition = endPosition;
1858 m_lastToken = token;
1859}
1860
1861template <typename T>
1862JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1863{
1864 JSTokenData* tokenData = &tokenRecord->m_data;
1865 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1866 m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
1867
1868 ASSERT(!m_error);
1869 ASSERT(m_buffer8.isEmpty());
1870 ASSERT(m_buffer16.isEmpty());
1871
1872 JSTokenType token = ERRORTOK;
1873
1874start:
1875 skipWhitespace();
1876
1877 tokenLocation->startOffset = currentOffset();
1878 ASSERT(currentOffset() >= currentLineStartOffset());
1879 tokenRecord->m_startPosition = currentPosition();
1880
1881 if (atEnd()) {
1882 token = EOFTOK;
1883 goto returnToken;
1884 }
1885
1886 CharacterType type;
1887 if (LIKELY(isLatin1(m_current)))
1888 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1889 else if (isNonLatin1IdentStart(m_current))
1890 type = CharacterIdentifierStart;
1891 else if (isLineTerminator(m_current))
1892 type = CharacterLineTerminator;
1893 else
1894 type = CharacterInvalid;
1895
1896 switch (type) {
1897 case CharacterGreater:
1898 shift();
1899 if (m_current == '>') {
1900 shift();
1901 if (m_current == '>') {
1902 shift();
1903 if (m_current == '=') {
1904 shift();
1905 token = URSHIFTEQUAL;
1906 break;
1907 }
1908 token = URSHIFT;
1909 break;
1910 }
1911 if (m_current == '=') {
1912 shift();
1913 token = RSHIFTEQUAL;
1914 break;
1915 }
1916 token = RSHIFT;
1917 break;
1918 }
1919 if (m_current == '=') {
1920 shift();
1921 token = GE;
1922 break;
1923 }
1924 token = GT;
1925 break;
1926 case CharacterEqual: {
1927 if (peek(1) == '>') {
1928 token = ARROWFUNCTION;
1929 tokenData->line = lineNumber();
1930 tokenData->offset = currentOffset();
1931 tokenData->lineStartOffset = currentLineStartOffset();
1932 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1933 shift();
1934 shift();
1935 break;
1936 }
1937
1938 shift();
1939 if (m_current == '=') {
1940 shift();
1941 if (m_current == '=') {
1942 shift();
1943 token = STREQ;
1944 break;
1945 }
1946 token = EQEQ;
1947 break;
1948 }
1949 token = EQUAL;
1950 break;
1951 }
1952 case CharacterLess:
1953 shift();
1954 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1955 if (m_scriptMode == JSParserScriptMode::Classic) {
1956 // <!-- marks the beginning of a line comment (for www usage)
1957 goto inSingleLineComment;
1958 }
1959 }
1960 if (m_current == '<') {
1961 shift();
1962 if (m_current == '=') {
1963 shift();
1964 token = LSHIFTEQUAL;
1965 break;
1966 }
1967 token = LSHIFT;
1968 break;
1969 }
1970 if (m_current == '=') {
1971 shift();
1972 token = LE;
1973 break;
1974 }
1975 token = LT;
1976 break;
1977 case CharacterExclamationMark:
1978 shift();
1979 if (m_current == '=') {
1980 shift();
1981 if (m_current == '=') {
1982 shift();
1983 token = STRNEQ;
1984 break;
1985 }
1986 token = NE;
1987 break;
1988 }
1989 token = EXCLAMATION;
1990 break;
1991 case CharacterAdd:
1992 shift();
1993 if (m_current == '+') {
1994 shift();
1995 token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
1996 break;
1997 }
1998 if (m_current == '=') {
1999 shift();
2000 token = PLUSEQUAL;
2001 break;
2002 }
2003 token = PLUS;
2004 break;
2005 case CharacterSub:
2006 shift();
2007 if (m_current == '-') {
2008 shift();
2009 if ((m_atLineStart || m_hasLineTerminatorBeforeToken) && m_current == '>') {
2010 if (m_scriptMode == JSParserScriptMode::Classic) {
2011 shift();
2012 goto inSingleLineComment;
2013 }
2014 }
2015 token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
2016 break;
2017 }
2018 if (m_current == '=') {
2019 shift();
2020 token = MINUSEQUAL;
2021 break;
2022 }
2023 token = MINUS;
2024 break;
2025 case CharacterMultiply:
2026 shift();
2027 if (m_current == '=') {
2028 shift();
2029 token = MULTEQUAL;
2030 break;
2031 }
2032 if (m_current == '*') {
2033 shift();
2034 if (m_current == '=') {
2035 shift();
2036 token = POWEQUAL;
2037 break;
2038 }
2039 token = POW;
2040 break;
2041 }
2042 token = TIMES;
2043 break;
2044 case CharacterSlash:
2045 shift();
2046 if (m_current == '/') {
2047 shift();
2048 goto inSingleLineCommentCheckForDirectives;
2049 }
2050 if (m_current == '*') {
2051 shift();
2052 if (parseMultilineComment())
2053 goto start;
2054 m_lexErrorMessage = "Multiline comment was not closed properly"_s;
2055 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
2056 goto returnError;
2057 }
2058 if (m_current == '=') {
2059 shift();
2060 token = DIVEQUAL;
2061 break;
2062 }
2063 token = DIVIDE;
2064 break;
2065 case CharacterAnd:
2066 shift();
2067 if (m_current == '&') {
2068 shift();
2069 token = AND;
2070 break;
2071 }
2072 if (m_current == '=') {
2073 shift();
2074 token = ANDEQUAL;
2075 break;
2076 }
2077 token = BITAND;
2078 break;
2079 case CharacterXor:
2080 shift();
2081 if (m_current == '=') {
2082 shift();
2083 token = XOREQUAL;
2084 break;
2085 }
2086 token = BITXOR;
2087 break;
2088 case CharacterModulo:
2089 shift();
2090 if (m_current == '=') {
2091 shift();
2092 token = MODEQUAL;
2093 break;
2094 }
2095 token = MOD;
2096 break;
2097 case CharacterOr:
2098 shift();
2099 if (m_current == '=') {
2100 shift();
2101 token = OREQUAL;
2102 break;
2103 }
2104 if (m_current == '|') {
2105 shift();
2106 token = OR;
2107 break;
2108 }
2109 token = BITOR;
2110 break;
2111 case CharacterOpenParen:
2112 token = OPENPAREN;
2113 tokenData->line = lineNumber();
2114 tokenData->offset = currentOffset();
2115 tokenData->lineStartOffset = currentLineStartOffset();
2116 shift();
2117 break;
2118 case CharacterCloseParen:
2119 token = CLOSEPAREN;
2120 shift();
2121 break;
2122 case CharacterOpenBracket:
2123 token = OPENBRACKET;
2124 shift();
2125 break;
2126 case CharacterCloseBracket:
2127 token = CLOSEBRACKET;
2128 shift();
2129 break;
2130 case CharacterComma:
2131 token = COMMA;
2132 shift();
2133 break;
2134 case CharacterColon:
2135 token = COLON;
2136 shift();
2137 break;
2138 case CharacterQuestion:
2139 shift();
2140 if (m_current == '?') {
2141 shift();
2142 token = COALESCE;
2143 break;
2144 }
2145 if (m_current == '.' && !isASCIIDigit(peek(1))) {
2146 shift();
2147 token = QUESTIONDOT;
2148 break;
2149 }
2150 token = QUESTION;
2151 break;
2152 case CharacterTilde:
2153 token = TILDE;
2154 shift();
2155 break;
2156 case CharacterSemicolon:
2157 shift();
2158 token = SEMICOLON;
2159 break;
2160 case CharacterBackQuote:
2161 shift();
2162 token = BACKQUOTE;
2163 break;
2164 case CharacterOpenBrace:
2165 tokenData->line = lineNumber();
2166 tokenData->offset = currentOffset();
2167 tokenData->lineStartOffset = currentLineStartOffset();
2168 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2169 shift();
2170 token = OPENBRACE;
2171 break;
2172 case CharacterCloseBrace:
2173 tokenData->line = lineNumber();
2174 tokenData->offset = currentOffset();
2175 tokenData->lineStartOffset = currentLineStartOffset();
2176 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2177 shift();
2178 token = CLOSEBRACE;
2179 break;
2180 case CharacterDot:
2181 shift();
2182 if (!isASCIIDigit(m_current)) {
2183 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2184 shift();
2185 shift();
2186 token = DOTDOTDOT;
2187 break;
2188 }
2189 token = DOT;
2190 break;
2191 }
2192 if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
2193 m_lexErrorMessage = "Non-number found after decimal point"_s;
2194 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2195 goto returnError;
2196 }
2197 token = DOUBLE;
2198 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2199 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2200 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2201 goto returnError;
2202 }
2203 size_t parsedLength;
2204 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2205 if (token == INTEGER)
2206 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2207
2208 if (UNLIKELY(isIdentStart(m_current))) {
2209 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2210 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2211 goto returnError;
2212 }
2213 m_buffer8.shrink(0);
2214 break;
2215 case CharacterZero:
2216 shift();
2217 if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
2218 if (UNLIKELY(!isASCIIHexDigit(peek(1)))) {
2219 m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2220 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2221 goto returnError;
2222 }
2223
2224 // Shift out the 'x' prefix.
2225 shift();
2226
2227 auto parseNumberResult = parseHex();
2228 if (!parseNumberResult)
2229 tokenData->doubleValue = 0;
2230 else if (WTF::holds_alternative<double>(*parseNumberResult))
2231 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2232 else {
2233 token = BIGINT;
2234 shift();
2235 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2236 tokenData->radix = 16;
2237 }
2238
2239 if (UNLIKELY(isIdentStart(m_current))) {
2240 m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2241 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2242 goto returnError;
2243 }
2244 if (LIKELY(token != BIGINT))
2245 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2246 m_buffer8.shrink(0);
2247 break;
2248 }
2249 if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
2250 if (UNLIKELY(!isASCIIBinaryDigit(peek(1)))) {
2251 m_lexErrorMessage = "No binary digits after '0b'"_s;
2252 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2253 goto returnError;
2254 }
2255
2256 // Shift out the 'b' prefix.
2257 shift();
2258
2259 auto parseNumberResult = parseBinary();
2260 if (!parseNumberResult)
2261 tokenData->doubleValue = 0;
2262 else if (WTF::holds_alternative<double>(*parseNumberResult))
2263 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2264 else {
2265 token = BIGINT;
2266 shift();
2267 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2268 tokenData->radix = 2;
2269 }
2270
2271 if (UNLIKELY(isIdentStart(m_current))) {
2272 m_lexErrorMessage = "No space between binary literal and identifier"_s;
2273 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2274 goto returnError;
2275 }
2276 if (LIKELY(token != BIGINT))
2277 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2278 m_buffer8.shrink(0);
2279 break;
2280 }
2281
2282 if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
2283 if (UNLIKELY(!isASCIIOctalDigit(peek(1)))) {
2284 m_lexErrorMessage = "No octal digits after '0o'"_s;
2285 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2286 goto returnError;
2287 }
2288
2289 // Shift out the 'o' prefix.
2290 shift();
2291
2292 auto parseNumberResult = parseOctal();
2293 if (!parseNumberResult)
2294 tokenData->doubleValue = 0;
2295 else if (WTF::holds_alternative<double>(*parseNumberResult))
2296 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2297 else {
2298 token = BIGINT;
2299 shift();
2300 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2301 tokenData->radix = 8;
2302 }
2303
2304 if (UNLIKELY(isIdentStart(m_current))) {
2305 m_lexErrorMessage = "No space between octal literal and identifier"_s;
2306 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2307 goto returnError;
2308 }
2309 if (LIKELY(token != BIGINT))
2310 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2311 m_buffer8.shrink(0);
2312 break;
2313 }
2314
2315 if (UNLIKELY(m_current == '_')) {
2316 m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
2317 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2318 goto returnError;
2319 }
2320
2321 record8('0');
2322 if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
2323 m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2324 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2325 goto returnError;
2326 }
2327 if (isASCIIOctalDigit(m_current)) {
2328 auto parseNumberResult = parseOctal();
2329 if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2330 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2331 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2332 }
2333 }
2334 FALLTHROUGH;
2335 case CharacterNumber:
2336 if (LIKELY(token != INTEGER && token != DOUBLE)) {
2337 auto parseNumberResult = parseDecimal();
2338 if (parseNumberResult) {
2339 if (WTF::holds_alternative<double>(*parseNumberResult)) {
2340 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2341 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2342 } else {
2343 token = BIGINT;
2344 shift();
2345 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2346 tokenData->radix = 10;
2347 }
2348 } else {
2349 token = INTEGER;
2350 if (m_current == '.') {
2351 shift();
2352 if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
2353 m_lexErrorMessage = "Non-number found after decimal point"_s;
2354 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2355 goto returnError;
2356 }
2357 token = DOUBLE;
2358 }
2359 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2360 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2361 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2362 goto returnError;
2363 }
2364 size_t parsedLength;
2365 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2366 if (token == INTEGER)
2367 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2368 }
2369 }
2370
2371 if (UNLIKELY(isIdentStart(m_current))) {
2372 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2373 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2374 goto returnError;
2375 }
2376 m_buffer8.shrink(0);
2377 break;
2378 case CharacterQuote: {
2379 StringParseResult result = StringCannotBeParsed;
2380 if (lexerFlags.contains(LexerFlags::DontBuildStrings))
2381 result = parseString<false>(tokenData, strictMode);
2382 else
2383 result = parseString<true>(tokenData, strictMode);
2384
2385 if (UNLIKELY(result != StringParsedSuccessfully)) {
2386 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2387 goto returnError;
2388 }
2389 shift();
2390 token = STRING;
2391 break;
2392 }
2393 case CharacterIdentifierStart:
2394 ASSERT(isIdentStart(m_current));
2395 FALLTHROUGH;
2396 case CharacterBackSlash:
2397 parseIdent:
2398 if (lexerFlags.contains(LexerFlags::DontBuildKeywords))
2399 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2400 else
2401 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2402 break;
2403 case CharacterLineTerminator:
2404 ASSERT(isLineTerminator(m_current));
2405 shiftLineTerminator();
2406 m_atLineStart = true;
2407 m_hasLineTerminatorBeforeToken = true;
2408 m_lineStart = m_code;
2409 goto start;
2410 case CharacterHash:
2411 // Hashbang is only permitted at the start of the source text.
2412 if (peek(1) == '!' && !currentOffset()) {
2413 shift();
2414 shift();
2415 goto inSingleLineComment;
2416 }
2417 goto invalidCharacter;
2418 case CharacterPrivateIdentifierStart:
2419 if (m_parsingBuiltinFunction)
2420 goto parseIdent;
2421 goto invalidCharacter;
2422 case CharacterOtherIdentifierPart:
2423 case CharacterInvalid:
2424 goto invalidCharacter;
2425 default:
2426 RELEASE_ASSERT_NOT_REACHED();
2427 m_lexErrorMessage = "Internal Error"_s;
2428 token = ERRORTOK;
2429 goto returnError;
2430 }
2431
2432 m_atLineStart = false;
2433 goto returnToken;
2434
2435inSingleLineCommentCheckForDirectives:
2436 // Script comment directives like "//# sourceURL=test.js".
2437 if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
2438 shift();
2439 shift();
2440 parseCommentDirective();
2441 }
2442 // Fall through to complete single line comment parsing.
2443
2444inSingleLineComment:
2445 {
2446 auto lineNumber = m_lineNumber;
2447 auto endOffset = currentOffset();
2448 auto lineStartOffset = currentLineStartOffset();
2449 auto endPosition = currentPosition();
2450
2451 while (!isLineTerminator(m_current)) {
2452 if (atEnd()) {
2453 token = EOFTOK;
2454 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2455 return token;
2456 }
2457 shift();
2458 }
2459 shiftLineTerminator();
2460 m_atLineStart = true;
2461 m_hasLineTerminatorBeforeToken = true;
2462 m_lineStart = m_code;
2463 if (!lastTokenWasRestrKeyword())
2464 goto start;
2465
2466 token = SEMICOLON;
2467 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2468 return token;
2469 }
2470
2471returnToken:
2472 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2473 return token;
2474
2475invalidCharacter:
2476 m_lexErrorMessage = invalidCharacterMessage();
2477 token = ERRORTOK;
2478 // Falls through to return error.
2479
2480returnError:
2481 m_error = true;
2482 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2483 RELEASE_ASSERT(token & ErrorTokenFlag);
2484 return token;
2485}
2486
2487template <typename T>
2488static inline void orCharacter(UChar&, UChar);
2489
2490template <>
2491inline void orCharacter<LChar>(UChar&, UChar) { }
2492
2493template <>
2494inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2495{
2496 orAccumulator |= character;
2497}
2498
2499template <typename T>
2500JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2501{
2502 JSTokenData* tokenData = &tokenRecord->m_data;
2503 ASSERT(m_buffer16.isEmpty());
2504
2505 bool lastWasEscape = false;
2506 bool inBrackets = false;
2507 UChar charactersOredTogether = 0;
2508
2509 if (patternPrefix) {
2510 ASSERT(!isLineTerminator(patternPrefix));
2511 ASSERT(patternPrefix != '/');
2512 ASSERT(patternPrefix != '[');
2513 record16(patternPrefix);
2514 }
2515
2516 while (true) {
2517 if (isLineTerminator(m_current) || atEnd()) {
2518 m_buffer16.shrink(0);
2519 JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2520 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2521 m_error = true;
2522 m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2523 return token;
2524 }
2525
2526 T prev = m_current;
2527
2528 shift();
2529
2530 if (prev == '/' && !lastWasEscape && !inBrackets)
2531 break;
2532
2533 record16(prev);
2534 orCharacter<T>(charactersOredTogether, prev);
2535
2536 if (lastWasEscape) {
2537 lastWasEscape = false;
2538 continue;
2539 }
2540
2541 switch (prev) {
2542 case '[':
2543 inBrackets = true;
2544 break;
2545 case ']':
2546 inBrackets = false;
2547 break;
2548 case '\\':
2549 lastWasEscape = true;
2550 break;
2551 }
2552 }
2553
2554 tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2555
2556 m_buffer16.shrink(0);
2557 charactersOredTogether = 0;
2558
2559 while (isIdentPart(m_current)) {
2560 record16(m_current);
2561 orCharacter<T>(charactersOredTogether, m_current);
2562 shift();
2563 }
2564
2565 tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2566 m_buffer16.shrink(0);
2567
2568 // Since RegExp always ends with /, m_atLineStart always becomes false.
2569 m_atLineStart = false;
2570
2571 JSTokenType token = REGEXP;
2572 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2573 return token;
2574}
2575
2576template <typename T>
2577JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2578{
2579 JSTokenData* tokenData = &tokenRecord->m_data;
2580 ASSERT(!m_error);
2581 ASSERT(m_buffer16.isEmpty());
2582
2583 // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2584 // So in this re-scan phase, shift() is not needed here.
2585 StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2586 JSTokenType token = ERRORTOK;
2587 if (UNLIKELY(result != StringParsedSuccessfully)) {
2588 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2589 m_error = true;
2590 } else
2591 token = TEMPLATE;
2592
2593 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2594 m_atLineStart = false;
2595 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2596 return token;
2597}
2598
2599template <typename T>
2600void Lexer<T>::clear()
2601{
2602 m_arena = 0;
2603
2604 Vector<LChar> newBuffer8;
2605 m_buffer8.swap(newBuffer8);
2606
2607 Vector<UChar> newBuffer16;
2608 m_buffer16.swap(newBuffer16);
2609
2610 Vector<UChar> newBufferForRawTemplateString16;
2611 m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2612
2613 m_isReparsingFunction = false;
2614}
2615
// Explicitly instantiate the two flavors of Lexer we need (LChar and UChar)
// here, instead of putting most of this file in Lexer.h.
template class Lexer<LChar>;
template class Lexer<UChar>;
2619
2620} // namespace JSC
2621