1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006-2017 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2010 Zoltan Herczeg ([email protected])
6 * Copyright (C) 2012 Mathias Bynens ([email protected])
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "Lexer.h"
27
28#include "BuiltinNames.h"
29#include "Identifier.h"
30#include "JSCInlines.h"
31#include "JSFunctionInlines.h"
32#include "KeywordLookup.h"
33#include "Lexer.lut.h"
34#include "Nodes.h"
35#include "ParseInt.h"
36#include "Parser.h"
37#include <ctype.h>
38#include <limits.h>
39#include <string.h>
40#include <wtf/Assertions.h>
41#include <wtf/HexNumber.h>
42#include <wtf/Variant.h>
43#include <wtf/dtoa.h>
44
45namespace JSC {
46
47bool isLexerKeyword(const Identifier& identifier)
48{
49 return JSC::mainTable.entry(identifier);
50}
51
// Classification of a Latin-1 character; these are the values stored in typesOfLatin1Characters.
// The enumerator order is significant: isIdentPart() treats every value that compares
// <= CharacterOtherIdentifierPart as an identifier part, and isIdentStart() tests for
// CharacterIdentifierStart exactly.
52enum CharacterType {
53 // Types for the main switch
54
55 // The first three types are fixed, and also used for identifying
56 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
57 CharacterIdentifierStart,
58 CharacterZero,
59 CharacterNumber,
60
61 // For single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
62 // (http://unicode.org/reports/tr31/#Backward_Compatibility)
63 CharacterOtherIdentifierPart,
64
65 CharacterInvalid,
66 CharacterLineTerminator,
67 CharacterExclamationMark,
68 CharacterOpenParen,
69 CharacterCloseParen,
70 CharacterOpenBracket,
71 CharacterCloseBracket,
72 CharacterComma,
73 CharacterColon,
74 CharacterQuestion,
75 CharacterTilde,
76 CharacterQuote,
77 CharacterBackQuote,
78 CharacterDot,
79 CharacterSlash,
80 CharacterBackSlash,
81 CharacterSemicolon,
82 CharacterOpenBrace,
83 CharacterCloseBrace,
84
85 CharacterAdd,
86 CharacterSub,
87 CharacterMultiply,
88 CharacterModulo,
89 CharacterAnd,
90 CharacterXor,
91 CharacterOr,
92 CharacterLess,
93 CharacterGreater,
94 CharacterEqual,
95
96 // Other types
97 CharacterWhiteSpace,
98 CharacterPrivateIdentifierStart
99};
100
101// 256 Latin-1 codes
// Maps each Latin-1 code (0-255, used directly as the index) to its CharacterType.
// isIdentStart(LChar) and isIdentPart(LChar) below read this table.
102static constexpr const unsigned short typesOfLatin1Characters[256] = {
103/* 0 - Null */ CharacterInvalid,
104/* 1 - Start of Heading */ CharacterInvalid,
105/* 2 - Start of Text */ CharacterInvalid,
106/* 3 - End of Text */ CharacterInvalid,
107/* 4 - End of Transm. */ CharacterInvalid,
108/* 5 - Enquiry */ CharacterInvalid,
109/* 6 - Acknowledgment */ CharacterInvalid,
110/* 7 - Bell */ CharacterInvalid,
111/* 8 - Back Space */ CharacterInvalid,
112/* 9 - Horizontal Tab */ CharacterWhiteSpace,
113/* 10 - Line Feed */ CharacterLineTerminator,
114/* 11 - Vertical Tab */ CharacterWhiteSpace,
115/* 12 - Form Feed */ CharacterWhiteSpace,
116/* 13 - Carriage Return */ CharacterLineTerminator,
117/* 14 - Shift Out */ CharacterInvalid,
118/* 15 - Shift In */ CharacterInvalid,
119/* 16 - Data Line Escape */ CharacterInvalid,
120/* 17 - Device Control 1 */ CharacterInvalid,
121/* 18 - Device Control 2 */ CharacterInvalid,
122/* 19 - Device Control 3 */ CharacterInvalid,
123/* 20 - Device Control 4 */ CharacterInvalid,
124/* 21 - Negative Ack. */ CharacterInvalid,
125/* 22 - Synchronous Idle */ CharacterInvalid,
126/* 23 - End of Transmit */ CharacterInvalid,
127/* 24 - Cancel */ CharacterInvalid,
128/* 25 - End of Medium */ CharacterInvalid,
129/* 26 - Substitute */ CharacterInvalid,
130/* 27 - Escape */ CharacterInvalid,
131/* 28 - File Separator */ CharacterInvalid,
132/* 29 - Group Separator */ CharacterInvalid,
133/* 30 - Record Separator */ CharacterInvalid,
134/* 31 - Unit Separator */ CharacterInvalid,
135/* 32 - Space */ CharacterWhiteSpace,
136/* 33 - ! */ CharacterExclamationMark,
137/* 34 - " */ CharacterQuote,
138/* 35 - # */ CharacterInvalid,
139/* 36 - $ */ CharacterIdentifierStart,
140/* 37 - % */ CharacterModulo,
141/* 38 - & */ CharacterAnd,
142/* 39 - ' */ CharacterQuote,
143/* 40 - ( */ CharacterOpenParen,
144/* 41 - ) */ CharacterCloseParen,
145/* 42 - * */ CharacterMultiply,
146/* 43 - + */ CharacterAdd,
147/* 44 - , */ CharacterComma,
148/* 45 - - */ CharacterSub,
149/* 46 - . */ CharacterDot,
150/* 47 - / */ CharacterSlash,
151/* 48 - 0 */ CharacterZero,
152/* 49 - 1 */ CharacterNumber,
153/* 50 - 2 */ CharacterNumber,
154/* 51 - 3 */ CharacterNumber,
155/* 52 - 4 */ CharacterNumber,
156/* 53 - 5 */ CharacterNumber,
157/* 54 - 6 */ CharacterNumber,
158/* 55 - 7 */ CharacterNumber,
159/* 56 - 8 */ CharacterNumber,
160/* 57 - 9 */ CharacterNumber,
161/* 58 - : */ CharacterColon,
162/* 59 - ; */ CharacterSemicolon,
163/* 60 - < */ CharacterLess,
164/* 61 - = */ CharacterEqual,
165/* 62 - > */ CharacterGreater,
166/* 63 - ? */ CharacterQuestion,
167/* 64 - @ */ CharacterPrivateIdentifierStart,
168/* 65 - A */ CharacterIdentifierStart,
169/* 66 - B */ CharacterIdentifierStart,
170/* 67 - C */ CharacterIdentifierStart,
171/* 68 - D */ CharacterIdentifierStart,
172/* 69 - E */ CharacterIdentifierStart,
173/* 70 - F */ CharacterIdentifierStart,
174/* 71 - G */ CharacterIdentifierStart,
175/* 72 - H */ CharacterIdentifierStart,
176/* 73 - I */ CharacterIdentifierStart,
177/* 74 - J */ CharacterIdentifierStart,
178/* 75 - K */ CharacterIdentifierStart,
179/* 76 - L */ CharacterIdentifierStart,
180/* 77 - M */ CharacterIdentifierStart,
181/* 78 - N */ CharacterIdentifierStart,
182/* 79 - O */ CharacterIdentifierStart,
183/* 80 - P */ CharacterIdentifierStart,
184/* 81 - Q */ CharacterIdentifierStart,
185/* 82 - R */ CharacterIdentifierStart,
186/* 83 - S */ CharacterIdentifierStart,
187/* 84 - T */ CharacterIdentifierStart,
188/* 85 - U */ CharacterIdentifierStart,
189/* 86 - V */ CharacterIdentifierStart,
190/* 87 - W */ CharacterIdentifierStart,
191/* 88 - X */ CharacterIdentifierStart,
192/* 89 - Y */ CharacterIdentifierStart,
193/* 90 - Z */ CharacterIdentifierStart,
194/* 91 - [ */ CharacterOpenBracket,
195/* 92 - \ */ CharacterBackSlash,
196/* 93 - ] */ CharacterCloseBracket,
197/* 94 - ^ */ CharacterXor,
198/* 95 - _ */ CharacterIdentifierStart,
199/* 96 - ` */ CharacterBackQuote,
200/* 97 - a */ CharacterIdentifierStart,
201/* 98 - b */ CharacterIdentifierStart,
202/* 99 - c */ CharacterIdentifierStart,
203/* 100 - d */ CharacterIdentifierStart,
204/* 101 - e */ CharacterIdentifierStart,
205/* 102 - f */ CharacterIdentifierStart,
206/* 103 - g */ CharacterIdentifierStart,
207/* 104 - h */ CharacterIdentifierStart,
208/* 105 - i */ CharacterIdentifierStart,
209/* 106 - j */ CharacterIdentifierStart,
210/* 107 - k */ CharacterIdentifierStart,
211/* 108 - l */ CharacterIdentifierStart,
212/* 109 - m */ CharacterIdentifierStart,
213/* 110 - n */ CharacterIdentifierStart,
214/* 111 - o */ CharacterIdentifierStart,
215/* 112 - p */ CharacterIdentifierStart,
216/* 113 - q */ CharacterIdentifierStart,
217/* 114 - r */ CharacterIdentifierStart,
218/* 115 - s */ CharacterIdentifierStart,
219/* 116 - t */ CharacterIdentifierStart,
220/* 117 - u */ CharacterIdentifierStart,
221/* 118 - v */ CharacterIdentifierStart,
222/* 119 - w */ CharacterIdentifierStart,
223/* 120 - x */ CharacterIdentifierStart,
224/* 121 - y */ CharacterIdentifierStart,
225/* 122 - z */ CharacterIdentifierStart,
226/* 123 - { */ CharacterOpenBrace,
227/* 124 - | */ CharacterOr,
228/* 125 - } */ CharacterCloseBrace,
229/* 126 - ~ */ CharacterTilde,
230/* 127 - Delete */ CharacterInvalid,
231/* 128 - Cc category */ CharacterInvalid,
232/* 129 - Cc category */ CharacterInvalid,
233/* 130 - Cc category */ CharacterInvalid,
234/* 131 - Cc category */ CharacterInvalid,
235/* 132 - Cc category */ CharacterInvalid,
236/* 133 - Cc category */ CharacterInvalid,
237/* 134 - Cc category */ CharacterInvalid,
238/* 135 - Cc category */ CharacterInvalid,
239/* 136 - Cc category */ CharacterInvalid,
240/* 137 - Cc category */ CharacterInvalid,
241/* 138 - Cc category */ CharacterInvalid,
242/* 139 - Cc category */ CharacterInvalid,
243/* 140 - Cc category */ CharacterInvalid,
244/* 141 - Cc category */ CharacterInvalid,
245/* 142 - Cc category */ CharacterInvalid,
246/* 143 - Cc category */ CharacterInvalid,
247/* 144 - Cc category */ CharacterInvalid,
248/* 145 - Cc category */ CharacterInvalid,
249/* 146 - Cc category */ CharacterInvalid,
250/* 147 - Cc category */ CharacterInvalid,
251/* 148 - Cc category */ CharacterInvalid,
252/* 149 - Cc category */ CharacterInvalid,
253/* 150 - Cc category */ CharacterInvalid,
254/* 151 - Cc category */ CharacterInvalid,
255/* 152 - Cc category */ CharacterInvalid,
256/* 153 - Cc category */ CharacterInvalid,
257/* 154 - Cc category */ CharacterInvalid,
258/* 155 - Cc category */ CharacterInvalid,
259/* 156 - Cc category */ CharacterInvalid,
260/* 157 - Cc category */ CharacterInvalid,
261/* 158 - Cc category */ CharacterInvalid,
262/* 159 - Cc category */ CharacterInvalid,
263/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
264/* 161 - Po category */ CharacterInvalid,
265/* 162 - Sc category */ CharacterInvalid,
266/* 163 - Sc category */ CharacterInvalid,
267/* 164 - Sc category */ CharacterInvalid,
268/* 165 - Sc category */ CharacterInvalid,
269/* 166 - So category */ CharacterInvalid,
270/* 167 - So category */ CharacterInvalid,
271/* 168 - Sk category */ CharacterInvalid,
272/* 169 - So category */ CharacterInvalid,
273/* 170 - Ll category */ CharacterIdentifierStart,
274/* 171 - Pi category */ CharacterInvalid,
275/* 172 - Sm category */ CharacterInvalid,
276/* 173 - Cf category */ CharacterInvalid,
277/* 174 - So category */ CharacterInvalid,
278/* 175 - Sk category */ CharacterInvalid,
279/* 176 - So category */ CharacterInvalid,
280/* 177 - Sm category */ CharacterInvalid,
281/* 178 - No category */ CharacterInvalid,
282/* 179 - No category */ CharacterInvalid,
283/* 180 - Sk category */ CharacterInvalid,
284/* 181 - Ll category */ CharacterIdentifierStart,
285/* 182 - So category */ CharacterInvalid,
286/* 183 - Po category */ CharacterOtherIdentifierPart,
287/* 184 - Sk category */ CharacterInvalid,
288/* 185 - No category */ CharacterInvalid,
289/* 186 - Ll category */ CharacterIdentifierStart,
290/* 187 - Pf category */ CharacterInvalid,
291/* 188 - No category */ CharacterInvalid,
292/* 189 - No category */ CharacterInvalid,
293/* 190 - No category */ CharacterInvalid,
294/* 191 - Po category */ CharacterInvalid,
295/* 192 - Lu category */ CharacterIdentifierStart,
296/* 193 - Lu category */ CharacterIdentifierStart,
297/* 194 - Lu category */ CharacterIdentifierStart,
298/* 195 - Lu category */ CharacterIdentifierStart,
299/* 196 - Lu category */ CharacterIdentifierStart,
300/* 197 - Lu category */ CharacterIdentifierStart,
301/* 198 - Lu category */ CharacterIdentifierStart,
302/* 199 - Lu category */ CharacterIdentifierStart,
303/* 200 - Lu category */ CharacterIdentifierStart,
304/* 201 - Lu category */ CharacterIdentifierStart,
305/* 202 - Lu category */ CharacterIdentifierStart,
306/* 203 - Lu category */ CharacterIdentifierStart,
307/* 204 - Lu category */ CharacterIdentifierStart,
308/* 205 - Lu category */ CharacterIdentifierStart,
309/* 206 - Lu category */ CharacterIdentifierStart,
310/* 207 - Lu category */ CharacterIdentifierStart,
311/* 208 - Lu category */ CharacterIdentifierStart,
312/* 209 - Lu category */ CharacterIdentifierStart,
313/* 210 - Lu category */ CharacterIdentifierStart,
314/* 211 - Lu category */ CharacterIdentifierStart,
315/* 212 - Lu category */ CharacterIdentifierStart,
316/* 213 - Lu category */ CharacterIdentifierStart,
317/* 214 - Lu category */ CharacterIdentifierStart,
318/* 215 - Sm category */ CharacterInvalid,
319/* 216 - Lu category */ CharacterIdentifierStart,
320/* 217 - Lu category */ CharacterIdentifierStart,
321/* 218 - Lu category */ CharacterIdentifierStart,
322/* 219 - Lu category */ CharacterIdentifierStart,
323/* 220 - Lu category */ CharacterIdentifierStart,
324/* 221 - Lu category */ CharacterIdentifierStart,
325/* 222 - Lu category */ CharacterIdentifierStart,
326/* 223 - Ll category */ CharacterIdentifierStart,
327/* 224 - Ll category */ CharacterIdentifierStart,
328/* 225 - Ll category */ CharacterIdentifierStart,
329/* 226 - Ll category */ CharacterIdentifierStart,
330/* 227 - Ll category */ CharacterIdentifierStart,
331/* 228 - Ll category */ CharacterIdentifierStart,
332/* 229 - Ll category */ CharacterIdentifierStart,
333/* 230 - Ll category */ CharacterIdentifierStart,
334/* 231 - Ll category */ CharacterIdentifierStart,
335/* 232 - Ll category */ CharacterIdentifierStart,
336/* 233 - Ll category */ CharacterIdentifierStart,
337/* 234 - Ll category */ CharacterIdentifierStart,
338/* 235 - Ll category */ CharacterIdentifierStart,
339/* 236 - Ll category */ CharacterIdentifierStart,
340/* 237 - Ll category */ CharacterIdentifierStart,
341/* 238 - Ll category */ CharacterIdentifierStart,
342/* 239 - Ll category */ CharacterIdentifierStart,
343/* 240 - Ll category */ CharacterIdentifierStart,
344/* 241 - Ll category */ CharacterIdentifierStart,
345/* 242 - Ll category */ CharacterIdentifierStart,
346/* 243 - Ll category */ CharacterIdentifierStart,
347/* 244 - Ll category */ CharacterIdentifierStart,
348/* 245 - Ll category */ CharacterIdentifierStart,
349/* 246 - Ll category */ CharacterIdentifierStart,
350/* 247 - Sm category */ CharacterInvalid,
351/* 248 - Ll category */ CharacterIdentifierStart,
352/* 249 - Ll category */ CharacterIdentifierStart,
353/* 250 - Ll category */ CharacterIdentifierStart,
354/* 251 - Ll category */ CharacterIdentifierStart,
355/* 252 - Ll category */ CharacterIdentifierStart,
356/* 253 - Ll category */ CharacterIdentifierStart,
357/* 254 - Ll category */ CharacterIdentifierStart,
358/* 255 - Ll category */ CharacterIdentifierStart
359};
360
361// This table provides the character that results from \X, where X is the ASCII code (0-127) used
362// directly as the index into the table. A table value of 0 means that more processing needs to be done.
// singleEscape() below is the accessor; it returns 0 for any code >= 128.
363static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
364/* 0 - Null */ 0,
365/* 1 - Start of Heading */ 0,
366/* 2 - Start of Text */ 0,
367/* 3 - End of Text */ 0,
368/* 4 - End of Transm. */ 0,
369/* 5 - Enquiry */ 0,
370/* 6 - Acknowledgment */ 0,
371/* 7 - Bell */ 0,
372/* 8 - Back Space */ 0,
373/* 9 - Horizontal Tab */ 0,
374/* 10 - Line Feed */ 0,
375/* 11 - Vertical Tab */ 0,
376/* 12 - Form Feed */ 0,
377/* 13 - Carriage Return */ 0,
378/* 14 - Shift Out */ 0,
379/* 15 - Shift In */ 0,
380/* 16 - Data Line Escape */ 0,
381/* 17 - Device Control 1 */ 0,
382/* 18 - Device Control 2 */ 0,
383/* 19 - Device Control 3 */ 0,
384/* 20 - Device Control 4 */ 0,
385/* 21 - Negative Ack. */ 0,
386/* 22 - Synchronous Idle */ 0,
387/* 23 - End of Transmit */ 0,
388/* 24 - Cancel */ 0,
389/* 25 - End of Medium */ 0,
390/* 26 - Substitute */ 0,
391/* 27 - Escape */ 0,
392/* 28 - File Separator */ 0,
393/* 29 - Group Separator */ 0,
394/* 30 - Record Separator */ 0,
395/* 31 - Unit Separator */ 0,
396/* 32 - Space */ ' ',
397/* 33 - ! */ '!',
398/* 34 - " */ '"',
399/* 35 - # */ '#',
400/* 36 - $ */ '$',
401/* 37 - % */ '%',
402/* 38 - & */ '&',
403/* 39 - ' */ '\'',
404/* 40 - ( */ '(',
405/* 41 - ) */ ')',
406/* 42 - * */ '*',
407/* 43 - + */ '+',
408/* 44 - , */ ',',
409/* 45 - - */ '-',
410/* 46 - . */ '.',
411/* 47 - / */ '/',
412/* 48 - 0 */ 0,
413/* 49 - 1 */ 0,
414/* 50 - 2 */ 0,
415/* 51 - 3 */ 0,
416/* 52 - 4 */ 0,
417/* 53 - 5 */ 0,
418/* 54 - 6 */ 0,
419/* 55 - 7 */ 0,
420/* 56 - 8 */ 0,
421/* 57 - 9 */ 0,
422/* 58 - : */ ':',
423/* 59 - ; */ ';',
424/* 60 - < */ '<',
425/* 61 - = */ '=',
426/* 62 - > */ '>',
427/* 63 - ? */ '?',
428/* 64 - @ */ '@',
429/* 65 - A */ 'A',
430/* 66 - B */ 'B',
431/* 67 - C */ 'C',
432/* 68 - D */ 'D',
433/* 69 - E */ 'E',
434/* 70 - F */ 'F',
435/* 71 - G */ 'G',
436/* 72 - H */ 'H',
437/* 73 - I */ 'I',
438/* 74 - J */ 'J',
439/* 75 - K */ 'K',
440/* 76 - L */ 'L',
441/* 77 - M */ 'M',
442/* 78 - N */ 'N',
443/* 79 - O */ 'O',
444/* 80 - P */ 'P',
445/* 81 - Q */ 'Q',
446/* 82 - R */ 'R',
447/* 83 - S */ 'S',
448/* 84 - T */ 'T',
449/* 85 - U */ 'U',
450/* 86 - V */ 'V',
451/* 87 - W */ 'W',
452/* 88 - X */ 'X',
453/* 89 - Y */ 'Y',
454/* 90 - Z */ 'Z',
455/* 91 - [ */ '[',
456/* 92 - \ */ '\\',
457/* 93 - ] */ ']',
458/* 94 - ^ */ '^',
459/* 95 - _ */ '_',
460/* 96 - ` */ '`',
461/* 97 - a */ 'a',
462/* 98 - b */ 0x08,
463/* 99 - c */ 'c',
464/* 100 - d */ 'd',
465/* 101 - e */ 'e',
466/* 102 - f */ 0x0C,
467/* 103 - g */ 'g',
468/* 104 - h */ 'h',
469/* 105 - i */ 'i',
470/* 106 - j */ 'j',
471/* 107 - k */ 'k',
472/* 108 - l */ 'l',
473/* 109 - m */ 'm',
474/* 110 - n */ 0x0A,
475/* 111 - o */ 'o',
476/* 112 - p */ 'p',
477/* 113 - q */ 'q',
478/* 114 - r */ 0x0D,
479/* 115 - s */ 's',
480/* 116 - t */ 0x09,
481/* 117 - u */ 0,
482/* 118 - v */ 0x0B,
483/* 119 - w */ 'w',
484/* 120 - x */ 0,
485/* 121 - y */ 'y',
486/* 122 - z */ 'z',
487/* 123 - { */ '{',
488/* 124 - | */ '|',
489/* 125 - } */ '}',
490/* 126 - ~ */ '~',
491/* 127 - Delete */ 0
492};
493
494template <typename T>
495Lexer<T>::Lexer(VM* vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
496 : m_isReparsingFunction(false)
497 , m_vm(vm)
498 , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
499 , m_scriptMode(scriptMode)
500{
501}
502
503static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
504{
505 if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
506 return INTEGER;
507 return DOUBLE;
508}
509
510template <typename T>
511Lexer<T>::~Lexer()
512{
513}
514
515template <typename T>
516String Lexer<T>::invalidCharacterMessage() const
517{
518 switch (m_current) {
519 case 0:
520 return "Invalid character: '\\0'"_s;
521 case 10:
522 return "Invalid character: '\\n'"_s;
523 case 11:
524 return "Invalid character: '\\v'"_s;
525 case 13:
526 return "Invalid character: '\\r'"_s;
527 case 35:
528 return "Invalid character: '#'"_s;
529 case 64:
530 return "Invalid character: '@'"_s;
531 case 96:
532 return "Invalid character: '`'"_s;
533 default:
534 return makeString("Invalid character '\\u", hex(m_current, 4, Lowercase), '\'');
535 }
536}
537
538template <typename T>
539ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
540{
541 ASSERT(m_code <= m_codeEnd);
542 return m_code;
543}
544
545template <typename T>
546void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
547{
548 m_arena = &arena->identifierArena();
549
550 m_lineNumber = source.firstLine().oneBasedInt();
551 m_lastToken = -1;
552
553 StringView sourceString = source.provider()->source();
554
555 if (!sourceString.isNull())
556 setCodeStart(sourceString);
557 else
558 m_codeStart = 0;
559
560 m_source = &source;
561 m_sourceOffset = source.startOffset();
562 m_codeStartPlusOffset = m_codeStart + source.startOffset();
563 m_code = m_codeStartPlusOffset;
564 m_codeEnd = m_codeStart + source.endOffset();
565 m_error = false;
566 m_atLineStart = true;
567 m_lineStart = m_code;
568 m_lexErrorMessage = String();
569 m_sourceURLDirective = String();
570 m_sourceMappingURLDirective = String();
571
572 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
573 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
574 m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
575
576 if (LIKELY(m_code < m_codeEnd))
577 m_current = *m_code;
578 else
579 m_current = 0;
580 ASSERT(currentOffset() == source.startOffset());
581}
582
583template <typename T>
584template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
585{
586 m_code += shiftAmount;
587 ASSERT(currentOffset() >= currentLineStartOffset());
588 m_current = *m_code;
589}
590
591template <typename T>
592ALWAYS_INLINE void Lexer<T>::shift()
593{
594 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
595 m_current = 0;
596 ++m_code;
597 if (LIKELY(m_code < m_codeEnd))
598 m_current = *m_code;
599}
600
601template <typename T>
602ALWAYS_INLINE bool Lexer<T>::atEnd() const
603{
604 ASSERT(!m_current || m_code < m_codeEnd);
605 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
606}
607
608template <typename T>
609ALWAYS_INLINE T Lexer<T>::peek(int offset) const
610{
611 ASSERT(offset > 0 && offset < 5);
612 const T* code = m_code + offset;
613 return (code < m_codeEnd) ? *code : 0;
614}
615
616struct ParsedUnicodeEscapeValue {
617 ParsedUnicodeEscapeValue(UChar32 value)
618 : m_value(value)
619 {
620 ASSERT(isValid());
621 }
622
623 enum SpecialValueType { Incomplete = -2, Invalid = -1 };
624 ParsedUnicodeEscapeValue(SpecialValueType type)
625 : m_value(type)
626 {
627 }
628
629 bool isValid() const { return m_value >= 0; }
630 bool isIncomplete() const { return m_value == Incomplete; }
631
632 UChar32 value() const
633 {
634 ASSERT(isValid());
635 return m_value;
636 }
637
638private:
639 UChar32 m_value;
640};
641
642template<typename CharacterType>
643ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
644{
645 if (m_current == '{') {
646 shift();
647 UChar32 codePoint = 0;
648 do {
649 if (!isASCIIHexDigit(m_current))
650 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
651 codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
652 if (codePoint > UCHAR_MAX_VALUE) {
653 // For raw template literal syntax, we consume `NotEscapeSequence`.
654 // Here, we consume NotCodePoint's HexDigits.
655 //
656 // NotEscapeSequence ::
657 // u { [lookahread not one of HexDigit]
658 // u { NotCodePoint
659 // u { CodePoint [lookahead != }]
660 //
661 // NotCodePoint ::
662 // HexDigits but not if MV of HexDigits <= 0x10FFFF
663 //
664 // CodePoint ::
665 // HexDigits but not if MV of HexDigits > 0x10FFFF
666 shift();
667 while (isASCIIHexDigit(m_current))
668 shift();
669
670 return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
671 }
672 shift();
673 } while (m_current != '}');
674 shift();
675 return codePoint;
676 }
677
678 auto character2 = peek(1);
679 auto character3 = peek(2);
680 auto character4 = peek(3);
681 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
682 auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
683
684 // For raw template literal syntax, we consume `NotEscapeSequence`.
685 //
686 // NotEscapeSequence ::
687 // u [lookahead not one of HexDigit][lookahead != {]
688 // u HexDigit [lookahead not one of HexDigit]
689 // u HexDigit HexDigit [lookahead not one of HexDigit]
690 // u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
691 while (isASCIIHexDigit(m_current))
692 shift();
693
694 return result;
695 }
696
697 auto result = convertUnicode(m_current, character2, character3, character4);
698 shift();
699 shift();
700 shift();
701 shift();
702 return result;
703}
704
705template <typename T>
706void Lexer<T>::shiftLineTerminator()
707{
708 ASSERT(isLineTerminator(m_current));
709
710 m_positionBeforeLastNewline = currentPosition();
711 T prev = m_current;
712 shift();
713
714 if (prev == '\r' && m_current == '\n')
715 shift();
716
717 ++m_lineNumber;
718}
719
720template <typename T>
721ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
722{
723 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
724}
725
726template <typename T>
727ALWAYS_INLINE void Lexer<T>::skipWhitespace()
728{
729 while (isWhiteSpace(m_current))
730 shift();
731}
732
733static NEVER_INLINE bool isNonLatin1IdentStart(UChar c)
734{
735 return u_hasBinaryProperty(c, UCHAR_ID_START);
736}
737
738static inline bool isIdentStart(LChar c)
739{
740 return typesOfLatin1Characters[c] == CharacterIdentifierStart;
741}
742
743static inline bool isIdentStart(UChar32 c)
744{
745 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
746}
747
748static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
749{
750 return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D;
751}
752
753static ALWAYS_INLINE bool isIdentPart(LChar c)
754{
755 // Character types are divided into two groups depending on whether they can be part of an
756 // identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
757 // part of an identifier. (See the CharacterType definition for more details.)
758 return typesOfLatin1Characters[c] <= CharacterOtherIdentifierPart;
759}
760
761static ALWAYS_INLINE bool isIdentPart(UChar32 c)
762{
763 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
764}
765
766static ALWAYS_INLINE bool isIdentPart(UChar c)
767{
768 return isIdentPart(static_cast<UChar32>(c));
769}
770
771template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd)
772{
773 if (isIdentPart(code[0]))
774 return true;
775
776 // Shortest sequence handled below is \u{0}, which is 5 characters.
777 if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u'))
778 return false;
779
780 if (code[2] == '{') {
781 UChar32 codePoint = 0;
782 const CharacterType* pointer;
783 for (pointer = &code[3]; pointer < codeEnd; ++pointer) {
784 auto digit = *pointer;
785 if (!isASCIIHexDigit(digit))
786 break;
787 codePoint = (codePoint << 4) | toASCIIHexValue(digit);
788 if (codePoint > UCHAR_MAX_VALUE)
789 return false;
790 }
791 return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}';
792 }
793
794 // Shortest sequence handled below is \uXXXX, which is 6 characters.
795 if (codeEnd - code < 6)
796 return false;
797
798 auto character1 = code[2];
799 auto character2 = code[3];
800 auto character3 = code[4];
801 auto character4 = code[5];
802 return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4)
803 && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4));
804}
805
806static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd)
807{
808 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
809}
810
811static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd)
812{
813 return isIdentPartIncludingEscapeTemplate(code, codeEnd);
814}
815
// True for an ASCII decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIDigit(character);
}
821
// True for an ASCII hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIHexDigit(character);
}
827
// True for an ASCII binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIBinaryDigit(character);
}
833
// True for an ASCII octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIOctalDigit(character);
}
839
840static inline LChar singleEscape(int c)
841{
842 if (c < 128) {
843 ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
844 return singleCharacterEscapeValuesForASCII[c];
845 }
846 return 0;
847}
848
849template <typename T>
850inline void Lexer<T>::record8(int c)
851{
852 ASSERT(c >= 0);
853 ASSERT(c <= 0xFF);
854 m_buffer8.append(static_cast<LChar>(c));
855}
856
857template <typename T>
858inline void assertCharIsIn8BitRange(T c)
859{
860 UNUSED_PARAM(c);
861 ASSERT(c >= 0);
862 ASSERT(c <= 0xFF);
863}
864
865template <>
866inline void assertCharIsIn8BitRange(UChar c)
867{
868 UNUSED_PARAM(c);
869 ASSERT(c <= 0xFF);
870}
871
872template <>
873inline void assertCharIsIn8BitRange(LChar)
874{
875}
876
877template <typename T>
878inline void Lexer<T>::append8(const T* p, size_t length)
879{
880 size_t currentSize = m_buffer8.size();
881 m_buffer8.grow(currentSize + length);
882 LChar* rawBuffer = m_buffer8.data() + currentSize;
883
884 for (size_t i = 0; i < length; i++) {
885 T c = p[i];
886 assertCharIsIn8BitRange(c);
887 rawBuffer[i] = c;
888 }
889}
890
891template <typename T>
892inline void Lexer<T>::append16(const LChar* p, size_t length)
893{
894 size_t currentSize = m_buffer16.size();
895 m_buffer16.grow(currentSize + length);
896 UChar* rawBuffer = m_buffer16.data() + currentSize;
897
898 for (size_t i = 0; i < length; i++)
899 rawBuffer[i] = p[i];
900}
901
902template <typename T>
903inline void Lexer<T>::record16(T c)
904{
905 m_buffer16.append(c);
906}
907
908template <typename T>
909inline void Lexer<T>::record16(int c)
910{
911 ASSERT(c >= 0);
912 ASSERT(c <= static_cast<int>(USHRT_MAX));
913 m_buffer16.append(static_cast<UChar>(c));
914}
915
916template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
917{
918 ASSERT(codePoint >= 0);
919 ASSERT(codePoint <= UCHAR_MAX_VALUE);
920 if (U_IS_BMP(codePoint))
921 record16(codePoint);
922 else {
923 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
924 append16(codeUnits, 2);
925 }
926}
927
928#if !ASSERT_DISABLED
929bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
930{
931 if (!ident)
932 return true;
933 /* Just block any use of suspicious identifiers. This is intended to
934 * be used as a safety net while implementing builtins.
935 */
936 // FIXME: How can a debug-only assertion be a safety net?
937 if (*ident == vm.propertyNames->builtinNames().callPublicName())
938 return false;
939 if (*ident == vm.propertyNames->builtinNames().applyPublicName())
940 return false;
941 if (*ident == vm.propertyNames->eval)
942 return false;
943 if (*ident == vm.propertyNames->Function)
944 return false;
945 return true;
946}
947#endif
948
949template <>
950template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
951{
952 tokenData->escaped = false;
953 const ptrdiff_t remaining = m_codeEnd - m_code;
954 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
955 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
956 if (keyword != IDENT) {
957 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
958 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
959 }
960 }
961
962 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
963 if (isPrivateName)
964 shift();
965
966 const LChar* identifierStart = currentSourcePtr();
967 unsigned identifierLineStart = currentLineStartOffset();
968
969 while (isIdentPart(m_current))
970 shift();
971
972 if (UNLIKELY(m_current == '\\')) {
973 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
974 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
975 }
976
977 const Identifier* ident = nullptr;
978
979 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
980 int identifierLength = currentSourcePtr() - identifierStart;
981 ident = makeIdentifier(identifierStart, identifierLength);
982 if (m_parsingBuiltinFunction) {
983 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
984 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
985 return ERRORTOK;
986 }
987 if (isPrivateName)
988 ident = &m_arena->makeIdentifier(m_vm, m_vm->propertyNames->lookUpPrivateName(*ident));
989 else if (*ident == m_vm->propertyNames->undefinedKeyword)
990 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
991 if (!ident)
992 return INVALID_PRIVATE_NAME_ERRORTOK;
993 }
994 tokenData->ident = ident;
995 } else
996 tokenData->ident = nullptr;
997
998 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
999 ASSERT(shouldCreateIdentifier);
1000 if (remaining < maxTokenLength) {
1001 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1002 ASSERT((remaining < maxTokenLength) || !entry);
1003 if (!entry)
1004 return IDENT;
1005 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1006 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1007 }
1008 return IDENT;
1009 }
1010
1011 return IDENT;
1012}
1013
1014template <>
1015template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1016{
1017 tokenData->escaped = false;
1018 const ptrdiff_t remaining = m_codeEnd - m_code;
1019 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
1020 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1021 if (keyword != IDENT) {
1022 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
1023 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1024 }
1025 }
1026
1027 bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction;
1028 if (isPrivateName)
1029 shift();
1030
1031 const UChar* identifierStart = currentSourcePtr();
1032 int identifierLineStart = currentLineStartOffset();
1033
1034 UChar orAllChars = 0;
1035
1036 while (isIdentPart(m_current)) {
1037 orAllChars |= m_current;
1038 shift();
1039 }
1040
1041 if (UNLIKELY(m_current == '\\')) {
1042 ASSERT(!isPrivateName);
1043 setOffsetFromSourcePtr(identifierStart, identifierLineStart);
1044 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
1045 }
1046
1047 bool isAll8Bit = false;
1048
1049 if (!(orAllChars & ~0xff))
1050 isAll8Bit = true;
1051
1052 const Identifier* ident = nullptr;
1053
1054 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
1055 int identifierLength = currentSourcePtr() - identifierStart;
1056 if (isAll8Bit)
1057 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1058 else
1059 ident = makeIdentifier(identifierStart, identifierLength);
1060 if (m_parsingBuiltinFunction) {
1061 if (!isSafeBuiltinIdentifier(*m_vm, ident) && !isPrivateName) {
1062 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
1063 return ERRORTOK;
1064 }
1065 if (isPrivateName)
1066 ident = &m_arena->makeIdentifier(m_vm, m_vm->propertyNames->lookUpPrivateName(*ident));
1067 else if (*ident == m_vm->propertyNames->undefinedKeyword)
1068 tokenData->ident = &m_vm->propertyNames->undefinedPrivateName;
1069 if (!ident)
1070 return INVALID_PRIVATE_NAME_ERRORTOK;
1071 }
1072 tokenData->ident = ident;
1073 } else
1074 tokenData->ident = nullptr;
1075
1076 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) && !isPrivateName) {
1077 ASSERT(shouldCreateIdentifier);
1078 if (remaining < maxTokenLength) {
1079 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1080 ASSERT((remaining < maxTokenLength) || !entry);
1081 if (!entry)
1082 return IDENT;
1083 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1084 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1085 }
1086 return IDENT;
1087 }
1088
1089 return IDENT;
1090}
1091
1092template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
1093{
1094 tokenData->escaped = true;
1095 auto identifierStart = currentSourcePtr();
1096 bool bufferRequired = false;
1097
1098 while (true) {
1099 if (LIKELY(isIdentPart(m_current))) {
1100 shift();
1101 continue;
1102 }
1103 if (LIKELY(m_current != '\\'))
1104 break;
1105
1106 // \uXXXX unicode characters.
1107 bufferRequired = true;
1108 if (identifierStart != currentSourcePtr())
1109 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1110 shift();
1111 if (UNLIKELY(m_current != 'u'))
1112 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1113 shift();
1114 auto character = parseUnicodeEscape();
1115 if (UNLIKELY(!character.isValid()))
1116 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1117 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value())))
1118 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1119 if (shouldCreateIdentifier)
1120 recordUnicodeCodePoint(character.value());
1121 identifierStart = currentSourcePtr();
1122 }
1123
1124 int identifierLength;
1125 const Identifier* ident = nullptr;
1126 if (shouldCreateIdentifier) {
1127 if (!bufferRequired) {
1128 identifierLength = currentSourcePtr() - identifierStart;
1129 ident = makeIdentifier(identifierStart, identifierLength);
1130 } else {
1131 if (identifierStart != currentSourcePtr())
1132 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1133 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1134 }
1135
1136 tokenData->ident = ident;
1137 } else
1138 tokenData->ident = nullptr;
1139
1140 m_buffer16.shrink(0);
1141
1142 if (LIKELY(!(lexerFlags & LexerFlagsIgnoreReservedWords))) {
1143 ASSERT(shouldCreateIdentifier);
1144 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1145 if (!entry)
1146 return IDENT;
1147 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1148 if ((token != RESERVED_IF_STRICT) || strictMode)
1149 return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token;
1150 }
1151
1152 return IDENT;
1153}
1154
1155static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1156{
1157 return character < 0xE;
1158}
1159
1160static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1161{
1162 return character < 0xE || character > 0xFF;
1163}
1164
1165template <typename T>
1166template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1167{
1168 int startingOffset = currentOffset();
1169 int startingLineStartOffset = currentLineStartOffset();
1170 int startingLineNumber = lineNumber();
1171 T stringQuoteCharacter = m_current;
1172 shift();
1173
1174 const T* stringStart = currentSourcePtr();
1175
1176 while (m_current != stringQuoteCharacter) {
1177 if (UNLIKELY(m_current == '\\')) {
1178 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1179 append8(stringStart, currentSourcePtr() - stringStart);
1180 shift();
1181
1182 LChar escape = singleEscape(m_current);
1183
1184 // Most common escape sequences first.
1185 if (escape) {
1186 if (shouldBuildStrings)
1187 record8(escape);
1188 shift();
1189 } else if (UNLIKELY(isLineTerminator(m_current)))
1190 shiftLineTerminator();
1191 else if (m_current == 'x') {
1192 shift();
1193 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1194 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1195 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1196 }
1197 T prev = m_current;
1198 shift();
1199 if (shouldBuildStrings)
1200 record8(convertHex(prev, m_current));
1201 shift();
1202 } else {
1203 setOffset(startingOffset, startingLineStartOffset);
1204 setLineNumber(startingLineNumber);
1205 m_buffer8.shrink(0);
1206 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1207 }
1208 stringStart = currentSourcePtr();
1209 continue;
1210 }
1211
1212 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1213 setOffset(startingOffset, startingLineStartOffset);
1214 setLineNumber(startingLineNumber);
1215 m_buffer8.shrink(0);
1216 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1217 }
1218
1219 shift();
1220 }
1221
1222 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1223 append8(stringStart, currentSourcePtr() - stringStart);
1224 if (shouldBuildStrings) {
1225 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1226 m_buffer8.shrink(0);
1227 } else
1228 tokenData->ident = 0;
1229
1230 return StringParsedSuccessfully;
1231}
1232
1233template <typename T>
1234template <bool shouldBuildStrings, LexerEscapeParseMode escapeParseMode> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode, T stringQuoteCharacter) -> StringParseResult
1235{
1236 if (m_current == 'x') {
1237 shift();
1238 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1239 // For raw template literal syntax, we consume `NotEscapeSequence`.
1240 //
1241 // NotEscapeSequence ::
1242 // x [lookahread not one of HexDigit]
1243 // x HexDigit [lookahread not one of HexDigit]
1244 if (isASCIIHexDigit(m_current))
1245 shift();
1246 ASSERT(!isASCIIHexDigit(m_current));
1247
1248 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1249 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1250 }
1251
1252 T prev = m_current;
1253 shift();
1254 if (shouldBuildStrings)
1255 record16(convertHex(prev, m_current));
1256 shift();
1257
1258 return StringParsedSuccessfully;
1259 }
1260
1261 if (m_current == 'u') {
1262 shift();
1263
1264 if (escapeParseMode == LexerEscapeParseMode::String && m_current == stringQuoteCharacter) {
1265 if (shouldBuildStrings)
1266 record16('u');
1267 return StringParsedSuccessfully;
1268 }
1269
1270 auto character = parseUnicodeEscape();
1271 if (character.isValid()) {
1272 if (shouldBuildStrings)
1273 recordUnicodeCodePoint(character.value());
1274 return StringParsedSuccessfully;
1275 }
1276
1277 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1278 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1279 }
1280
1281 if (strictMode) {
1282 if (isASCIIDigit(m_current)) {
1283 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1284 int character1 = m_current;
1285 shift();
1286 if (character1 != '0' || isASCIIDigit(m_current)) {
1287 // For raw template literal syntax, we consume `NotEscapeSequence`.
1288 //
1289 // NotEscapeSequence ::
1290 // 0 DecimalDigit
1291 // DecimalDigit but not 0
1292 if (character1 == '0')
1293 shift();
1294
1295 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1296 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1297 }
1298 if (shouldBuildStrings)
1299 record16(0);
1300 return StringParsedSuccessfully;
1301 }
1302 } else {
1303 if (isASCIIOctalDigit(m_current)) {
1304 // Octal character sequences
1305 T character1 = m_current;
1306 shift();
1307 if (isASCIIOctalDigit(m_current)) {
1308 // Two octal characters
1309 T character2 = m_current;
1310 shift();
1311 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1312 if (shouldBuildStrings)
1313 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1314 shift();
1315 } else {
1316 if (shouldBuildStrings)
1317 record16((character1 - '0') * 8 + character2 - '0');
1318 }
1319 } else {
1320 if (shouldBuildStrings)
1321 record16(character1 - '0');
1322 }
1323 return StringParsedSuccessfully;
1324 }
1325 }
1326
1327 if (!atEnd()) {
1328 if (shouldBuildStrings)
1329 record16(m_current);
1330 shift();
1331 return StringParsedSuccessfully;
1332 }
1333
1334 m_lexErrorMessage = "Unterminated string constant"_s;
1335 return StringUnterminated;
1336}
1337
1338template <typename T>
1339template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1340{
1341 T stringQuoteCharacter = m_current;
1342 shift();
1343
1344 const T* stringStart = currentSourcePtr();
1345
1346 while (m_current != stringQuoteCharacter) {
1347 if (UNLIKELY(m_current == '\\')) {
1348 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1349 append16(stringStart, currentSourcePtr() - stringStart);
1350 shift();
1351
1352 LChar escape = singleEscape(m_current);
1353
1354 // Most common escape sequences first
1355 if (escape) {
1356 if (shouldBuildStrings)
1357 record16(escape);
1358 shift();
1359 } else if (UNLIKELY(isLineTerminator(m_current)))
1360 shiftLineTerminator();
1361 else {
1362 StringParseResult result = parseComplexEscape<shouldBuildStrings, LexerEscapeParseMode::String>(strictMode, stringQuoteCharacter);
1363 if (result != StringParsedSuccessfully)
1364 return result;
1365 }
1366
1367 stringStart = currentSourcePtr();
1368 continue;
1369 }
1370 // Fast check for characters that require special handling.
1371 // Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1372 static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1373 if (UNLIKELY(m_current < 0xE)) {
1374 // New-line or end of input is not allowed
1375 if (atEnd() || m_current == '\r' || m_current == '\n') {
1376 m_lexErrorMessage = "Unexpected EOF"_s;
1377 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1378 }
1379 // Anything else is just a normal character
1380 }
1381 shift();
1382 }
1383
1384 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1385 append16(stringStart, currentSourcePtr() - stringStart);
1386 if (shouldBuildStrings)
1387 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1388 else
1389 tokenData->ident = 0;
1390
1391 m_buffer16.shrink(0);
1392 return StringParsedSuccessfully;
1393}
1394
1395template <typename T>
1396typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1397{
1398 bool parseCookedFailed = false;
1399 const T* stringStart = currentSourcePtr();
1400 const T* rawStringStart = currentSourcePtr();
1401
1402 while (m_current != '`') {
1403 if (UNLIKELY(m_current == '\\')) {
1404 if (stringStart != currentSourcePtr())
1405 append16(stringStart, currentSourcePtr() - stringStart);
1406 shift();
1407
1408 LChar escape = singleEscape(m_current);
1409
1410 // Most common escape sequences first.
1411 if (escape) {
1412 record16(escape);
1413 shift();
1414 } else if (UNLIKELY(isLineTerminator(m_current))) {
1415 // Normalize <CR>, <CR><LF> to <LF>.
1416 if (m_current == '\r') {
1417 ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1418
1419 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1420 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1421 m_bufferForRawTemplateString16.append('\n');
1422 }
1423
1424 shiftLineTerminator();
1425 rawStringStart = currentSourcePtr();
1426 } else
1427 shiftLineTerminator();
1428 } else {
1429 bool strictMode = true;
1430 StringParseResult result = parseComplexEscape<true, LexerEscapeParseMode::Template>(strictMode, '`');
1431 if (result != StringParsedSuccessfully) {
1432 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1433 parseCookedFailed = true;
1434 else
1435 return result;
1436 }
1437 }
1438
1439 stringStart = currentSourcePtr();
1440 continue;
1441 }
1442
1443 if (m_current == '$' && peek(1) == '{')
1444 break;
1445
1446 // Fast check for characters that require special handling.
1447 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1448 // as possible, and lets through all common ASCII characters.
1449 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1450 // End of input is not allowed.
1451 // Unlike String, line terminator is allowed.
1452 if (atEnd()) {
1453 m_lexErrorMessage = "Unexpected EOF"_s;
1454 return StringUnterminated;
1455 }
1456
1457 if (isLineTerminator(m_current)) {
1458 if (m_current == '\r') {
1459 // Normalize <CR>, <CR><LF> to <LF>.
1460 if (stringStart != currentSourcePtr())
1461 append16(stringStart, currentSourcePtr() - stringStart);
1462 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1463 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1464
1465 record16('\n');
1466 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1467 m_bufferForRawTemplateString16.append('\n');
1468 shiftLineTerminator();
1469 stringStart = currentSourcePtr();
1470 rawStringStart = currentSourcePtr();
1471 } else
1472 shiftLineTerminator();
1473 continue;
1474 }
1475 // Anything else is just a normal character
1476 }
1477
1478 shift();
1479 }
1480
1481 bool isTail = m_current == '`';
1482
1483 if (currentSourcePtr() != stringStart)
1484 append16(stringStart, currentSourcePtr() - stringStart);
1485 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1486 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1487
1488 if (!parseCookedFailed)
1489 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1490 else
1491 tokenData->cooked = nullptr;
1492
1493 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1494 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1495 tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1496 else
1497 tokenData->raw = nullptr;
1498
1499 tokenData->isTail = isTail;
1500
1501 m_buffer16.shrink(0);
1502 m_bufferForRawTemplateString16.shrink(0);
1503
1504 if (isTail) {
1505 // Skip `
1506 shift();
1507 } else {
1508 // Skip $ and {
1509 shift();
1510 shift();
1511 }
1512
1513 return StringParsedSuccessfully;
1514}
1515
1516template <typename T>
1517ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
1518{
1519 ASSERT(isASCIIHexDigit(m_current));
1520
1521 // Optimization: most hexadecimal values fit into 4 bytes.
1522 uint32_t hexValue = 0;
1523 int maximumDigits = 7;
1524
1525 do {
1526 if (m_current == '_') {
1527 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1528 return WTF::nullopt;
1529
1530 shift();
1531 }
1532
1533 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1534 shift();
1535 --maximumDigits;
1536 } while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
1537
1538 if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
1539 return NumberParseResult { hexValue };
1540
1541 // No more place in the hexValue buffer.
1542 // The values are shifted out and placed into the m_buffer8 vector.
1543 for (int i = 0; i < 8; ++i) {
1544 int digit = hexValue >> 28;
1545 if (digit < 10)
1546 record8(digit + '0');
1547 else
1548 record8(digit - 10 + 'a');
1549 hexValue <<= 4;
1550 }
1551
1552 while (isASCIIHexDigitOrSeparator(m_current)) {
1553 if (m_current == '_') {
1554 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1555 return WTF::nullopt;
1556
1557 shift();
1558 }
1559
1560 record8(m_current);
1561 shift();
1562 }
1563
1564 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1565 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1566
1567 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
1568}
1569
1570template <typename T>
1571ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
1572{
1573 ASSERT(isASCIIBinaryDigit(m_current));
1574
1575 // Optimization: most binary values fit into 4 bytes.
1576 uint32_t binaryValue = 0;
1577 const unsigned maximumDigits = 32;
1578 int digit = maximumDigits - 1;
1579 // Temporary buffer for the digits. Makes easier
1580 // to reconstruct the input characters when needed.
1581 LChar digits[maximumDigits];
1582
1583 do {
1584 if (m_current == '_') {
1585 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1586 return WTF::nullopt;
1587
1588 shift();
1589 }
1590
1591 binaryValue = (binaryValue << 1) + (m_current - '0');
1592 digits[digit] = m_current;
1593 shift();
1594 --digit;
1595 } while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
1596
1597 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1598 return NumberParseResult { binaryValue };
1599
1600 for (int i = maximumDigits - 1; i > digit; --i)
1601 record8(digits[i]);
1602
1603 while (isASCIIBinaryDigitOrSeparator(m_current)) {
1604 if (m_current == '_') {
1605 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1606 return WTF::nullopt;
1607
1608 shift();
1609 }
1610
1611 record8(m_current);
1612 shift();
1613 }
1614
1615 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1616 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1617
1618 if (isASCIIDigit(m_current))
1619 return WTF::nullopt;
1620
1621 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
1622}
1623
1624template <typename T>
1625ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
1626{
1627 ASSERT(isASCIIOctalDigit(m_current));
1628
1629 // Optimization: most octal values fit into 4 bytes.
1630 uint32_t octalValue = 0;
1631 const unsigned maximumDigits = 10;
1632 int digit = maximumDigits - 1;
1633 // Temporary buffer for the digits. Makes easier
1634 // to reconstruct the input characters when needed.
1635 LChar digits[maximumDigits];
1636
1637 do {
1638 if (m_current == '_') {
1639 if (UNLIKELY(!isASCIIOctalDigit(peek(1))))
1640 return WTF::nullopt;
1641
1642 shift();
1643 }
1644
1645 octalValue = octalValue * 8 + (m_current - '0');
1646 digits[digit] = m_current;
1647 shift();
1648 --digit;
1649 } while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
1650
1651 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1652 return NumberParseResult { octalValue };
1653
1654 for (int i = maximumDigits - 1; i > digit; --i)
1655 record8(digits[i]);
1656
1657 while (isASCIIOctalDigitOrSeparator(m_current)) {
1658 if (m_current == '_') {
1659 if (UNLIKELY(!isASCIIOctalDigit(peek(1))))
1660 return WTF::nullopt;
1661
1662 shift();
1663 }
1664
1665 record8(m_current);
1666 shift();
1667 }
1668
1669 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1670 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1671
1672 if (isASCIIDigit(m_current))
1673 return WTF::nullopt;
1674
1675 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
1676}
1677
1678template <typename T>
1679ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
1680{
1681 ASSERT(isASCIIDigit(m_current) || m_buffer8.size());
1682
1683 // Optimization: most decimal values fit into 4 bytes.
1684 uint32_t decimalValue = 0;
1685
1686 // Since parseOctal may be executed before parseDecimal,
1687 // the m_buffer8 may hold ascii digits.
1688 if (!m_buffer8.size()) {
1689 const unsigned maximumDigits = 10;
1690 int digit = maximumDigits - 1;
1691 // Temporary buffer for the digits. Makes easier
1692 // to reconstruct the input characters when needed.
1693 LChar digits[maximumDigits];
1694
1695 do {
1696 if (m_current == '_') {
1697 if (UNLIKELY(!isASCIIDigit(peek(1))))
1698 return WTF::nullopt;
1699
1700 shift();
1701 }
1702
1703 decimalValue = decimalValue * 10 + (m_current - '0');
1704 digits[digit] = m_current;
1705 shift();
1706 --digit;
1707 } while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
1708
1709 if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
1710 return NumberParseResult { decimalValue };
1711
1712 for (int i = maximumDigits - 1; i > digit; --i)
1713 record8(digits[i]);
1714 }
1715
1716 while (isASCIIDigitOrSeparator(m_current)) {
1717 if (m_current == '_') {
1718 if (UNLIKELY(!isASCIIDigit(peek(1))))
1719 return WTF::nullopt;
1720
1721 shift();
1722 }
1723
1724 record8(m_current);
1725 shift();
1726 }
1727
1728 if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
1729 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1730
1731 return WTF::nullopt;
1732}
1733
1734template <typename T>
1735ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
1736{
1737 ASSERT(isASCIIDigit(m_current));
1738 record8('.');
1739
1740 do {
1741 if (m_current == '_') {
1742 if (UNLIKELY(!isASCIIDigit(peek(1))))
1743 return false;
1744
1745 shift();
1746 }
1747
1748 record8(m_current);
1749 shift();
1750 } while (isASCIIDigitOrSeparator(m_current));
1751
1752 return true;
1753}
1754
1755template <typename T>
1756ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1757{
1758 record8('e');
1759 shift();
1760 if (m_current == '+' || m_current == '-') {
1761 record8(m_current);
1762 shift();
1763 }
1764
1765 if (!isASCIIDigit(m_current))
1766 return false;
1767
1768 do {
1769 if (m_current == '_') {
1770 if (UNLIKELY(!isASCIIDigit(peek(1))))
1771 return false;
1772
1773 shift();
1774 }
1775
1776 record8(m_current);
1777 shift();
1778 } while (isASCIIDigitOrSeparator(m_current));
1779
1780 return true;
1781}
1782
1783template <typename T>
1784ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1785{
1786 while (true) {
1787 while (UNLIKELY(m_current == '*')) {
1788 shift();
1789 if (m_current == '/') {
1790 shift();
1791 return true;
1792 }
1793 }
1794
1795 if (atEnd())
1796 return false;
1797
1798 if (isLineTerminator(m_current)) {
1799 shiftLineTerminator();
1800 m_hasLineTerminatorBeforeToken = true;
1801 } else
1802 shift();
1803 }
1804}
1805
1806template <typename T>
1807ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1808{
1809 // sourceURL and sourceMappingURL directives.
1810 if (!consume("source"))
1811 return;
1812
1813 if (consume("URL=")) {
1814 m_sourceURLDirective = parseCommentDirectiveValue();
1815 return;
1816 }
1817
1818 if (consume("MappingURL=")) {
1819 m_sourceMappingURLDirective = parseCommentDirectiveValue();
1820 return;
1821 }
1822}
1823
1824template <typename T>
1825ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1826{
1827 skipWhitespace();
1828 const T* stringStart = currentSourcePtr();
1829 while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd())
1830 shift();
1831 const T* stringEnd = currentSourcePtr();
1832 skipWhitespace();
1833
1834 if (!isLineTerminator(m_current) && !atEnd())
1835 return String();
1836
1837 append8(stringStart, stringEnd - stringStart);
1838 String result = String(m_buffer8.data(), m_buffer8.size());
1839 m_buffer8.shrink(0);
1840 return result;
1841}
1842
1843template <typename T>
1844template <unsigned length>
1845ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1846{
1847 unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
1848
1849 unsigned i = 0;
1850 for (; i < lengthToCheck && m_current == input[i]; i++)
1851 shift();
1852
1853 return i == lengthToCheck;
1854}
1855
1856template <typename T>
1857bool Lexer<T>::nextTokenIsColon()
1858{
1859 const T* code = m_code;
1860 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1861 code++;
1862
1863 return code < m_codeEnd && *code == ':';
1864}
1865
1866template <typename T>
1867void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1868{
1869 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1870 tokenLocation->line = lineNumber;
1871 tokenLocation->endOffset = endOffset;
1872 tokenLocation->lineStartOffset = lineStartOffset;
1873 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1874 tokenRecord->m_endPosition = endPosition;
1875 m_lastToken = token;
1876}
1877
1878template <typename T>
1879JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
1880{
1881 JSTokenData* tokenData = &tokenRecord->m_data;
1882 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1883 m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
1884
1885 ASSERT(!m_error);
1886 ASSERT(m_buffer8.isEmpty());
1887 ASSERT(m_buffer16.isEmpty());
1888
1889 JSTokenType token = ERRORTOK;
1890
1891start:
1892 skipWhitespace();
1893
1894 tokenLocation->startOffset = currentOffset();
1895 ASSERT(currentOffset() >= currentLineStartOffset());
1896 tokenRecord->m_startPosition = currentPosition();
1897
1898 if (atEnd()) {
1899 token = EOFTOK;
1900 goto returnToken;
1901 }
1902
1903 CharacterType type;
1904 if (LIKELY(isLatin1(m_current)))
1905 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1906 else if (isNonLatin1IdentStart(m_current))
1907 type = CharacterIdentifierStart;
1908 else if (isLineTerminator(m_current))
1909 type = CharacterLineTerminator;
1910 else
1911 type = CharacterInvalid;
1912
1913 switch (type) {
1914 case CharacterGreater:
1915 shift();
1916 if (m_current == '>') {
1917 shift();
1918 if (m_current == '>') {
1919 shift();
1920 if (m_current == '=') {
1921 shift();
1922 token = URSHIFTEQUAL;
1923 break;
1924 }
1925 token = URSHIFT;
1926 break;
1927 }
1928 if (m_current == '=') {
1929 shift();
1930 token = RSHIFTEQUAL;
1931 break;
1932 }
1933 token = RSHIFT;
1934 break;
1935 }
1936 if (m_current == '=') {
1937 shift();
1938 token = GE;
1939 break;
1940 }
1941 token = GT;
1942 break;
1943 case CharacterEqual: {
1944 if (peek(1) == '>') {
1945 token = ARROWFUNCTION;
1946 tokenData->line = lineNumber();
1947 tokenData->offset = currentOffset();
1948 tokenData->lineStartOffset = currentLineStartOffset();
1949 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1950 shift();
1951 shift();
1952 break;
1953 }
1954
1955 shift();
1956 if (m_current == '=') {
1957 shift();
1958 if (m_current == '=') {
1959 shift();
1960 token = STREQ;
1961 break;
1962 }
1963 token = EQEQ;
1964 break;
1965 }
1966 token = EQUAL;
1967 break;
1968 }
1969 case CharacterLess:
1970 shift();
1971 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
1972 if (m_scriptMode == JSParserScriptMode::Classic) {
1973 // <!-- marks the beginning of a line comment (for www usage)
1974 goto inSingleLineComment;
1975 }
1976 }
1977 if (m_current == '<') {
1978 shift();
1979 if (m_current == '=') {
1980 shift();
1981 token = LSHIFTEQUAL;
1982 break;
1983 }
1984 token = LSHIFT;
1985 break;
1986 }
1987 if (m_current == '=') {
1988 shift();
1989 token = LE;
1990 break;
1991 }
1992 token = LT;
1993 break;
1994 case CharacterExclamationMark:
1995 shift();
1996 if (m_current == '=') {
1997 shift();
1998 if (m_current == '=') {
1999 shift();
2000 token = STRNEQ;
2001 break;
2002 }
2003 token = NE;
2004 break;
2005 }
2006 token = EXCLAMATION;
2007 break;
2008 case CharacterAdd:
2009 shift();
2010 if (m_current == '+') {
2011 shift();
2012 token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
2013 break;
2014 }
2015 if (m_current == '=') {
2016 shift();
2017 token = PLUSEQUAL;
2018 break;
2019 }
2020 token = PLUS;
2021 break;
2022 case CharacterSub:
2023 shift();
2024 if (m_current == '-') {
2025 shift();
2026 if ((m_atLineStart || m_hasLineTerminatorBeforeToken) && m_current == '>') {
2027 if (m_scriptMode == JSParserScriptMode::Classic) {
2028 shift();
2029 goto inSingleLineComment;
2030 }
2031 }
2032 token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
2033 break;
2034 }
2035 if (m_current == '=') {
2036 shift();
2037 token = MINUSEQUAL;
2038 break;
2039 }
2040 token = MINUS;
2041 break;
2042 case CharacterMultiply:
2043 shift();
2044 if (m_current == '=') {
2045 shift();
2046 token = MULTEQUAL;
2047 break;
2048 }
2049 if (m_current == '*') {
2050 shift();
2051 if (m_current == '=') {
2052 shift();
2053 token = POWEQUAL;
2054 break;
2055 }
2056 token = POW;
2057 break;
2058 }
2059 token = TIMES;
2060 break;
2061 case CharacterSlash:
2062 shift();
2063 if (m_current == '/') {
2064 shift();
2065 goto inSingleLineCommentCheckForDirectives;
2066 }
2067 if (m_current == '*') {
2068 shift();
2069 if (parseMultilineComment())
2070 goto start;
2071 m_lexErrorMessage = "Multiline comment was not closed properly"_s;
2072 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
2073 goto returnError;
2074 }
2075 if (m_current == '=') {
2076 shift();
2077 token = DIVEQUAL;
2078 break;
2079 }
2080 token = DIVIDE;
2081 break;
2082 case CharacterAnd:
2083 shift();
2084 if (m_current == '&') {
2085 shift();
2086 token = AND;
2087 break;
2088 }
2089 if (m_current == '=') {
2090 shift();
2091 token = ANDEQUAL;
2092 break;
2093 }
2094 token = BITAND;
2095 break;
2096 case CharacterXor:
2097 shift();
2098 if (m_current == '=') {
2099 shift();
2100 token = XOREQUAL;
2101 break;
2102 }
2103 token = BITXOR;
2104 break;
2105 case CharacterModulo:
2106 shift();
2107 if (m_current == '=') {
2108 shift();
2109 token = MODEQUAL;
2110 break;
2111 }
2112 token = MOD;
2113 break;
2114 case CharacterOr:
2115 shift();
2116 if (m_current == '=') {
2117 shift();
2118 token = OREQUAL;
2119 break;
2120 }
2121 if (m_current == '|') {
2122 shift();
2123 token = OR;
2124 break;
2125 }
2126 token = BITOR;
2127 break;
2128 case CharacterOpenParen:
2129 token = OPENPAREN;
2130 tokenData->line = lineNumber();
2131 tokenData->offset = currentOffset();
2132 tokenData->lineStartOffset = currentLineStartOffset();
2133 shift();
2134 break;
2135 case CharacterCloseParen:
2136 token = CLOSEPAREN;
2137 shift();
2138 break;
2139 case CharacterOpenBracket:
2140 token = OPENBRACKET;
2141 shift();
2142 break;
2143 case CharacterCloseBracket:
2144 token = CLOSEBRACKET;
2145 shift();
2146 break;
2147 case CharacterComma:
2148 token = COMMA;
2149 shift();
2150 break;
2151 case CharacterColon:
2152 token = COLON;
2153 shift();
2154 break;
2155 case CharacterQuestion:
2156 token = QUESTION;
2157 shift();
2158 break;
2159 case CharacterTilde:
2160 token = TILDE;
2161 shift();
2162 break;
2163 case CharacterSemicolon:
2164 shift();
2165 token = SEMICOLON;
2166 break;
2167 case CharacterBackQuote:
2168 shift();
2169 token = BACKQUOTE;
2170 break;
2171 case CharacterOpenBrace:
2172 tokenData->line = lineNumber();
2173 tokenData->offset = currentOffset();
2174 tokenData->lineStartOffset = currentLineStartOffset();
2175 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2176 shift();
2177 token = OPENBRACE;
2178 break;
2179 case CharacterCloseBrace:
2180 tokenData->line = lineNumber();
2181 tokenData->offset = currentOffset();
2182 tokenData->lineStartOffset = currentLineStartOffset();
2183 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2184 shift();
2185 token = CLOSEBRACE;
2186 break;
2187 case CharacterDot:
2188 shift();
2189 if (!isASCIIDigit(m_current)) {
2190 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2191 shift();
2192 shift();
2193 token = DOTDOTDOT;
2194 break;
2195 }
2196 token = DOT;
2197 break;
2198 }
2199 if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
2200 m_lexErrorMessage = "Non-number found after decimal point"_s;
2201 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2202 goto returnError;
2203 }
2204 token = DOUBLE;
2205 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2206 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2207 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2208 goto returnError;
2209 }
2210 size_t parsedLength;
2211 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2212 if (token == INTEGER)
2213 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2214
2215 if (UNLIKELY(isIdentStart(m_current))) {
2216 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2217 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2218 goto returnError;
2219 }
2220 m_buffer8.shrink(0);
2221 break;
2222 case CharacterZero:
2223 shift();
2224 if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
2225 if (UNLIKELY(!isASCIIHexDigit(peek(1)))) {
2226 m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2227 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2228 goto returnError;
2229 }
2230
2231 // Shift out the 'x' prefix.
2232 shift();
2233
2234 auto parseNumberResult = parseHex();
2235 if (!parseNumberResult)
2236 tokenData->doubleValue = 0;
2237 else if (WTF::holds_alternative<double>(*parseNumberResult))
2238 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2239 else {
2240 token = BIGINT;
2241 shift();
2242 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2243 tokenData->radix = 16;
2244 }
2245
2246 if (UNLIKELY(isIdentStart(m_current))) {
2247 m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2248 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2249 goto returnError;
2250 }
2251 if (LIKELY(token != BIGINT))
2252 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2253 m_buffer8.shrink(0);
2254 break;
2255 }
2256 if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
2257 if (UNLIKELY(!isASCIIBinaryDigit(peek(1)))) {
2258 m_lexErrorMessage = "No binary digits after '0b'"_s;
2259 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2260 goto returnError;
2261 }
2262
2263 // Shift out the 'b' prefix.
2264 shift();
2265
2266 auto parseNumberResult = parseBinary();
2267 if (!parseNumberResult)
2268 tokenData->doubleValue = 0;
2269 else if (WTF::holds_alternative<double>(*parseNumberResult))
2270 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2271 else {
2272 token = BIGINT;
2273 shift();
2274 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2275 tokenData->radix = 2;
2276 }
2277
2278 if (UNLIKELY(isIdentStart(m_current))) {
2279 m_lexErrorMessage = "No space between binary literal and identifier"_s;
2280 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2281 goto returnError;
2282 }
2283 if (LIKELY(token != BIGINT))
2284 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2285 m_buffer8.shrink(0);
2286 break;
2287 }
2288
2289 if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
2290 if (UNLIKELY(!isASCIIOctalDigit(peek(1)))) {
2291 m_lexErrorMessage = "No octal digits after '0o'"_s;
2292 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2293 goto returnError;
2294 }
2295
2296 // Shift out the 'o' prefix.
2297 shift();
2298
2299 auto parseNumberResult = parseOctal();
2300 if (!parseNumberResult)
2301 tokenData->doubleValue = 0;
2302 else if (WTF::holds_alternative<double>(*parseNumberResult))
2303 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2304 else {
2305 token = BIGINT;
2306 shift();
2307 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2308 tokenData->radix = 8;
2309 }
2310
2311 if (UNLIKELY(isIdentStart(m_current))) {
2312 m_lexErrorMessage = "No space between octal literal and identifier"_s;
2313 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2314 goto returnError;
2315 }
2316 if (LIKELY(token != BIGINT))
2317 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2318 m_buffer8.shrink(0);
2319 break;
2320 }
2321
2322 if (UNLIKELY(m_current == '_')) {
2323 m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
2324 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2325 goto returnError;
2326 }
2327
2328 record8('0');
2329 if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
2330 m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2331 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2332 goto returnError;
2333 }
2334 if (isASCIIOctalDigit(m_current)) {
2335 auto parseNumberResult = parseOctal();
2336 if (parseNumberResult && WTF::holds_alternative<double>(*parseNumberResult)) {
2337 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2338 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2339 }
2340 }
2341 FALLTHROUGH;
2342 case CharacterNumber:
2343 if (LIKELY(token != INTEGER && token != DOUBLE)) {
2344 auto parseNumberResult = parseDecimal();
2345 if (parseNumberResult) {
2346 if (WTF::holds_alternative<double>(*parseNumberResult)) {
2347 tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
2348 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2349 } else {
2350 token = BIGINT;
2351 shift();
2352 tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
2353 tokenData->radix = 10;
2354 }
2355 } else {
2356 token = INTEGER;
2357 if (m_current == '.') {
2358 shift();
2359 if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
2360 m_lexErrorMessage = "Non-number found after decimal point"_s;
2361 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2362 goto returnError;
2363 }
2364 token = DOUBLE;
2365 }
2366 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2367 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2368 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2369 goto returnError;
2370 }
2371 size_t parsedLength;
2372 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2373 if (token == INTEGER)
2374 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2375 }
2376 }
2377
2378 if (UNLIKELY(isIdentStart(m_current))) {
2379 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2380 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2381 goto returnError;
2382 }
2383 m_buffer8.shrink(0);
2384 break;
2385 case CharacterQuote: {
2386 StringParseResult result = StringCannotBeParsed;
2387 if (lexerFlags & LexerFlagsDontBuildStrings)
2388 result = parseString<false>(tokenData, strictMode);
2389 else
2390 result = parseString<true>(tokenData, strictMode);
2391
2392 if (UNLIKELY(result != StringParsedSuccessfully)) {
2393 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2394 goto returnError;
2395 }
2396 shift();
2397 token = STRING;
2398 break;
2399 }
2400 case CharacterIdentifierStart:
2401 ASSERT(isIdentStart(m_current));
2402 FALLTHROUGH;
2403 case CharacterBackSlash:
2404 parseIdent:
2405 if (lexerFlags & LexexFlagsDontBuildKeywords)
2406 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2407 else
2408 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2409 break;
2410 case CharacterLineTerminator:
2411 ASSERT(isLineTerminator(m_current));
2412 shiftLineTerminator();
2413 m_atLineStart = true;
2414 m_hasLineTerminatorBeforeToken = true;
2415 m_lineStart = m_code;
2416 goto start;
2417 case CharacterPrivateIdentifierStart:
2418 if (m_parsingBuiltinFunction)
2419 goto parseIdent;
2420
2421 FALLTHROUGH;
2422 case CharacterOtherIdentifierPart:
2423 case CharacterInvalid:
2424 m_lexErrorMessage = invalidCharacterMessage();
2425 token = ERRORTOK;
2426 goto returnError;
2427 default:
2428 RELEASE_ASSERT_NOT_REACHED();
2429 m_lexErrorMessage = "Internal Error"_s;
2430 token = ERRORTOK;
2431 goto returnError;
2432 }
2433
2434 m_atLineStart = false;
2435 goto returnToken;
2436
2437inSingleLineCommentCheckForDirectives:
2438 // Script comment directives like "//# sourceURL=test.js".
2439 if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
2440 shift();
2441 shift();
2442 parseCommentDirective();
2443 }
2444 // Fall through to complete single line comment parsing.
2445
2446inSingleLineComment:
2447 {
2448 auto lineNumber = m_lineNumber;
2449 auto endOffset = currentOffset();
2450 auto lineStartOffset = currentLineStartOffset();
2451 auto endPosition = currentPosition();
2452
2453 while (!isLineTerminator(m_current)) {
2454 if (atEnd()) {
2455 token = EOFTOK;
2456 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2457 return token;
2458 }
2459 shift();
2460 }
2461 shiftLineTerminator();
2462 m_atLineStart = true;
2463 m_hasLineTerminatorBeforeToken = true;
2464 m_lineStart = m_code;
2465 if (!lastTokenWasRestrKeyword())
2466 goto start;
2467
2468 token = SEMICOLON;
2469 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2470 return token;
2471 }
2472
2473returnToken:
2474 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2475 return token;
2476
2477returnError:
2478 m_error = true;
2479 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2480 RELEASE_ASSERT(token & ErrorTokenFlag);
2481 return token;
2482}
2483
2484template <typename T>
2485static inline void orCharacter(UChar&, UChar);
2486
2487template <>
2488inline void orCharacter<LChar>(UChar&, UChar) { }
2489
2490template <>
2491inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2492{
2493 orAccumulator |= character;
2494}
2495
2496template <typename T>
2497JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2498{
2499 JSTokenData* tokenData = &tokenRecord->m_data;
2500 ASSERT(m_buffer16.isEmpty());
2501
2502 bool lastWasEscape = false;
2503 bool inBrackets = false;
2504 UChar charactersOredTogether = 0;
2505
2506 if (patternPrefix) {
2507 ASSERT(!isLineTerminator(patternPrefix));
2508 ASSERT(patternPrefix != '/');
2509 ASSERT(patternPrefix != '[');
2510 record16(patternPrefix);
2511 }
2512
2513 while (true) {
2514 if (isLineTerminator(m_current) || atEnd()) {
2515 m_buffer16.shrink(0);
2516 JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2517 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2518 m_error = true;
2519 m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2520 return token;
2521 }
2522
2523 T prev = m_current;
2524
2525 shift();
2526
2527 if (prev == '/' && !lastWasEscape && !inBrackets)
2528 break;
2529
2530 record16(prev);
2531 orCharacter<T>(charactersOredTogether, prev);
2532
2533 if (lastWasEscape) {
2534 lastWasEscape = false;
2535 continue;
2536 }
2537
2538 switch (prev) {
2539 case '[':
2540 inBrackets = true;
2541 break;
2542 case ']':
2543 inBrackets = false;
2544 break;
2545 case '\\':
2546 lastWasEscape = true;
2547 break;
2548 }
2549 }
2550
2551 tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2552
2553 m_buffer16.shrink(0);
2554 charactersOredTogether = 0;
2555
2556 while (isIdentPart(m_current)) {
2557 record16(m_current);
2558 orCharacter<T>(charactersOredTogether, m_current);
2559 shift();
2560 }
2561
2562 tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2563 m_buffer16.shrink(0);
2564
2565 // Since RegExp always ends with /, m_atLineStart always becomes false.
2566 m_atLineStart = false;
2567
2568 JSTokenType token = REGEXP;
2569 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2570 return token;
2571}
2572
2573template <typename T>
2574JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2575{
2576 JSTokenData* tokenData = &tokenRecord->m_data;
2577 ASSERT(!m_error);
2578 ASSERT(m_buffer16.isEmpty());
2579
2580 // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2581 // So in this re-scan phase, shift() is not needed here.
2582 StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2583 JSTokenType token = ERRORTOK;
2584 if (UNLIKELY(result != StringParsedSuccessfully)) {
2585 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2586 m_error = true;
2587 } else
2588 token = TEMPLATE;
2589
2590 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2591 m_atLineStart = false;
2592 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2593 return token;
2594}
2595
2596template <typename T>
2597void Lexer<T>::clear()
2598{
2599 m_arena = 0;
2600
2601 Vector<LChar> newBuffer8;
2602 m_buffer8.swap(newBuffer8);
2603
2604 Vector<UChar> newBuffer16;
2605 m_buffer16.swap(newBuffer16);
2606
2607 Vector<UChar> newBufferForRawTemplateString16;
2608 m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2609
2610 m_isReparsingFunction = false;
2611}
2612
2613// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2614template class Lexer<LChar>;
2615template class Lexer<UChar>;
2616
2617} // namespace JSC
2618