/*
 * Copyright (C) 2009-2019 Apple Inc. All rights reserved.
 * Copyright (C) 2012 Mathias Bynens ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "JSCInlines.h"
#include "StrongInlines.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>

namespace JSC {

template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

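// A non-normative sketch of the JSONP shapes accepted by tryJSONPParse() below,
// summarized from the code rather than from any specification:
//     foo.bar[0] = { ... };      // assignment to a dotted/indexed path
//     var foo = { ... };         // variable declaration
//     foo.bar({ ... });          // callback invocation
// Each statement's access path is recorded as a Vector<JSONPPathEntry> and its value
// is produced by parse(StartParseExpression).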
template <typename CharType>
bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    VM& vm = m_globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        // Unguarded next to start off the lexer
        Identifier name = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
        JSONPPathEntry entry;
        if (name == vm.propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclareVar;
            entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        }
        if (isLexerKeyword(entry.m_pathEntryName))
            return false;
        TokenType tokenType = m_lexer.next();
        if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
            return false;
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken()->numberToken;
                int index = (int)doubleIndex;
                if (index != doubleIndex || index < 0)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier::fromString(vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                break;
            }
            case TokLParen: {
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        m_lexer.next();
        results.append(JSONPData());
        JSValue startParseExpressionValue = parse(StartParseExpression);
        RETURN_IF_EXCEPTION(scope, false);
        results.last().m_value.set(vm, startParseExpressionValue);
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken()->type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken()->type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken()->type == TokIdentifier);
    return m_lexer.currentToken()->type == TokEnd;
}

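// makeIdentifier() keeps two small caches to avoid re-uniquing strings that JSON payloads
// tend to repeat (a rough description of the code below): single-character identifiers are
// cached per character in m_shortIdentifiers, longer identifiers are cached in
// m_recentIdentifiers keyed by their first character, and identifiers whose first character
// is >= MaximumCachableCharacter bypass the caches entirely.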
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
    return m_recentIdentifiers[characters[0]];
}

template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
{
    VM& vm = m_globalObject->vm();
    if (!length)
        return vm.propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(vm, characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(vm, characters, length);
    return m_recentIdentifiers[characters[0]];
}

// 256 Latin-1 codes
static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
/* 0 - Null */ TokError,
/* 1 - Start of Heading */ TokError,
/* 2 - Start of Text */ TokError,
/* 3 - End of Text */ TokError,
/* 4 - End of Transm. */ TokError,
/* 5 - Enquiry */ TokError,
/* 6 - Acknowledgment */ TokError,
/* 7 - Bell */ TokError,
/* 8 - Back Space */ TokError,
/* 9 - Horizontal Tab */ TokError,
/* 10 - Line Feed */ TokError,
/* 11 - Vertical Tab */ TokError,
/* 12 - Form Feed */ TokError,
/* 13 - Carriage Return */ TokError,
/* 14 - Shift Out */ TokError,
/* 15 - Shift In */ TokError,
/* 16 - Data Line Escape */ TokError,
/* 17 - Device Control 1 */ TokError,
/* 18 - Device Control 2 */ TokError,
/* 19 - Device Control 3 */ TokError,
/* 20 - Device Control 4 */ TokError,
/* 21 - Negative Ack. */ TokError,
/* 22 - Synchronous Idle */ TokError,
/* 23 - End of Transmit */ TokError,
/* 24 - Cancel */ TokError,
/* 25 - End of Medium */ TokError,
/* 26 - Substitute */ TokError,
/* 27 - Escape */ TokError,
/* 28 - File Separator */ TokError,
/* 29 - Group Separator */ TokError,
/* 30 - Record Separator */ TokError,
/* 31 - Unit Separator */ TokError,
/* 32 - Space */ TokError,
/* 33 - ! */ TokError,
/* 34 - " */ TokString,
/* 35 - # */ TokError,
/* 36 - $ */ TokIdentifier,
/* 37 - % */ TokError,
/* 38 - & */ TokError,
/* 39 - ' */ TokString,
/* 40 - ( */ TokLParen,
/* 41 - ) */ TokRParen,
/* 42 - * */ TokError,
/* 43 - + */ TokError,
/* 44 - , */ TokComma,
/* 45 - - */ TokNumber,
/* 46 - . */ TokDot,
/* 47 - / */ TokError,
/* 48 - 0 */ TokNumber,
/* 49 - 1 */ TokNumber,
/* 50 - 2 */ TokNumber,
/* 51 - 3 */ TokNumber,
/* 52 - 4 */ TokNumber,
/* 53 - 5 */ TokNumber,
/* 54 - 6 */ TokNumber,
/* 55 - 7 */ TokNumber,
/* 56 - 8 */ TokNumber,
/* 57 - 9 */ TokNumber,
/* 58 - : */ TokColon,
/* 59 - ; */ TokSemi,
/* 60 - < */ TokError,
/* 61 - = */ TokAssign,
/* 62 - > */ TokError,
/* 63 - ? */ TokError,
/* 64 - @ */ TokError,
/* 65 - A */ TokIdentifier,
/* 66 - B */ TokIdentifier,
/* 67 - C */ TokIdentifier,
/* 68 - D */ TokIdentifier,
/* 69 - E */ TokIdentifier,
/* 70 - F */ TokIdentifier,
/* 71 - G */ TokIdentifier,
/* 72 - H */ TokIdentifier,
/* 73 - I */ TokIdentifier,
/* 74 - J */ TokIdentifier,
/* 75 - K */ TokIdentifier,
/* 76 - L */ TokIdentifier,
/* 77 - M */ TokIdentifier,
/* 78 - N */ TokIdentifier,
/* 79 - O */ TokIdentifier,
/* 80 - P */ TokIdentifier,
/* 81 - Q */ TokIdentifier,
/* 82 - R */ TokIdentifier,
/* 83 - S */ TokIdentifier,
/* 84 - T */ TokIdentifier,
/* 85 - U */ TokIdentifier,
/* 86 - V */ TokIdentifier,
/* 87 - W */ TokIdentifier,
/* 88 - X */ TokIdentifier,
/* 89 - Y */ TokIdentifier,
/* 90 - Z */ TokIdentifier,
/* 91 - [ */ TokLBracket,
/* 92 - \ */ TokError,
/* 93 - ] */ TokRBracket,
/* 94 - ^ */ TokError,
/* 95 - _ */ TokIdentifier,
/* 96 - ` */ TokError,
/* 97 - a */ TokIdentifier,
/* 98 - b */ TokIdentifier,
/* 99 - c */ TokIdentifier,
/* 100 - d */ TokIdentifier,
/* 101 - e */ TokIdentifier,
/* 102 - f */ TokIdentifier,
/* 103 - g */ TokIdentifier,
/* 104 - h */ TokIdentifier,
/* 105 - i */ TokIdentifier,
/* 106 - j */ TokIdentifier,
/* 107 - k */ TokIdentifier,
/* 108 - l */ TokIdentifier,
/* 109 - m */ TokIdentifier,
/* 110 - n */ TokIdentifier,
/* 111 - o */ TokIdentifier,
/* 112 - p */ TokIdentifier,
/* 113 - q */ TokIdentifier,
/* 114 - r */ TokIdentifier,
/* 115 - s */ TokIdentifier,
/* 116 - t */ TokIdentifier,
/* 117 - u */ TokIdentifier,
/* 118 - v */ TokIdentifier,
/* 119 - w */ TokIdentifier,
/* 120 - x */ TokIdentifier,
/* 121 - y */ TokIdentifier,
/* 122 - z */ TokIdentifier,
/* 123 - { */ TokLBrace,
/* 124 - | */ TokError,
/* 125 - } */ TokRBrace,
/* 126 - ~ */ TokError,
/* 127 - Delete */ TokError,
/* 128 - Cc category */ TokError,
/* 129 - Cc category */ TokError,
/* 130 - Cc category */ TokError,
/* 131 - Cc category */ TokError,
/* 132 - Cc category */ TokError,
/* 133 - Cc category */ TokError,
/* 134 - Cc category */ TokError,
/* 135 - Cc category */ TokError,
/* 136 - Cc category */ TokError,
/* 137 - Cc category */ TokError,
/* 138 - Cc category */ TokError,
/* 139 - Cc category */ TokError,
/* 140 - Cc category */ TokError,
/* 141 - Cc category */ TokError,
/* 142 - Cc category */ TokError,
/* 143 - Cc category */ TokError,
/* 144 - Cc category */ TokError,
/* 145 - Cc category */ TokError,
/* 146 - Cc category */ TokError,
/* 147 - Cc category */ TokError,
/* 148 - Cc category */ TokError,
/* 149 - Cc category */ TokError,
/* 150 - Cc category */ TokError,
/* 151 - Cc category */ TokError,
/* 152 - Cc category */ TokError,
/* 153 - Cc category */ TokError,
/* 154 - Cc category */ TokError,
/* 155 - Cc category */ TokError,
/* 156 - Cc category */ TokError,
/* 157 - Cc category */ TokError,
/* 158 - Cc category */ TokError,
/* 159 - Cc category */ TokError,
/* 160 - Zs category (nbsp) */ TokError,
/* 161 - Po category */ TokError,
/* 162 - Sc category */ TokError,
/* 163 - Sc category */ TokError,
/* 164 - Sc category */ TokError,
/* 165 - Sc category */ TokError,
/* 166 - So category */ TokError,
/* 167 - So category */ TokError,
/* 168 - Sk category */ TokError,
/* 169 - So category */ TokError,
/* 170 - Ll category */ TokError,
/* 171 - Pi category */ TokError,
/* 172 - Sm category */ TokError,
/* 173 - Cf category */ TokError,
/* 174 - So category */ TokError,
/* 175 - Sk category */ TokError,
/* 176 - So category */ TokError,
/* 177 - Sm category */ TokError,
/* 178 - No category */ TokError,
/* 179 - No category */ TokError,
/* 180 - Sk category */ TokError,
/* 181 - Ll category */ TokError,
/* 182 - So category */ TokError,
/* 183 - Po category */ TokError,
/* 184 - Sk category */ TokError,
/* 185 - No category */ TokError,
/* 186 - Ll category */ TokError,
/* 187 - Pf category */ TokError,
/* 188 - No category */ TokError,
/* 189 - No category */ TokError,
/* 190 - No category */ TokError,
/* 191 - Po category */ TokError,
/* 192 - Lu category */ TokError,
/* 193 - Lu category */ TokError,
/* 194 - Lu category */ TokError,
/* 195 - Lu category */ TokError,
/* 196 - Lu category */ TokError,
/* 197 - Lu category */ TokError,
/* 198 - Lu category */ TokError,
/* 199 - Lu category */ TokError,
/* 200 - Lu category */ TokError,
/* 201 - Lu category */ TokError,
/* 202 - Lu category */ TokError,
/* 203 - Lu category */ TokError,
/* 204 - Lu category */ TokError,
/* 205 - Lu category */ TokError,
/* 206 - Lu category */ TokError,
/* 207 - Lu category */ TokError,
/* 208 - Lu category */ TokError,
/* 209 - Lu category */ TokError,
/* 210 - Lu category */ TokError,
/* 211 - Lu category */ TokError,
/* 212 - Lu category */ TokError,
/* 213 - Lu category */ TokError,
/* 214 - Lu category */ TokError,
/* 215 - Sm category */ TokError,
/* 216 - Lu category */ TokError,
/* 217 - Lu category */ TokError,
/* 218 - Lu category */ TokError,
/* 219 - Lu category */ TokError,
/* 220 - Lu category */ TokError,
/* 221 - Lu category */ TokError,
/* 222 - Lu category */ TokError,
/* 223 - Ll category */ TokError,
/* 224 - Ll category */ TokError,
/* 225 - Ll category */ TokError,
/* 226 - Ll category */ TokError,
/* 227 - Ll category */ TokError,
/* 228 - Ll category */ TokError,
/* 229 - Ll category */ TokError,
/* 230 - Ll category */ TokError,
/* 231 - Ll category */ TokError,
/* 232 - Ll category */ TokError,
/* 233 - Ll category */ TokError,
/* 234 - Ll category */ TokError,
/* 235 - Ll category */ TokError,
/* 236 - Ll category */ TokError,
/* 237 - Ll category */ TokError,
/* 238 - Ll category */ TokError,
/* 239 - Ll category */ TokError,
/* 240 - Ll category */ TokError,
/* 241 - Ll category */ TokError,
/* 242 - Ll category */ TokError,
/* 243 - Ll category */ TokError,
/* 244 - Ll category */ TokError,
/* 245 - Ll category */ TokError,
/* 246 - Ll category */ TokError,
/* 247 - Sm category */ TokError,
/* 248 - Ll category */ TokError,
/* 249 - Ll category */ TokError,
/* 250 - Ll category */ TokError,
/* 251 - Ll category */ TokError,
/* 252 - Ll category */ TokError,
/* 253 - Ll category */ TokError,
/* 254 - Ll category */ TokError,
/* 255 - Ll category */ TokError
};

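// lex() dispatches on the token's first character via TokenTypesOfLatin1Characters above.
// Single-character punctuation tokens are produced inline, while strings, numbers,
// identifiers, and the true/false/null keywords are handed off to the dedicated lex*
// helpers that follow.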
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
{
#if !ASSERT_DISABLED
    m_currentTokenID++;
#endif

    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr == m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    ASSERT(m_ptr < m_end);
    token.type = TokError;
    token.start = m_ptr;
    CharType character = *m_ptr;
    if (LIKELY(isLatin1(character))) {
        TokenType tokenType = TokenTypesOfLatin1Characters[character];
        switch (tokenType) {
        case TokString:
            if (character == '\'' && m_mode == StrictJSON) {
                m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                return TokError;
            }
            return lexString(token, character);

        case TokIdentifier: {
            switch (character) {
            case 't':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                    m_ptr += 4;
                    token.type = TokTrue;
                    token.end = m_ptr;
                    return TokTrue;
                }
                break;
            case 'f':
                if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                    m_ptr += 5;
                    token.type = TokFalse;
                    token.end = m_ptr;
                    return TokFalse;
                }
                break;
            case 'n':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                    m_ptr += 4;
                    token.type = TokNull;
                    token.end = m_ptr;
                    return TokNull;
                }
                break;
            }
            return lexIdentifier(token);
        }

        case TokNumber:
            return lexNumber(token);

        case TokError:
            break;

        default:
            ASSERT(tokenType == TokLBracket
                || tokenType == TokRBracket
                || tokenType == TokLBrace
                || tokenType == TokRBrace
                || tokenType == TokColon
                || tokenType == TokLParen
                || tokenType == TokRParen
                || tokenType == TokComma
                || tokenType == TokDot
                || tokenType == TokAssign
                || tokenType == TokSemi);
            token.type = tokenType;
            token.end = ++m_ptr;
            return tokenType;
        }
    }
    m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\'');
    return TokError;
}

template <>
ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
        m_ptr++;
    token.stringIs8Bit = 1;
    token.stringToken8 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <>
ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
        m_ptr++;
    token.stringIs8Bit = 0;
    token.stringToken16 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::next()
{
    TokenType result = lex(m_currentToken);
    ASSERT(m_currentToken.type == result);
    return result;
}

template <>
ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
{
    token.stringIs8Bit = 1;
    token.stringToken8 = string;
}

template <>
ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
{
    token.stringIs8Bit = 0;
    token.stringToken16 = string;
}

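// String lexing fast path (a rough summary of the predicates and lexString() below, not a
// statement of the JSON grammar): a run of "safe" characters (no backslash, no terminator,
// nothing below ' ', and for 16-bit non-strict input only Latin-1) is scanned in place, and
// lexStringSlow() is entered only when an escape sequence or other unusual character is hit.
// In non-strict mode a raw tab is additionally treated as safe.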
enum class SafeStringCharacterSet { Strict, NonStrict };

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
{
    return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
{
    return (c >= ' ' && (set == SafeStringCharacterSet::Strict || isLatin1(c)) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
{
    ++m_ptr;
    const CharType* runStart = m_ptr;

    if (m_mode == StrictJSON) {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
            ++m_ptr;
    } else {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
            ++m_ptr;
    }

    if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
        token.type = TokString;
        token.end = ++m_ptr;
        return TokString;
    }
    return lexStringSlow(token, runStart, terminator);
}

template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
{
    m_builder.clear();
    goto slowPathBegin;
    do {
        runStart = m_ptr;
        if (m_mode == StrictJSON) {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
                ++m_ptr;
        } else {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
                ++m_ptr;
        }

        if (!m_builder.isEmpty())
            m_builder.appendCharacters(runStart, m_ptr - runStart);

slowPathBegin:
        if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            if (m_builder.isEmpty() && runStart < m_ptr)
                m_builder.appendCharacters(runStart, m_ptr - runStart);
            ++m_ptr;
            if (m_ptr >= m_end) {
                m_lexErrorMessage = "Unterminated string"_s;
                return TokError;
            }
            switch (*m_ptr) {
            case '"':
                m_builder.append('"');
                m_ptr++;
                break;
            case '\\':
                m_builder.append('\\');
                m_ptr++;
                break;
            case '/':
                m_builder.append('/');
                m_ptr++;
                break;
            case 'b':
                m_builder.append('\b');
                m_ptr++;
                break;
            case 'f':
                m_builder.append('\f');
                m_ptr++;
                break;
            case 'n':
                m_builder.append('\n');
                m_ptr++;
                break;
            case 'r':
                m_builder.append('\r');
                m_ptr++;
                break;
            case 't':
                m_builder.append('\t');
                m_ptr++;
                break;

            case 'u':
                if ((m_end - m_ptr) < 5) {
                    m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
                    return TokError;
                } // uNNNN == 5 characters
                for (int i = 1; i < 5; i++) {
                    if (!isASCIIHexDigit(m_ptr[i])) {
                        m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape");
                        return TokError;
                    }
                }
                m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
                break;

            default:
                if (*m_ptr == '\'' && m_mode != StrictJSON) {
                    m_builder.append('\'');
                    m_ptr++;
                    break;
                }
                m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 });
                return TokError;
            }
        }
    } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);

    if (m_ptr >= m_end || *m_ptr != terminator) {
        m_lexErrorMessage = "Unterminated string"_s;
        return TokError;
    }

    if (m_builder.isEmpty()) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
    } else {
        if (m_builder.is8Bit()) {
            token.stringIs8Bit = 1;
            token.stringToken8 = m_builder.characters8();
        } else {
            token.stringIs8Bit = 0;
            token.stringToken16 = m_builder.characters16();
        }
        token.stringLength = m_builder.length();
    }
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

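// lexNumber() recognizes the number grammar summarized in the comment at the top of the
// function. As an optimization (described from the code below), an integer literal with no
// fraction or exponent and at most NumberOfDigitsForSafeInt32 characters is accumulated
// directly into an int32-ranged value; everything else is handed to WTF's parseDouble().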
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else {
        m_lexErrorMessage = "Invalid number"_s;
        return TokError;
    }

    // ('.' [0-9]+)?
    const int NumberOfDigitsForSafeInt32 = 9; // The numbers from -99999999 to 999999999 are always in range of Int32.
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Invalid digits after decimal point"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
        int32_t result = 0;
        token.type = TokNumber;
        token.end = m_ptr;
        const CharType* digit = token.start;
        bool negative = false;
        if (*digit == '-') {
            negative = true;
            digit++;
        }

        ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
        while (digit < m_ptr)
            result = result * 10 + (*digit++) - '0';

        if (!negative)
            token.numberToken = result;
        else {
            if (!result)
                token.numberToken = -0.0;
            else
                token.numberToken = -result;
        }
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    size_t parsedLength;
    token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
    return TokNumber;
}

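// parse() is written as an explicit state machine: instead of recursing for nested arrays
// and objects it pushes continuation states onto stateStack (with parallel objectStack and
// identifierStack), so nesting depth is bounded by heap-allocated vectors rather than the
// native call stack.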
template <typename CharType>
JSValue LiteralParser<CharType>::parse(ParserState initialState)
{
    VM& vm = m_globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
    Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
    HashSet<JSObject*> visitedUnderscoreProto;
    while (1) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            JSArray* array = constructEmptyArray(m_globalObject, 0);
            RETURN_IF_EXCEPTION(scope, JSValue());
            objectStack.appendWithCrashOnOverflow(array);
        }
        doParseArrayStartExpression:
            FALLTHROUGH;
        case DoParseArrayStartExpression: {
            TokenType lastToken = m_lexer.currentToken()->type;
            if (m_lexer.next() == TokRBracket) {
                if (lastToken == TokComma) {
                    m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
                    return JSValue();
                }
                m_lexer.next();
                lastValue = objectStack.takeLast();
                break;
            }

            stateStack.append(DoParseArrayEndExpression);
            goto startParseExpression;
        }
        case DoParseArrayEndExpression: {
            JSArray* array = asArray(objectStack.last());
            array->putDirectIndex(m_globalObject, array->length(), lastValue);
            RETURN_IF_EXCEPTION(scope, JSValue());

            if (m_lexer.currentToken()->type == TokComma)
                goto doParseArrayStartExpression;

            if (m_lexer.currentToken()->type != TokRBracket) {
                m_parseErrorMessage = "Expected ']'"_s;
                return JSValue();
            }

            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            JSObject* object = constructEmptyObject(m_globalObject);
            objectStack.appendWithCrashOnOverflow(object);

            TokenType type = m_lexer.next();
            if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
                typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
                if (identifierToken->stringIs8Bit)
                    identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
                else
                    identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

                // Check for colon
                if (m_lexer.next() != TokColon) {
                    m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
                    return JSValue();
                }

                m_lexer.next();
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            if (type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            TokenType type = m_lexer.next();
            if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
                m_parseErrorMessage = "Property name must be a string literal"_s;
                return JSValue();
            }
            typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
            if (identifierToken->stringIs8Bit)
                identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
            else
                identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

            // Check for colon
            if (m_lexer.next() != TokColon) {
                m_parseErrorMessage = "Expected ':'"_s;
                return JSValue();
            }

            m_lexer.next();
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            JSObject* object = asObject(objectStack.last());
            Identifier ident = identifierStack.takeLast();
            if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
                if (!visitedUnderscoreProto.add(object).isNewEntry) {
                    m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
                    return JSValue();
                }
                PutPropertySlot slot(object, m_nullOrCodeBlock ? m_nullOrCodeBlock->isStrictMode() : false);
                objectStack.last().put(m_globalObject, ident, lastValue, slot);
            } else {
                if (Optional<uint32_t> index = parseIndex(ident))
                    object->putDirectIndex(m_globalObject, index.value(), lastValue);
                else
                    object->putDirect(vm, ident, lastValue);
            }
            RETURN_IF_EXCEPTION(scope, JSValue());
            if (m_lexer.currentToken()->type == TokComma)
                goto doParseObjectStartExpression;
            if (m_lexer.currentToken()->type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
                if (stringToken->stringIs8Bit)
                    lastValue = jsString(vm, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
                else
                    lastValue = jsString(vm, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
                m_lexer.next();
                break;
            }
            case TokNumber: {
                typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
                lastValue = jsNumber(numberToken->numberToken);
                m_lexer.next();
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;

            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;

            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier: {
                typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
                if (token->stringIs8Bit)
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, '"');
                else
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, '"');
                return JSValue();
            }
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokLParen:
                m_parseErrorMessage = "Unexpected token '('"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokError:
            default:
                // Error
                m_parseErrorMessage = "Could not parse value expression"_s;
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;

            case TokLParen: {
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokLBrace:
                m_parseErrorMessage = "Unexpected token '{'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier:
                m_parseErrorMessage = "Unexpected identifier"_s;
                return JSValue();
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokTrue:
                m_parseErrorMessage = "Unexpected token 'true'"_s;
                return JSValue();
            case TokFalse:
                m_parseErrorMessage = "Unexpected token 'false'"_s;
                return JSValue();
            case TokNull:
                m_parseErrorMessage = "Unexpected token 'null'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokError:
            default:
                m_parseErrorMessage = "Could not parse statement"_s;
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken()->type != TokRParen)
                return JSValue();
            if (m_lexer.next() == TokEnd)
                return lastValue;
            m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
            return JSValue();
        }
        default:
            RELEASE_ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.takeLast();
        continue;
    }
}

// Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
template class LiteralParser<LChar>;
template class LiteralParser<UChar>;

} // namespace JSC