/*
 * Copyright (C) 2009-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2012 Mathias Bynens ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSCInlines.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "StrongInlines.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>

namespace JSC {

template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

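// tryJSONPParse recognizes the small set of JSONP statement shapes this parser supports,
// e.g. `var result = {...};`, `foo.bar[0] = {...};` or `callback({...});`. The assignment
// or call path of each statement is recorded in `results`, and the JSON payload itself is
// parsed with parse(StartParseExpression). Anything outside these shapes returns false.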
template <typename CharType>
bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    VM& vm = m_exec->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        // Unguarded next to start off the lexer
        Identifier name = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
        JSONPPathEntry entry;
        if (name == vm.propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclareVar;
            entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        }
        if (isLexerKeyword(entry.m_pathEntryName))
            return false;
        TokenType tokenType = m_lexer.next();
        if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
            return false;
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken()->numberToken;
                int index = (int)doubleIndex;
                if (index != doubleIndex || index < 0)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                break;
            }
            case TokLParen: {
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        m_lexer.next();
        results.append(JSONPData());
        JSValue startParseExpressionValue = parse(StartParseExpression);
        RETURN_IF_EXCEPTION(scope, false);
        results.last().m_value.set(vm, startParseExpressionValue);
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken()->type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken()->type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken()->type == TokIdentifier);
    return m_lexer.currentToken()->type == TokEnd;
}

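// makeIdentifier caches recently seen property names so repeated keys in a large document
// are not re-interned over and over: single-character names live in m_shortIdentifiers and
// longer names in m_recentIdentifiers, both indexed by their first character. Names whose
// first character is >= MaximumCachableCharacter bypass the cache.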
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
{
    if (!length)
        return m_exec->vm().propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(&m_exec->vm(), characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
    return m_recentIdentifiers[characters[0]];
}

template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
{
    if (!length)
        return m_exec->vm().propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(&m_exec->vm(), characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
    return m_recentIdentifiers[characters[0]];
}

// Token type that each of the 256 Latin-1 code points can start; TokError marks
// characters that can never begin a token.
static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
/* 0 - Null */ TokError,
/* 1 - Start of Heading */ TokError,
/* 2 - Start of Text */ TokError,
/* 3 - End of Text */ TokError,
/* 4 - End of Transm. */ TokError,
/* 5 - Enquiry */ TokError,
/* 6 - Acknowledgment */ TokError,
/* 7 - Bell */ TokError,
/* 8 - Back Space */ TokError,
/* 9 - Horizontal Tab */ TokError,
/* 10 - Line Feed */ TokError,
/* 11 - Vertical Tab */ TokError,
/* 12 - Form Feed */ TokError,
/* 13 - Carriage Return */ TokError,
/* 14 - Shift Out */ TokError,
/* 15 - Shift In */ TokError,
/* 16 - Data Line Escape */ TokError,
/* 17 - Device Control 1 */ TokError,
/* 18 - Device Control 2 */ TokError,
/* 19 - Device Control 3 */ TokError,
/* 20 - Device Control 4 */ TokError,
/* 21 - Negative Ack. */ TokError,
/* 22 - Synchronous Idle */ TokError,
/* 23 - End of Transmit */ TokError,
/* 24 - Cancel */ TokError,
/* 25 - End of Medium */ TokError,
/* 26 - Substitute */ TokError,
/* 27 - Escape */ TokError,
/* 28 - File Separator */ TokError,
/* 29 - Group Separator */ TokError,
/* 30 - Record Separator */ TokError,
/* 31 - Unit Separator */ TokError,
/* 32 - Space */ TokError,
/* 33 - ! */ TokError,
/* 34 - " */ TokString,
/* 35 - # */ TokError,
/* 36 - $ */ TokIdentifier,
/* 37 - % */ TokError,
/* 38 - & */ TokError,
/* 39 - ' */ TokString,
/* 40 - ( */ TokLParen,
/* 41 - ) */ TokRParen,
/* 42 - * */ TokError,
/* 43 - + */ TokError,
/* 44 - , */ TokComma,
/* 45 - - */ TokNumber,
/* 46 - . */ TokDot,
/* 47 - / */ TokError,
/* 48 - 0 */ TokNumber,
/* 49 - 1 */ TokNumber,
/* 50 - 2 */ TokNumber,
/* 51 - 3 */ TokNumber,
/* 52 - 4 */ TokNumber,
/* 53 - 5 */ TokNumber,
/* 54 - 6 */ TokNumber,
/* 55 - 7 */ TokNumber,
/* 56 - 8 */ TokNumber,
/* 57 - 9 */ TokNumber,
/* 58 - : */ TokColon,
/* 59 - ; */ TokSemi,
/* 60 - < */ TokError,
/* 61 - = */ TokAssign,
/* 62 - > */ TokError,
/* 63 - ? */ TokError,
/* 64 - @ */ TokError,
/* 65 - A */ TokIdentifier,
/* 66 - B */ TokIdentifier,
/* 67 - C */ TokIdentifier,
/* 68 - D */ TokIdentifier,
/* 69 - E */ TokIdentifier,
/* 70 - F */ TokIdentifier,
/* 71 - G */ TokIdentifier,
/* 72 - H */ TokIdentifier,
/* 73 - I */ TokIdentifier,
/* 74 - J */ TokIdentifier,
/* 75 - K */ TokIdentifier,
/* 76 - L */ TokIdentifier,
/* 77 - M */ TokIdentifier,
/* 78 - N */ TokIdentifier,
/* 79 - O */ TokIdentifier,
/* 80 - P */ TokIdentifier,
/* 81 - Q */ TokIdentifier,
/* 82 - R */ TokIdentifier,
/* 83 - S */ TokIdentifier,
/* 84 - T */ TokIdentifier,
/* 85 - U */ TokIdentifier,
/* 86 - V */ TokIdentifier,
/* 87 - W */ TokIdentifier,
/* 88 - X */ TokIdentifier,
/* 89 - Y */ TokIdentifier,
/* 90 - Z */ TokIdentifier,
/* 91 - [ */ TokLBracket,
/* 92 - \ */ TokError,
/* 93 - ] */ TokRBracket,
/* 94 - ^ */ TokError,
/* 95 - _ */ TokIdentifier,
/* 96 - ` */ TokError,
/* 97 - a */ TokIdentifier,
/* 98 - b */ TokIdentifier,
/* 99 - c */ TokIdentifier,
/* 100 - d */ TokIdentifier,
/* 101 - e */ TokIdentifier,
/* 102 - f */ TokIdentifier,
/* 103 - g */ TokIdentifier,
/* 104 - h */ TokIdentifier,
/* 105 - i */ TokIdentifier,
/* 106 - j */ TokIdentifier,
/* 107 - k */ TokIdentifier,
/* 108 - l */ TokIdentifier,
/* 109 - m */ TokIdentifier,
/* 110 - n */ TokIdentifier,
/* 111 - o */ TokIdentifier,
/* 112 - p */ TokIdentifier,
/* 113 - q */ TokIdentifier,
/* 114 - r */ TokIdentifier,
/* 115 - s */ TokIdentifier,
/* 116 - t */ TokIdentifier,
/* 117 - u */ TokIdentifier,
/* 118 - v */ TokIdentifier,
/* 119 - w */ TokIdentifier,
/* 120 - x */ TokIdentifier,
/* 121 - y */ TokIdentifier,
/* 122 - z */ TokIdentifier,
/* 123 - { */ TokLBrace,
/* 124 - | */ TokError,
/* 125 - } */ TokRBrace,
/* 126 - ~ */ TokError,
/* 127 - Delete */ TokError,
/* 128 - Cc category */ TokError,
/* 129 - Cc category */ TokError,
/* 130 - Cc category */ TokError,
/* 131 - Cc category */ TokError,
/* 132 - Cc category */ TokError,
/* 133 - Cc category */ TokError,
/* 134 - Cc category */ TokError,
/* 135 - Cc category */ TokError,
/* 136 - Cc category */ TokError,
/* 137 - Cc category */ TokError,
/* 138 - Cc category */ TokError,
/* 139 - Cc category */ TokError,
/* 140 - Cc category */ TokError,
/* 141 - Cc category */ TokError,
/* 142 - Cc category */ TokError,
/* 143 - Cc category */ TokError,
/* 144 - Cc category */ TokError,
/* 145 - Cc category */ TokError,
/* 146 - Cc category */ TokError,
/* 147 - Cc category */ TokError,
/* 148 - Cc category */ TokError,
/* 149 - Cc category */ TokError,
/* 150 - Cc category */ TokError,
/* 151 - Cc category */ TokError,
/* 152 - Cc category */ TokError,
/* 153 - Cc category */ TokError,
/* 154 - Cc category */ TokError,
/* 155 - Cc category */ TokError,
/* 156 - Cc category */ TokError,
/* 157 - Cc category */ TokError,
/* 158 - Cc category */ TokError,
/* 159 - Cc category */ TokError,
/* 160 - Zs category (nbsp) */ TokError,
/* 161 - Po category */ TokError,
/* 162 - Sc category */ TokError,
/* 163 - Sc category */ TokError,
/* 164 - Sc category */ TokError,
/* 165 - Sc category */ TokError,
/* 166 - So category */ TokError,
/* 167 - So category */ TokError,
/* 168 - Sk category */ TokError,
/* 169 - So category */ TokError,
/* 170 - Ll category */ TokError,
/* 171 - Pi category */ TokError,
/* 172 - Sm category */ TokError,
/* 173 - Cf category */ TokError,
/* 174 - So category */ TokError,
/* 175 - Sk category */ TokError,
/* 176 - So category */ TokError,
/* 177 - Sm category */ TokError,
/* 178 - No category */ TokError,
/* 179 - No category */ TokError,
/* 180 - Sk category */ TokError,
/* 181 - Ll category */ TokError,
/* 182 - So category */ TokError,
/* 183 - Po category */ TokError,
/* 184 - Sk category */ TokError,
/* 185 - No category */ TokError,
/* 186 - Ll category */ TokError,
/* 187 - Pf category */ TokError,
/* 188 - No category */ TokError,
/* 189 - No category */ TokError,
/* 190 - No category */ TokError,
/* 191 - Po category */ TokError,
/* 192 - Lu category */ TokError,
/* 193 - Lu category */ TokError,
/* 194 - Lu category */ TokError,
/* 195 - Lu category */ TokError,
/* 196 - Lu category */ TokError,
/* 197 - Lu category */ TokError,
/* 198 - Lu category */ TokError,
/* 199 - Lu category */ TokError,
/* 200 - Lu category */ TokError,
/* 201 - Lu category */ TokError,
/* 202 - Lu category */ TokError,
/* 203 - Lu category */ TokError,
/* 204 - Lu category */ TokError,
/* 205 - Lu category */ TokError,
/* 206 - Lu category */ TokError,
/* 207 - Lu category */ TokError,
/* 208 - Lu category */ TokError,
/* 209 - Lu category */ TokError,
/* 210 - Lu category */ TokError,
/* 211 - Lu category */ TokError,
/* 212 - Lu category */ TokError,
/* 213 - Lu category */ TokError,
/* 214 - Lu category */ TokError,
/* 215 - Sm category */ TokError,
/* 216 - Lu category */ TokError,
/* 217 - Lu category */ TokError,
/* 218 - Lu category */ TokError,
/* 219 - Lu category */ TokError,
/* 220 - Lu category */ TokError,
/* 221 - Lu category */ TokError,
/* 222 - Lu category */ TokError,
/* 223 - Ll category */ TokError,
/* 224 - Ll category */ TokError,
/* 225 - Ll category */ TokError,
/* 226 - Ll category */ TokError,
/* 227 - Ll category */ TokError,
/* 228 - Ll category */ TokError,
/* 229 - Ll category */ TokError,
/* 230 - Ll category */ TokError,
/* 231 - Ll category */ TokError,
/* 232 - Ll category */ TokError,
/* 233 - Ll category */ TokError,
/* 234 - Ll category */ TokError,
/* 235 - Ll category */ TokError,
/* 236 - Ll category */ TokError,
/* 237 - Ll category */ TokError,
/* 238 - Ll category */ TokError,
/* 239 - Ll category */ TokError,
/* 240 - Ll category */ TokError,
/* 241 - Ll category */ TokError,
/* 242 - Ll category */ TokError,
/* 243 - Ll category */ TokError,
/* 244 - Ll category */ TokError,
/* 245 - Ll category */ TokError,
/* 246 - Ll category */ TokError,
/* 247 - Sm category */ TokError,
/* 248 - Ll category */ TokError,
/* 249 - Ll category */ TokError,
/* 250 - Ll category */ TokError,
/* 251 - Ll category */ TokError,
/* 252 - Ll category */ TokError,
/* 253 - Ll category */ TokError,
/* 254 - Ll category */ TokError,
/* 255 - Ll category */ TokError
};

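// lex() skips JSON whitespace, then dispatches on the next character through the
// TokenTypesOfLatin1Characters table above. Single-character tokens are materialized here;
// strings, numbers and identifiers go through lexString/lexNumber/lexIdentifier, with the
// true/false/null literals recognized inline before falling back to lexIdentifier.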
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
{
#if !ASSERT_DISABLED
    m_currentTokenID++;
#endif

    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr == m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    ASSERT(m_ptr < m_end);
    token.type = TokError;
    token.start = m_ptr;
    CharType character = *m_ptr;
    if (LIKELY(character < 256)) {
        TokenType tokenType = TokenTypesOfLatin1Characters[character];
        switch (tokenType) {
        case TokString:
            if (character == '\'' && m_mode == StrictJSON) {
                m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                return TokError;
            }
            return lexString(token, character);

        case TokIdentifier: {
            switch (character) {
            case 't':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                    m_ptr += 4;
                    token.type = TokTrue;
                    token.end = m_ptr;
                    return TokTrue;
                }
                break;
            case 'f':
                if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                    m_ptr += 5;
                    token.type = TokFalse;
                    token.end = m_ptr;
                    return TokFalse;
                }
                break;
            case 'n':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                    m_ptr += 4;
                    token.type = TokNull;
                    token.end = m_ptr;
                    return TokNull;
                }
                break;
            }
            return lexIdentifier(token);
        }

        case TokNumber:
            return lexNumber(token);

        case TokError:
            break;

        default:
            ASSERT(tokenType == TokLBracket
                || tokenType == TokRBracket
                || tokenType == TokLBrace
                || tokenType == TokRBrace
                || tokenType == TokColon
                || tokenType == TokLParen
                || tokenType == TokRParen
                || tokenType == TokComma
                || tokenType == TokDot
                || tokenType == TokAssign
                || tokenType == TokSemi);
            token.type = tokenType;
            token.end = ++m_ptr;
            return tokenType;
        }
    }
    m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\'');
    return TokError;
}

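// lexIdentifier is specialized per character type: the 8-bit lexer only accepts ASCII
// alphanumerics, '_' and '$', while the 16-bit lexer additionally accepts ZERO WIDTH
// NON-JOINER (U+200C) and ZERO WIDTH JOINER (U+200D).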
template <>
ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
        m_ptr++;
    token.stringIs8Bit = 1;
    token.stringToken8 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <>
ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
        m_ptr++;
    token.stringIs8Bit = 0;
    token.stringToken16 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::next()
{
    TokenType result = lex(m_currentToken);
    ASSERT(m_currentToken.type == result);
    return result;
}

template <>
ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
{
    token.stringIs8Bit = 1;
    token.stringToken8 = string;
}

template <>
ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
{
    token.stringIs8Bit = 0;
    token.stringToken16 = string;
}

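// A "safe" string character can be copied into the token verbatim: anything at or above the
// space character that is neither a backslash nor the terminating quote. Non-strict
// (eval-style) parsing also tolerates a raw tab, but sends 16-bit characters above 0xFF to
// the slow path.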
enum class SafeStringCharacterSet { Strict, NonStrict };

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
{
    return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
{
    return (c >= ' ' && (set == SafeStringCharacterSet::Strict || c <= 0xff) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
{
    ++m_ptr;
    const CharType* runStart = m_ptr;

    if (m_mode == StrictJSON) {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
            ++m_ptr;
    } else {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
            ++m_ptr;
    }

    if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
        token.type = TokString;
        token.end = ++m_ptr;
        return TokString;
    }
    return lexStringSlow(token, runStart, terminator);
}

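// lexStringSlow handles everything the fast path in lexString cannot copy verbatim. It
// accumulates the string in m_builder, decoding the JSON escapes \" \\ \/ \b \f \n \r \t
// and \uNNNN (plus \' outside strict JSON mode), and reports unterminated strings and
// malformed escapes as TokError.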
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
{
    m_builder.clear();
    goto slowPathBegin;
    do {
        runStart = m_ptr;
        if (m_mode == StrictJSON) {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
                ++m_ptr;
        } else {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
                ++m_ptr;
        }

        if (!m_builder.isEmpty())
            m_builder.append(runStart, m_ptr - runStart);

slowPathBegin:
        if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            if (m_builder.isEmpty() && runStart < m_ptr)
                m_builder.append(runStart, m_ptr - runStart);
            ++m_ptr;
            if (m_ptr >= m_end) {
                m_lexErrorMessage = "Unterminated string"_s;
                return TokError;
            }
            switch (*m_ptr) {
            case '"':
                m_builder.append('"');
                m_ptr++;
                break;
            case '\\':
                m_builder.append('\\');
                m_ptr++;
                break;
            case '/':
                m_builder.append('/');
                m_ptr++;
                break;
            case 'b':
                m_builder.append('\b');
                m_ptr++;
                break;
            case 'f':
                m_builder.append('\f');
                m_ptr++;
                break;
            case 'n':
                m_builder.append('\n');
                m_ptr++;
                break;
            case 'r':
                m_builder.append('\r');
                m_ptr++;
                break;
            case 't':
                m_builder.append('\t');
                m_ptr++;
                break;

            case 'u':
                if ((m_end - m_ptr) < 5) {
                    m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
                    return TokError;
                } // uNNNN == 5 characters
                for (int i = 1; i < 5; i++) {
                    if (!isASCIIHexDigit(m_ptr[i])) {
                        m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape");
                        return TokError;
                    }
                }
                m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
                break;

            default:
                if (*m_ptr == '\'' && m_mode != StrictJSON) {
                    m_builder.append('\'');
                    m_ptr++;
                    break;
                }
                m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 });
                return TokError;
            }
        }
    } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);

    if (m_ptr >= m_end || *m_ptr != terminator) {
        m_lexErrorMessage = "Unterminated string"_s;
        return TokError;
    }

    if (m_builder.isEmpty()) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
    } else {
        if (m_builder.is8Bit()) {
            token.stringIs8Bit = 1;
            token.stringToken8 = m_builder.characters8();
        } else {
            token.stringIs8Bit = 0;
            token.stringToken16 = m_builder.characters16();
        }
        token.stringLength = m_builder.length();
    }
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

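// lexNumber has a fast path for short integer literals: if there is no fraction or exponent
// and the token is at most NumberOfDigitsForSafeInt32 characters long, the value is
// accumulated directly as an int32; otherwise the whole token is handed to parseDouble.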
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else {
        m_lexErrorMessage = "Invalid number"_s;
        return TokError;
    }

    // ('.' [0-9]+)?
    const int NumberOfDigitsForSafeInt32 = 9; // The numbers from -99999999 to 999999999 are always in range of Int32.
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Invalid digits after decimal point"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
        int32_t result = 0;
        token.type = TokNumber;
        token.end = m_ptr;
        const CharType* digit = token.start;
        bool negative = false;
        if (*digit == '-') {
            negative = true;
            digit++;
        }

        ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
        while (digit < m_ptr)
            result = result * 10 + (*digit++) - '0';

        if (!negative)
            token.numberToken = result;
        else {
            if (!result)
                token.numberToken = -0.0;
            else
                token.numberToken = -result;
        }
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    size_t parsedLength;
    token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
    return TokNumber;
}

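// parse() is an explicit state machine rather than a set of recursive functions: nested
// arrays and objects push a continuation state onto stateStack and the partially built
// container onto objectStack, so deeply nested input cannot overflow the native stack.
// Property names wait on identifierStack until their value has been parsed.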
template <typename CharType>
JSValue LiteralParser<CharType>::parse(ParserState initialState)
{
    VM& vm = m_exec->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
    Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
    HashSet<JSObject*> visitedUnderscoreProto;
    while (1) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            JSArray* array = constructEmptyArray(m_exec, 0);
            RETURN_IF_EXCEPTION(scope, JSValue());
            objectStack.appendWithCrashOnOverflow(array);
        }
        doParseArrayStartExpression:
        FALLTHROUGH;
        case DoParseArrayStartExpression: {
            TokenType lastToken = m_lexer.currentToken()->type;
            if (m_lexer.next() == TokRBracket) {
                if (lastToken == TokComma) {
                    m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
                    return JSValue();
                }
                m_lexer.next();
                lastValue = objectStack.takeLast();
                break;
            }

            stateStack.append(DoParseArrayEndExpression);
            goto startParseExpression;
        }
        case DoParseArrayEndExpression: {
            JSArray* array = asArray(objectStack.last());
            array->putDirectIndex(m_exec, array->length(), lastValue);
            RETURN_IF_EXCEPTION(scope, JSValue());

            if (m_lexer.currentToken()->type == TokComma)
                goto doParseArrayStartExpression;

            if (m_lexer.currentToken()->type != TokRBracket) {
                m_parseErrorMessage = "Expected ']'"_s;
                return JSValue();
            }

            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            JSObject* object = constructEmptyObject(m_exec);
            objectStack.appendWithCrashOnOverflow(object);

            TokenType type = m_lexer.next();
            if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
                typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
                if (identifierToken->stringIs8Bit)
                    identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
                else
                    identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

                // Check for colon
                if (m_lexer.next() != TokColon) {
                    m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
                    return JSValue();
                }

                m_lexer.next();
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            if (type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            TokenType type = m_lexer.next();
            if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
                m_parseErrorMessage = "Property name must be a string literal"_s;
                return JSValue();
            }
            typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
            if (identifierToken->stringIs8Bit)
                identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
            else
                identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

            // Check for colon
            if (m_lexer.next() != TokColon) {
                m_parseErrorMessage = "Expected ':'"_s;
                return JSValue();
            }

            m_lexer.next();
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            JSObject* object = asObject(objectStack.last());
            Identifier ident = identifierStack.takeLast();
            if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
                if (!visitedUnderscoreProto.add(object).isNewEntry) {
                    m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
                    return JSValue();
                }
                CodeBlock* codeBlock = m_exec->codeBlock();
                PutPropertySlot slot(object, codeBlock ? codeBlock->isStrictMode() : false);
                objectStack.last().put(m_exec, ident, lastValue, slot);
            } else {
                if (Optional<uint32_t> index = parseIndex(ident))
                    object->putDirectIndex(m_exec, index.value(), lastValue);
                else
                    object->putDirect(vm, ident, lastValue);
            }
            RETURN_IF_EXCEPTION(scope, JSValue());
            if (m_lexer.currentToken()->type == TokComma)
                goto doParseObjectStartExpression;
            if (m_lexer.currentToken()->type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
                if (stringToken->stringIs8Bit)
                    lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
                else
                    lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
                m_lexer.next();
                break;
            }
            case TokNumber: {
                typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
                lastValue = jsNumber(numberToken->numberToken);
                m_lexer.next();
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;

            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;

            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier: {
                typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
                if (token->stringIs8Bit)
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, '"');
                else
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, '"');
                return JSValue();
            }
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokLParen:
                m_parseErrorMessage = "Unexpected token '('"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokError:
            default:
                // Error
                m_parseErrorMessage = "Could not parse value expression"_s;
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;

            case TokLParen: {
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokLBrace:
                m_parseErrorMessage = "Unexpected token '{'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier:
                m_parseErrorMessage = "Unexpected identifier"_s;
                return JSValue();
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokTrue:
                m_parseErrorMessage = "Unexpected token 'true'"_s;
                return JSValue();
            case TokFalse:
                m_parseErrorMessage = "Unexpected token 'false'"_s;
                return JSValue();
            case TokNull:
                m_parseErrorMessage = "Unexpected token 'null'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokError:
            default:
                m_parseErrorMessage = "Could not parse statement"_s;
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken()->type != TokRParen)
                return JSValue();
            if (m_lexer.next() == TokEnd)
                return lastValue;
            m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
            return JSValue();
        }
        default:
            RELEASE_ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.takeLast();
        continue;
    }
}

// Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
template class LiteralParser<LChar>;
template class LiteralParser<UChar>;

} // namespace JSC