/*
 * Copyright (C) 2009-2017 Apple Inc. All rights reserved.
 * Copyright (C) 2012 Mathias Bynens ([email protected])
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "ButterflyInlines.h"
#include "CodeBlock.h"
#include "JSArray.h"
#include "JSCInlines.h"
#include "JSString.h"
#include "Lexer.h"
#include "ObjectConstructor.h"
#include "StrongInlines.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>
#include <wtf/text/StringConcatenate.h>

namespace JSC {

template <typename CharType>
static ALWAYS_INLINE bool isJSONWhiteSpace(const CharType& c)
{
    // The JSON RFC 4627 defines a list of allowed characters to be considered
    // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
    return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
}

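// tryJSONPParse recognizes the small set of JSONP statement shapes this parser supports,
// e.g. `var result = {...};`, `foo.bar[0] = {...};` or `callback({...});`. The assignment
// or call path of each statement is recorded in `results`, and the JSON payload itself is
// parsed with parse(StartParseExpression). Anything outside these shapes returns false.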
template <typename CharType>
bool LiteralParser<CharType>::tryJSONPParse(Vector<JSONPData>& results, bool needsFullSourceInfo)
{
    VM& vm = m_exec->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    if (m_lexer.next() != TokIdentifier)
        return false;
    do {
        Vector<JSONPPathEntry> path;
        // Unguarded next to start off the lexer
        Identifier name = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
        JSONPPathEntry entry;
        if (name == vm.propertyNames->varKeyword) {
            if (m_lexer.next() != TokIdentifier)
                return false;
            entry.m_type = JSONPPathEntryTypeDeclareVar;
            entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        } else {
            entry.m_type = JSONPPathEntryTypeDot;
            entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
            path.append(entry);
        }
        if (isLexerKeyword(entry.m_pathEntryName))
            return false;
        TokenType tokenType = m_lexer.next();
        if (entry.m_type == JSONPPathEntryTypeDeclareVar && tokenType != TokAssign)
            return false;
        while (tokenType != TokAssign) {
            switch (tokenType) {
            case TokLBracket: {
                entry.m_type = JSONPPathEntryTypeLookup;
                if (m_lexer.next() != TokNumber)
                    return false;
                double doubleIndex = m_lexer.currentToken()->numberToken;
                int index = (int)doubleIndex;
                if (index != doubleIndex || index < 0)
                    return false;
                entry.m_pathIndex = index;
                if (m_lexer.next() != TokRBracket)
                    return false;
                break;
            }
            case TokDot: {
                entry.m_type = JSONPPathEntryTypeDot;
                if (m_lexer.next() != TokIdentifier)
                    return false;
                entry.m_pathEntryName = Identifier::fromString(&vm, m_lexer.currentToken()->start, m_lexer.currentToken()->end - m_lexer.currentToken()->start);
                break;
            }
            case TokLParen: {
                if (path.last().m_type != JSONPPathEntryTypeDot || needsFullSourceInfo)
                    return false;
                path.last().m_type = JSONPPathEntryTypeCall;
                entry = path.last();
                goto startJSON;
            }
            default:
                return false;
            }
            path.append(entry);
            tokenType = m_lexer.next();
        }
    startJSON:
        m_lexer.next();
        results.append(JSONPData());
        JSValue startParseExpressionValue = parse(StartParseExpression);
        RETURN_IF_EXCEPTION(scope, false);
        results.last().m_value.set(vm, startParseExpressionValue);
        if (!results.last().m_value)
            return false;
        results.last().m_path.swap(path);
        if (entry.m_type == JSONPPathEntryTypeCall) {
            if (m_lexer.currentToken()->type != TokRParen)
                return false;
            m_lexer.next();
        }
        if (m_lexer.currentToken()->type != TokSemi)
            break;
        m_lexer.next();
    } while (m_lexer.currentToken()->type == TokIdentifier);
    return m_lexer.currentToken()->type == TokEnd;
}

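// makeIdentifier caches recently seen property names so repeated keys in a large document
// are not re-interned over and over: single-character names live in m_shortIdentifiers and
// longer names in m_recentIdentifiers, both indexed by their first character. Names whose
// first character is >= MaximumCachableCharacter bypass the cache.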
template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const LChar* characters, size_t length)
{
    if (!length)
        return m_exec->vm().propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(&m_exec->vm(), characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
    return m_recentIdentifiers[characters[0]];
}

template <typename CharType>
ALWAYS_INLINE const Identifier LiteralParser<CharType>::makeIdentifier(const UChar* characters, size_t length)
{
    if (!length)
        return m_exec->vm().propertyNames->emptyIdentifier;
    if (characters[0] >= MaximumCachableCharacter)
        return Identifier::fromString(&m_exec->vm(), characters, length);

    if (length == 1) {
        if (!m_shortIdentifiers[characters[0]].isNull())
            return m_shortIdentifiers[characters[0]];
        m_shortIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
        return m_shortIdentifiers[characters[0]];
    }
    if (!m_recentIdentifiers[characters[0]].isNull() && Identifier::equal(m_recentIdentifiers[characters[0]].impl(), characters, length))
        return m_recentIdentifiers[characters[0]];
    m_recentIdentifiers[characters[0]] = Identifier::fromString(&m_exec->vm(), characters, length);
    return m_recentIdentifiers[characters[0]];
}

// Token type that each of the 256 Latin-1 code points can start; TokError marks
// characters that can never begin a token.
static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
/* 0 - Null */ TokError,
/* 1 - Start of Heading */ TokError,
/* 2 - Start of Text */ TokError,
/* 3 - End of Text */ TokError,
/* 4 - End of Transm. */ TokError,
/* 5 - Enquiry */ TokError,
/* 6 - Acknowledgment */ TokError,
/* 7 - Bell */ TokError,
/* 8 - Back Space */ TokError,
/* 9 - Horizontal Tab */ TokError,
/* 10 - Line Feed */ TokError,
/* 11 - Vertical Tab */ TokError,
/* 12 - Form Feed */ TokError,
/* 13 - Carriage Return */ TokError,
/* 14 - Shift Out */ TokError,
/* 15 - Shift In */ TokError,
/* 16 - Data Line Escape */ TokError,
/* 17 - Device Control 1 */ TokError,
/* 18 - Device Control 2 */ TokError,
/* 19 - Device Control 3 */ TokError,
/* 20 - Device Control 4 */ TokError,
/* 21 - Negative Ack. */ TokError,
/* 22 - Synchronous Idle */ TokError,
/* 23 - End of Transmit */ TokError,
/* 24 - Cancel */ TokError,
/* 25 - End of Medium */ TokError,
/* 26 - Substitute */ TokError,
/* 27 - Escape */ TokError,
/* 28 - File Separator */ TokError,
/* 29 - Group Separator */ TokError,
/* 30 - Record Separator */ TokError,
/* 31 - Unit Separator */ TokError,
/* 32 - Space */ TokError,
/* 33 - ! */ TokError,
/* 34 - " */ TokString,
/* 35 - # */ TokError,
/* 36 - $ */ TokIdentifier,
/* 37 - % */ TokError,
/* 38 - & */ TokError,
/* 39 - ' */ TokString,
/* 40 - ( */ TokLParen,
/* 41 - ) */ TokRParen,
/* 42 - * */ TokError,
/* 43 - + */ TokError,
/* 44 - , */ TokComma,
/* 45 - - */ TokNumber,
/* 46 - . */ TokDot,
/* 47 - / */ TokError,
/* 48 - 0 */ TokNumber,
/* 49 - 1 */ TokNumber,
/* 50 - 2 */ TokNumber,
/* 51 - 3 */ TokNumber,
/* 52 - 4 */ TokNumber,
/* 53 - 5 */ TokNumber,
/* 54 - 6 */ TokNumber,
/* 55 - 7 */ TokNumber,
/* 56 - 8 */ TokNumber,
/* 57 - 9 */ TokNumber,
/* 58 - : */ TokColon,
/* 59 - ; */ TokSemi,
/* 60 - < */ TokError,
/* 61 - = */ TokAssign,
/* 62 - > */ TokError,
/* 63 - ? */ TokError,
/* 64 - @ */ TokError,
/* 65 - A */ TokIdentifier,
/* 66 - B */ TokIdentifier,
/* 67 - C */ TokIdentifier,
/* 68 - D */ TokIdentifier,
/* 69 - E */ TokIdentifier,
/* 70 - F */ TokIdentifier,
/* 71 - G */ TokIdentifier,
/* 72 - H */ TokIdentifier,
/* 73 - I */ TokIdentifier,
/* 74 - J */ TokIdentifier,
/* 75 - K */ TokIdentifier,
/* 76 - L */ TokIdentifier,
/* 77 - M */ TokIdentifier,
/* 78 - N */ TokIdentifier,
/* 79 - O */ TokIdentifier,
/* 80 - P */ TokIdentifier,
/* 81 - Q */ TokIdentifier,
/* 82 - R */ TokIdentifier,
/* 83 - S */ TokIdentifier,
/* 84 - T */ TokIdentifier,
/* 85 - U */ TokIdentifier,
/* 86 - V */ TokIdentifier,
/* 87 - W */ TokIdentifier,
/* 88 - X */ TokIdentifier,
/* 89 - Y */ TokIdentifier,
/* 90 - Z */ TokIdentifier,
/* 91 - [ */ TokLBracket,
/* 92 - \ */ TokError,
/* 93 - ] */ TokRBracket,
/* 94 - ^ */ TokError,
/* 95 - _ */ TokIdentifier,
/* 96 - ` */ TokError,
/* 97 - a */ TokIdentifier,
/* 98 - b */ TokIdentifier,
/* 99 - c */ TokIdentifier,
/* 100 - d */ TokIdentifier,
/* 101 - e */ TokIdentifier,
/* 102 - f */ TokIdentifier,
/* 103 - g */ TokIdentifier,
/* 104 - h */ TokIdentifier,
/* 105 - i */ TokIdentifier,
/* 106 - j */ TokIdentifier,
/* 107 - k */ TokIdentifier,
/* 108 - l */ TokIdentifier,
/* 109 - m */ TokIdentifier,
/* 110 - n */ TokIdentifier,
/* 111 - o */ TokIdentifier,
/* 112 - p */ TokIdentifier,
/* 113 - q */ TokIdentifier,
/* 114 - r */ TokIdentifier,
/* 115 - s */ TokIdentifier,
/* 116 - t */ TokIdentifier,
/* 117 - u */ TokIdentifier,
/* 118 - v */ TokIdentifier,
/* 119 - w */ TokIdentifier,
/* 120 - x */ TokIdentifier,
/* 121 - y */ TokIdentifier,
/* 122 - z */ TokIdentifier,
/* 123 - { */ TokLBrace,
/* 124 - | */ TokError,
/* 125 - } */ TokRBrace,
/* 126 - ~ */ TokError,
/* 127 - Delete */ TokError,
/* 128 - Cc category */ TokError,
/* 129 - Cc category */ TokError,
/* 130 - Cc category */ TokError,
/* 131 - Cc category */ TokError,
/* 132 - Cc category */ TokError,
/* 133 - Cc category */ TokError,
/* 134 - Cc category */ TokError,
/* 135 - Cc category */ TokError,
/* 136 - Cc category */ TokError,
/* 137 - Cc category */ TokError,
/* 138 - Cc category */ TokError,
/* 139 - Cc category */ TokError,
/* 140 - Cc category */ TokError,
/* 141 - Cc category */ TokError,
/* 142 - Cc category */ TokError,
/* 143 - Cc category */ TokError,
/* 144 - Cc category */ TokError,
/* 145 - Cc category */ TokError,
/* 146 - Cc category */ TokError,
/* 147 - Cc category */ TokError,
/* 148 - Cc category */ TokError,
/* 149 - Cc category */ TokError,
/* 150 - Cc category */ TokError,
/* 151 - Cc category */ TokError,
/* 152 - Cc category */ TokError,
/* 153 - Cc category */ TokError,
/* 154 - Cc category */ TokError,
/* 155 - Cc category */ TokError,
/* 156 - Cc category */ TokError,
/* 157 - Cc category */ TokError,
/* 158 - Cc category */ TokError,
/* 159 - Cc category */ TokError,
/* 160 - Zs category (nbsp) */ TokError,
/* 161 - Po category */ TokError,
/* 162 - Sc category */ TokError,
/* 163 - Sc category */ TokError,
/* 164 - Sc category */ TokError,
/* 165 - Sc category */ TokError,
/* 166 - So category */ TokError,
/* 167 - So category */ TokError,
/* 168 - Sk category */ TokError,
/* 169 - So category */ TokError,
/* 170 - Ll category */ TokError,
/* 171 - Pi category */ TokError,
/* 172 - Sm category */ TokError,
/* 173 - Cf category */ TokError,
/* 174 - So category */ TokError,
/* 175 - Sk category */ TokError,
/* 176 - So category */ TokError,
/* 177 - Sm category */ TokError,
/* 178 - No category */ TokError,
/* 179 - No category */ TokError,
/* 180 - Sk category */ TokError,
/* 181 - Ll category */ TokError,
/* 182 - So category */ TokError,
/* 183 - Po category */ TokError,
/* 184 - Sk category */ TokError,
/* 185 - No category */ TokError,
/* 186 - Ll category */ TokError,
/* 187 - Pf category */ TokError,
/* 188 - No category */ TokError,
/* 189 - No category */ TokError,
/* 190 - No category */ TokError,
/* 191 - Po category */ TokError,
/* 192 - Lu category */ TokError,
/* 193 - Lu category */ TokError,
/* 194 - Lu category */ TokError,
/* 195 - Lu category */ TokError,
/* 196 - Lu category */ TokError,
/* 197 - Lu category */ TokError,
/* 198 - Lu category */ TokError,
/* 199 - Lu category */ TokError,
/* 200 - Lu category */ TokError,
/* 201 - Lu category */ TokError,
/* 202 - Lu category */ TokError,
/* 203 - Lu category */ TokError,
/* 204 - Lu category */ TokError,
/* 205 - Lu category */ TokError,
/* 206 - Lu category */ TokError,
/* 207 - Lu category */ TokError,
/* 208 - Lu category */ TokError,
/* 209 - Lu category */ TokError,
/* 210 - Lu category */ TokError,
/* 211 - Lu category */ TokError,
/* 212 - Lu category */ TokError,
/* 213 - Lu category */ TokError,
/* 214 - Lu category */ TokError,
/* 215 - Sm category */ TokError,
/* 216 - Lu category */ TokError,
/* 217 - Lu category */ TokError,
/* 218 - Lu category */ TokError,
/* 219 - Lu category */ TokError,
/* 220 - Lu category */ TokError,
/* 221 - Lu category */ TokError,
/* 222 - Lu category */ TokError,
/* 223 - Ll category */ TokError,
/* 224 - Ll category */ TokError,
/* 225 - Ll category */ TokError,
/* 226 - Ll category */ TokError,
/* 227 - Ll category */ TokError,
/* 228 - Ll category */ TokError,
/* 229 - Ll category */ TokError,
/* 230 - Ll category */ TokError,
/* 231 - Ll category */ TokError,
/* 232 - Ll category */ TokError,
/* 233 - Ll category */ TokError,
/* 234 - Ll category */ TokError,
/* 235 - Ll category */ TokError,
/* 236 - Ll category */ TokError,
/* 237 - Ll category */ TokError,
/* 238 - Ll category */ TokError,
/* 239 - Ll category */ TokError,
/* 240 - Ll category */ TokError,
/* 241 - Ll category */ TokError,
/* 242 - Ll category */ TokError,
/* 243 - Ll category */ TokError,
/* 244 - Ll category */ TokError,
/* 245 - Ll category */ TokError,
/* 246 - Ll category */ TokError,
/* 247 - Sm category */ TokError,
/* 248 - Ll category */ TokError,
/* 249 - Ll category */ TokError,
/* 250 - Ll category */ TokError,
/* 251 - Ll category */ TokError,
/* 252 - Ll category */ TokError,
/* 253 - Ll category */ TokError,
/* 254 - Ll category */ TokError,
/* 255 - Ll category */ TokError
};

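// lex() skips JSON whitespace, then dispatches on the next character through the
// TokenTypesOfLatin1Characters table above. Single-character tokens are materialized here;
// strings, numbers and identifiers go through lexString/lexNumber/lexIdentifier, with the
// true/false/null literals recognized inline before falling back to lexIdentifier.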
template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
{
#if !ASSERT_DISABLED
    m_currentTokenID++;
#endif

    while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr == m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    ASSERT(m_ptr < m_end);
    token.type = TokError;
    token.start = m_ptr;
    CharType character = *m_ptr;
    if (LIKELY(character < 256)) {
        TokenType tokenType = TokenTypesOfLatin1Characters[character];
        switch (tokenType) {
        case TokString:
            if (character == '\'' && m_mode == StrictJSON) {
                m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                return TokError;
            }
            return lexString(token, character);

        case TokIdentifier: {
            switch (character) {
            case 't':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                    m_ptr += 4;
                    token.type = TokTrue;
                    token.end = m_ptr;
                    return TokTrue;
                }
                break;
            case 'f':
                if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                    m_ptr += 5;
                    token.type = TokFalse;
                    token.end = m_ptr;
                    return TokFalse;
                }
                break;
            case 'n':
                if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                    m_ptr += 4;
                    token.type = TokNull;
                    token.end = m_ptr;
                    return TokNull;
                }
                break;
            }
            return lexIdentifier(token);
        }

        case TokNumber:
            return lexNumber(token);

        case TokError:
            break;

        default:
            ASSERT(tokenType == TokLBracket
                || tokenType == TokRBracket
                || tokenType == TokLBrace
                || tokenType == TokRBrace
                || tokenType == TokColon
                || tokenType == TokLParen
                || tokenType == TokRParen
                || tokenType == TokComma
                || tokenType == TokDot
                || tokenType == TokAssign
                || tokenType == TokSemi);
            token.type = tokenType;
            token.end = ++m_ptr;
            return tokenType;
        }
    }
    m_lexErrorMessage = makeString("Unrecognized token '", StringView { m_ptr, 1 }, '\'');
    return TokError;
}

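// lexIdentifier is specialized per character type: the 8-bit lexer only accepts ASCII
// alphanumerics, '_' and '$', while the 16-bit lexer additionally accepts ZERO WIDTH
// NON-JOINER (U+200C) and ZERO WIDTH JOINER (U+200D).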
template <>
ALWAYS_INLINE TokenType LiteralParser<LChar>::Lexer::lexIdentifier(LiteralParserToken<LChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$'))
        m_ptr++;
    token.stringIs8Bit = 1;
    token.stringToken8 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <>
ALWAYS_INLINE TokenType LiteralParser<UChar>::Lexer::lexIdentifier(LiteralParserToken<UChar>& token)
{
    while (m_ptr < m_end && (isASCIIAlphanumeric(*m_ptr) || *m_ptr == '_' || *m_ptr == '$' || *m_ptr == 0x200C || *m_ptr == 0x200D))
        m_ptr++;
    token.stringIs8Bit = 0;
    token.stringToken16 = token.start;
    token.stringLength = m_ptr - token.start;
    token.type = TokIdentifier;
    token.end = m_ptr;
    return TokIdentifier;
}

template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::next()
{
    TokenType result = lex(m_currentToken);
    ASSERT(m_currentToken.type == result);
    return result;
}

template <>
ALWAYS_INLINE void setParserTokenString<LChar>(LiteralParserToken<LChar>& token, const LChar* string)
{
    token.stringIs8Bit = 1;
    token.stringToken8 = string;
}

template <>
ALWAYS_INLINE void setParserTokenString<UChar>(LiteralParserToken<UChar>& token, const UChar* string)
{
    token.stringIs8Bit = 0;
    token.stringToken16 = string;
}

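// A "safe" string character can be copied into the token verbatim: anything at or above the
// space character that is neither a backslash nor the terminating quote. Non-strict
// (eval-style) parsing also tolerates a raw tab, but sends 16-bit characters above 0xFF to
// the slow path.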
enum class SafeStringCharacterSet { Strict, NonStrict };

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
{
    return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <SafeStringCharacterSet set>
static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
{
    return (c >= ' ' && (set == SafeStringCharacterSet::Strict || c <= 0xff) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
}

template <typename CharType>
ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserToken<CharType>& token, CharType terminator)
{
    ++m_ptr;
    const CharType* runStart = m_ptr;

    if (m_mode == StrictJSON) {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
            ++m_ptr;
    } else {
        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
            ++m_ptr;
    }

    if (LIKELY(m_ptr < m_end && *m_ptr == terminator)) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
        token.type = TokString;
        token.end = ++m_ptr;
        return TokString;
    }
    return lexStringSlow(token, runStart, terminator);
}

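// lexStringSlow handles everything the fast path in lexString cannot copy verbatim. It
// accumulates the string in m_builder, decoding the JSON escapes \" \\ \/ \b \f \n \r \t
// and \uNNNN (plus \' outside strict JSON mode), and reports unterminated strings and
// malformed escapes as TokError.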
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexStringSlow(LiteralParserToken<CharType>& token, const CharType* runStart, CharType terminator)
{
    m_builder.clear();
    goto slowPathBegin;
    do {
        runStart = m_ptr;
        if (m_mode == StrictJSON) {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
                ++m_ptr;
        } else {
            while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))
                ++m_ptr;
        }

        if (!m_builder.isEmpty())
            m_builder.append(runStart, m_ptr - runStart);

slowPathBegin:
        if ((m_mode != NonStrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            if (m_builder.isEmpty() && runStart < m_ptr)
                m_builder.append(runStart, m_ptr - runStart);
            ++m_ptr;
            if (m_ptr >= m_end) {
                m_lexErrorMessage = "Unterminated string"_s;
                return TokError;
            }
            switch (*m_ptr) {
            case '"':
                m_builder.append('"');
                m_ptr++;
                break;
            case '\\':
                m_builder.append('\\');
                m_ptr++;
                break;
            case '/':
                m_builder.append('/');
                m_ptr++;
                break;
            case 'b':
                m_builder.append('\b');
                m_ptr++;
                break;
            case 'f':
                m_builder.append('\f');
                m_ptr++;
                break;
            case 'n':
                m_builder.append('\n');
                m_ptr++;
                break;
            case 'r':
                m_builder.append('\r');
                m_ptr++;
                break;
            case 't':
                m_builder.append('\t');
                m_ptr++;
                break;

            case 'u':
                if ((m_end - m_ptr) < 5) {
                    m_lexErrorMessage = "\\u must be followed by 4 hex digits"_s;
                    return TokError;
                } // uNNNN == 5 characters
                for (int i = 1; i < 5; i++) {
                    if (!isASCIIHexDigit(m_ptr[i])) {
                        m_lexErrorMessage = makeString("\"\\", StringView { m_ptr, 5 }, "\" is not a valid unicode escape");
                        return TokError;
                    }
                }
                m_builder.append(JSC::Lexer<CharType>::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                m_ptr += 5;
                break;

            default:
                if (*m_ptr == '\'' && m_mode != StrictJSON) {
                    m_builder.append('\'');
                    m_ptr++;
                    break;
                }
                m_lexErrorMessage = makeString("Invalid escape character ", StringView { m_ptr, 1 });
                return TokError;
            }
        }
    } while ((m_mode != NonStrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != terminator);

    if (m_ptr >= m_end || *m_ptr != terminator) {
        m_lexErrorMessage = "Unterminated string"_s;
        return TokError;
    }

    if (m_builder.isEmpty()) {
        setParserTokenString<CharType>(token, runStart);
        token.stringLength = m_ptr - runStart;
    } else {
        if (m_builder.is8Bit()) {
            token.stringIs8Bit = 1;
            token.stringToken8 = m_builder.characters8();
        } else {
            token.stringIs8Bit = 0;
            token.stringToken16 = m_builder.characters16();
        }
        token.stringLength = m_builder.length();
    }
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

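// lexNumber has a fast path for short integer literals: if there is no fraction or exponent
// and the token is at most NumberOfDigitsForSafeInt32 characters long, the value is
// accumulated directly as an int32; otherwise the whole token is handed to parseDouble.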
template <typename CharType>
TokenType LiteralParser<CharType>::Lexer::lexNumber(LiteralParserToken<CharType>& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else {
        m_lexErrorMessage = "Invalid number"_s;
        return TokError;
    }

    // ('.' [0-9]+)?
    const int NumberOfDigitsForSafeInt32 = 9; // The numbers from -99999999 to 999999999 are always in range of Int32.
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Invalid digits after decimal point"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else if (m_ptr < m_end && (*m_ptr != 'e' && *m_ptr != 'E') && (m_ptr - token.start) <= NumberOfDigitsForSafeInt32) {
        int32_t result = 0;
        token.type = TokNumber;
        token.end = m_ptr;
        const CharType* digit = token.start;
        bool negative = false;
        if (*digit == '-') {
            negative = true;
            digit++;
        }

        ASSERT((m_ptr - digit) <= NumberOfDigitsForSafeInt32);
        while (digit < m_ptr)
            result = result * 10 + (*digit++) - '0';

        if (!negative)
            token.numberToken = result;
        else {
            if (!result)
                token.numberToken = -0.0;
            else
                token.numberToken = -result;
        }
        return TokNumber;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) {
            m_lexErrorMessage = "Exponent symbols should be followed by an optional '+' or '-' and then by at least one number"_s;
            return TokError;
        }

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    size_t parsedLength;
    token.numberToken = parseDouble(token.start, token.end - token.start, parsedLength);
    return TokNumber;
}

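// parse() is an explicit state machine rather than a set of recursive functions: nested
// arrays and objects push a continuation state onto stateStack and the partially built
// container onto objectStack, so deeply nested input cannot overflow the native stack.
// Property names wait on identifierStack until their value has been parsed.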
template <typename CharType>
JSValue LiteralParser<CharType>::parse(ParserState initialState)
{
    VM& vm = m_exec->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16, UnsafeVectorOverflow> stateStack;
    Vector<Identifier, 16, UnsafeVectorOverflow> identifierStack;
    HashSet<JSObject*> visitedUnderscoreProto;
    while (1) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            JSArray* array = constructEmptyArray(m_exec, 0);
            RETURN_IF_EXCEPTION(scope, JSValue());
            objectStack.appendWithCrashOnOverflow(array);
        }
        doParseArrayStartExpression:
        FALLTHROUGH;
        case DoParseArrayStartExpression: {
            TokenType lastToken = m_lexer.currentToken()->type;
            if (m_lexer.next() == TokRBracket) {
                if (lastToken == TokComma) {
                    m_parseErrorMessage = "Unexpected comma at the end of array expression"_s;
                    return JSValue();
                }
                m_lexer.next();
                lastValue = objectStack.takeLast();
                break;
            }

            stateStack.append(DoParseArrayEndExpression);
            goto startParseExpression;
        }
        case DoParseArrayEndExpression: {
            JSArray* array = asArray(objectStack.last());
            array->putDirectIndex(m_exec, array->length(), lastValue);
            RETURN_IF_EXCEPTION(scope, JSValue());

            if (m_lexer.currentToken()->type == TokComma)
                goto doParseArrayStartExpression;

            if (m_lexer.currentToken()->type != TokRBracket) {
                m_parseErrorMessage = "Expected ']'"_s;
                return JSValue();
            }

            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            JSObject* object = constructEmptyObject(m_exec);
            objectStack.appendWithCrashOnOverflow(object);

            TokenType type = m_lexer.next();
            if (type == TokString || (m_mode != StrictJSON && type == TokIdentifier)) {
                typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
                if (identifierToken->stringIs8Bit)
                    identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
                else
                    identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

                // Check for colon
                if (m_lexer.next() != TokColon) {
                    m_parseErrorMessage = "Expected ':' before value in object property definition"_s;
                    return JSValue();
                }

                m_lexer.next();
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            if (type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            TokenType type = m_lexer.next();
            if (type != TokString && (m_mode == StrictJSON || type != TokIdentifier)) {
                m_parseErrorMessage = "Property name must be a string literal"_s;
                return JSValue();
            }
            typename Lexer::LiteralParserTokenPtr identifierToken = m_lexer.currentToken();
            if (identifierToken->stringIs8Bit)
                identifierStack.append(makeIdentifier(identifierToken->stringToken8, identifierToken->stringLength));
            else
                identifierStack.append(makeIdentifier(identifierToken->stringToken16, identifierToken->stringLength));

            // Check for colon
            if (m_lexer.next() != TokColon) {
                m_parseErrorMessage = "Expected ':'"_s;
                return JSValue();
            }

            m_lexer.next();
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            JSObject* object = asObject(objectStack.last());
            Identifier ident = identifierStack.takeLast();
            if (m_mode != StrictJSON && ident == vm.propertyNames->underscoreProto) {
                if (!visitedUnderscoreProto.add(object).isNewEntry) {
                    m_parseErrorMessage = "Attempted to redefine __proto__ property"_s;
                    return JSValue();
                }
                CodeBlock* codeBlock = m_exec->codeBlock();
                PutPropertySlot slot(object, codeBlock ? codeBlock->isStrictMode() : false);
                objectStack.last().put(m_exec, ident, lastValue, slot);
            } else {
                if (Optional<uint32_t> index = parseIndex(ident))
                    object->putDirectIndex(m_exec, index.value(), lastValue);
                else
                    object->putDirect(vm, ident, lastValue);
            }
            RETURN_IF_EXCEPTION(scope, JSValue());
            if (m_lexer.currentToken()->type == TokComma)
                goto doParseObjectStartExpression;
            if (m_lexer.currentToken()->type != TokRBrace) {
                m_parseErrorMessage = "Expected '}'"_s;
                return JSValue();
            }
            m_lexer.next();
            lastValue = objectStack.takeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                typename Lexer::LiteralParserTokenPtr stringToken = m_lexer.currentToken();
                if (stringToken->stringIs8Bit)
                    lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken8, stringToken->stringLength).string());
                else
                    lastValue = jsString(m_exec, makeIdentifier(stringToken->stringToken16, stringToken->stringLength).string());
                m_lexer.next();
                break;
            }
            case TokNumber: {
                typename Lexer::LiteralParserTokenPtr numberToken = m_lexer.currentToken();
                lastValue = jsNumber(numberToken->numberToken);
                m_lexer.next();
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;

            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;

            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier: {
                typename Lexer::LiteralParserTokenPtr token = m_lexer.currentToken();
                if (token->stringIs8Bit)
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken8, token->stringLength }, '"');
                else
                    m_parseErrorMessage = makeString("Unexpected identifier \"", StringView { token->stringToken16, token->stringLength }, '"');
                return JSValue();
            }
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokLParen:
                m_parseErrorMessage = "Unexpected token '('"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokError:
            default:
                // Error
                m_parseErrorMessage = "Could not parse value expression"_s;
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken()->type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;

            case TokLParen: {
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            case TokRBracket:
                m_parseErrorMessage = "Unexpected token ']'"_s;
                return JSValue();
            case TokLBrace:
                m_parseErrorMessage = "Unexpected token '{'"_s;
                return JSValue();
            case TokRBrace:
                m_parseErrorMessage = "Unexpected token '}'"_s;
                return JSValue();
            case TokIdentifier:
                m_parseErrorMessage = "Unexpected identifier"_s;
                return JSValue();
            case TokColon:
                m_parseErrorMessage = "Unexpected token ':'"_s;
                return JSValue();
            case TokRParen:
                m_parseErrorMessage = "Unexpected token ')'"_s;
                return JSValue();
            case TokComma:
                m_parseErrorMessage = "Unexpected token ','"_s;
                return JSValue();
            case TokTrue:
                m_parseErrorMessage = "Unexpected token 'true'"_s;
                return JSValue();
            case TokFalse:
                m_parseErrorMessage = "Unexpected token 'false'"_s;
                return JSValue();
            case TokNull:
                m_parseErrorMessage = "Unexpected token 'null'"_s;
                return JSValue();
            case TokEnd:
                m_parseErrorMessage = "Unexpected EOF"_s;
                return JSValue();
            case TokDot:
                m_parseErrorMessage = "Unexpected token '.'"_s;
                return JSValue();
            case TokAssign:
                m_parseErrorMessage = "Unexpected token '='"_s;
                return JSValue();
            case TokSemi:
                m_parseErrorMessage = "Unexpected token ';'"_s;
                return JSValue();
            case TokError:
            default:
                m_parseErrorMessage = "Could not parse statement"_s;
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken()->type != TokRParen)
                return JSValue();
            if (m_lexer.next() == TokEnd)
                return lastValue;
            m_parseErrorMessage = "Unexpected content at end of JSON literal"_s;
            return JSValue();
        }
        default:
            RELEASE_ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.takeLast();
        continue;
    }
}

// Instantiate the two flavors of LiteralParser we need instead of putting most of this file in LiteralParser.h
template class LiteralParser<LChar>;
template class LiteralParser<UChar>;

} // namespace JSC