1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 * Copyright (C) 2014 University of Washington. All rights reserved.
4 * Copyright (C) 2017-2019 Apple Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the name of Google Inc. nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include "config.h"
34#include <wtf/JSONValues.h>
35
36#include <wtf/text/StringBuilder.h>
37
38namespace WTF {
39namespace JSONImpl {
40
41namespace {
42
// Deepest value nesting that buildValue() will recurse into before giving up.
constexpr int stackLimit = 1000;
44
// Lexical token kinds reported by parseToken().
enum class Token {
    ObjectBegin, // '{'
    ObjectEnd, // '}'
    ArrayBegin, // '['
    ArrayEnd, // ']'
    String, // A double-quoted string literal.
    Number, // A numeric literal.
    BoolTrue, // The literal "true".
    BoolFalse, // The literal "false".
    Null, // The literal "null".
    ListSeparator, // ','
    ObjectPairSeparator, // ':'
    Invalid, // End of input, or text that does not form any token above.
};
59
// Spellings of the three JSON literal names recognized by parseToken().
constexpr const char* nullString = "null";
constexpr const char* trueString = "true";
constexpr const char* falseString = "false";
63
// Matches the NUL-terminated ASCII literal |token| (e.g. "null") against the characters
// beginning at |start|. On success, stores the position just past the literal in |*tokenEnd|
// and returns true. Returns false, leaving |*tokenEnd| untouched, when the input ends before
// the literal does or when any character differs.
bool parseConstToken(const UChar* start, const UChar* end, const UChar** tokenEnd, const char* token)
{
    for (; *token != '\0'; ++start, ++token) {
        // Compare before advancing, so that a mismatch on the literal's final character is
        // not mistaken for a complete match (e.g. "nulx" must not be accepted as "null").
        if (start >= end || *start != *token)
            return false;
    }

    *tokenEnd = start;
    return true;
}
74
// Consumes a run of ASCII decimal digits beginning at |start|.
// Returns false when the range is empty, when no digit is present, or when the run has more
// than one digit, begins with '0', and |canHaveLeadingZeros| is false. On success, stores the
// position just past the last digit in |*tokenEnd|.
bool readInt(const UChar* start, const UChar* end, const UChar** tokenEnd, bool canHaveLeadingZeros)
{
    if (start == end)
        return false;

    const UChar* cursor = start;
    while (cursor < end && *cursor >= '0' && *cursor <= '9')
        ++cursor;

    const auto digitCount = cursor - start;
    if (digitCount <= 0)
        return false;

    const bool startsWithZero = *start == '0';
    if (startsWithZero && digitCount > 1 && !canHaveLeadingZeros)
        return false;

    *tokenEnd = cursor;
    return true;
}
96
97bool parseNumberToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
98{
99 // We just grab the number here. We validate the size in DecodeNumber.
100 // According to RFC 4627, a valid number is: [minus] int [frac] [exp]
101 if (start == end)
102 return false;
103
104 UChar c = *start;
105 if ('-' == c)
106 ++start;
107
108 if (!readInt(start, end, &start, false))
109 return false;
110
111 if (start == end) {
112 *tokenEnd = start;
113 return true;
114 }
115
116 // Optional fraction part.
117 c = *start;
118 if ('.' == c) {
119 ++start;
120 if (!readInt(start, end, &start, true))
121 return false;
122 if (start == end) {
123 *tokenEnd = start;
124 return true;
125 }
126 c = *start;
127 }
128
129 // Optional exponent part.
130 if ('e' == c || 'E' == c) {
131 ++start;
132 if (start == end)
133 return false;
134 c = *start;
135 if ('-' == c || '+' == c) {
136 ++start;
137 if (start == end)
138 return false;
139 }
140 if (!readInt(start, end, &start, true))
141 return false;
142 }
143
144 *tokenEnd = start;
145 return true;
146}
147
148bool readHexDigits(const UChar* start, const UChar* end, const UChar** tokenEnd, int digits)
149{
150 if (end - start < digits)
151 return false;
152
153 for (int i = 0; i < digits; ++i) {
154 if (!isASCIIHexDigit(*start++))
155 return false;
156 }
157
158 *tokenEnd = start;
159 return true;
160}
161
162bool parseStringToken(const UChar* start, const UChar* end, const UChar** tokenEnd)
163{
164 while (start < end) {
165 UChar c = *start++;
166 if ('\\' == c && start < end) {
167 c = *start++;
168 // Make sure the escaped char is valid.
169 switch (c) {
170 case 'x':
171 if (!readHexDigits(start, end, &start, 2))
172 return false;
173 break;
174 case 'u':
175 if (!readHexDigits(start, end, &start, 4))
176 return false;
177 break;
178 case '\\':
179 case '/':
180 case 'b':
181 case 'f':
182 case 'n':
183 case 'r':
184 case 't':
185 case 'v':
186 case '"':
187 break;
188 default:
189 return false;
190 }
191 } else if ('"' == c) {
192 *tokenEnd = start;
193 return true;
194 }
195 }
196
197 return false;
198}
199
200Token parseToken(const UChar* start, const UChar* end, const UChar** tokenStart, const UChar** tokenEnd)
201{
202 while (start < end && isSpaceOrNewline(*start))
203 ++start;
204
205 if (start == end)
206 return Token::Invalid;
207
208 *tokenStart = start;
209
210 switch (*start) {
211 case 'n':
212 if (parseConstToken(start, end, tokenEnd, nullString))
213 return Token::Null;
214 break;
215 case 't':
216 if (parseConstToken(start, end, tokenEnd, trueString))
217 return Token::BoolTrue;
218 break;
219 case 'f':
220 if (parseConstToken(start, end, tokenEnd, falseString))
221 return Token::BoolFalse;
222 break;
223 case '[':
224 *tokenEnd = start + 1;
225 return Token::ArrayBegin;
226 case ']':
227 *tokenEnd = start + 1;
228 return Token::ArrayEnd;
229 case ',':
230 *tokenEnd = start + 1;
231 return Token::ListSeparator;
232 case '{':
233 *tokenEnd = start + 1;
234 return Token::ObjectBegin;
235 case '}':
236 *tokenEnd = start + 1;
237 return Token::ObjectEnd;
238 case ':':
239 *tokenEnd = start + 1;
240 return Token::ObjectPairSeparator;
241 case '0':
242 case '1':
243 case '2':
244 case '3':
245 case '4':
246 case '5':
247 case '6':
248 case '7':
249 case '8':
250 case '9':
251 case '-':
252 if (parseNumberToken(start, end, tokenEnd))
253 return Token::Number;
254 break;
255 case '"':
256 if (parseStringToken(start + 1, end, tokenEnd))
257 return Token::String;
258 break;
259 }
260
261 return Token::Invalid;
262}
263
264bool decodeString(const UChar* start, const UChar* end, StringBuilder& output)
265{
266 while (start < end) {
267 UChar c = *start++;
268 if ('\\' != c) {
269 output.append(c);
270 continue;
271 }
272 if (UNLIKELY(start >= end))
273 return false;
274 c = *start++;
275 switch (c) {
276 case '"':
277 case '/':
278 case '\\':
279 break;
280 case 'b':
281 c = '\b';
282 break;
283 case 'f':
284 c = '\f';
285 break;
286 case 'n':
287 c = '\n';
288 break;
289 case 'r':
290 c = '\r';
291 break;
292 case 't':
293 c = '\t';
294 break;
295 case 'v':
296 c = '\v';
297 break;
298 case 'x':
299 if (UNLIKELY(start + 1 >= end))
300 return false;
301 c = toASCIIHexValue(start[0], start[1]);
302 start += 2;
303 break;
304 case 'u':
305 if (UNLIKELY(start + 3 >= end))
306 return false;
307 c = toASCIIHexValue(start[0], start[1]) << 8 | toASCIIHexValue(start[2], start[3]);
308 start += 4;
309 break;
310 default:
311 return false;
312 }
313 output.append(c);
314 }
315
316 return true;
317}
318
319bool decodeString(const UChar* start, const UChar* end, String& output)
320{
321 if (start == end) {
322 output = emptyString();
323 return true;
324 }
325
326 if (start > end)
327 return false;
328
329 StringBuilder buffer;
330 buffer.reserveCapacity(end - start);
331 if (!decodeString(start, end, buffer))
332 return false;
333
334 output = buffer.toString();
335 return true;
336}
337
338RefPtr<JSON::Value> buildValue(const UChar* start, const UChar* end, const UChar** valueTokenEnd, int depth)
339{
340 if (depth > stackLimit)
341 return nullptr;
342
343 RefPtr<JSON::Value> result;
344 const UChar* tokenStart;
345 const UChar* tokenEnd;
346 Token token = parseToken(start, end, &tokenStart, &tokenEnd);
347 switch (token) {
348 case Token::Invalid:
349 return nullptr;
350 case Token::Null:
351 result = JSON::Value::null();
352 break;
353 case Token::BoolTrue:
354 result = JSON::Value::create(true);
355 break;
356 case Token::BoolFalse:
357 result = JSON::Value::create(false);
358 break;
359 case Token::Number: {
360 bool ok;
361 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok);
362 if (!ok)
363 return nullptr;
364 result = JSON::Value::create(value);
365 break;
366 }
367 case Token::String: {
368 String value;
369 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, value);
370 if (!ok)
371 return nullptr;
372 result = JSON::Value::create(value);
373 break;
374 }
375 case Token::ArrayBegin: {
376 Ref<JSON::Array> array = JSON::Array::create();
377 start = tokenEnd;
378 token = parseToken(start, end, &tokenStart, &tokenEnd);
379 while (token != Token::ArrayEnd) {
380 RefPtr<JSON::Value> arrayNode = buildValue(start, end, &tokenEnd, depth + 1);
381 if (!arrayNode)
382 return nullptr;
383 array->pushValue(WTFMove(arrayNode));
384
385 // After a list value, we expect a comma or the end of the list.
386 start = tokenEnd;
387 token = parseToken(start, end, &tokenStart, &tokenEnd);
388 if (token == Token::ListSeparator) {
389 start = tokenEnd;
390 token = parseToken(start, end, &tokenStart, &tokenEnd);
391 if (token == Token::ArrayEnd)
392 return nullptr;
393 } else if (token != Token::ArrayEnd) {
394 // Unexpected value after list value. Bail out.
395 return nullptr;
396 }
397 }
398 if (token != Token::ArrayEnd)
399 return nullptr;
400 result = WTFMove(array);
401 break;
402 }
403 case Token::ObjectBegin: {
404 Ref<JSON::Object> object = JSON::Object::create();
405 start = tokenEnd;
406 token = parseToken(start, end, &tokenStart, &tokenEnd);
407 while (token != Token::ObjectEnd) {
408 if (token != Token::String)
409 return nullptr;
410 String key;
411 if (!decodeString(tokenStart + 1, tokenEnd - 1, key))
412 return nullptr;
413 start = tokenEnd;
414
415 token = parseToken(start, end, &tokenStart, &tokenEnd);
416 if (token != Token::ObjectPairSeparator)
417 return nullptr;
418 start = tokenEnd;
419
420 RefPtr<JSON::Value> value = buildValue(start, end, &tokenEnd, depth + 1);
421 if (!value)
422 return nullptr;
423 object->setValue(key, WTFMove(value));
424 start = tokenEnd;
425
426 // After a key/value pair, we expect a comma or the end of the
427 // object.
428 token = parseToken(start, end, &tokenStart, &tokenEnd);
429 if (token == Token::ListSeparator) {
430 start = tokenEnd;
431 token = parseToken(start, end, &tokenStart, &tokenEnd);
432 if (token == Token::ObjectEnd)
433 return nullptr;
434 } else if (token != Token::ObjectEnd) {
435 // Unexpected value after last object value. Bail out.
436 return nullptr;
437 }
438 }
439 if (token != Token::ObjectEnd)
440 return nullptr;
441 result = WTFMove(object);
442 break;
443 }
444
445 default:
446 // We got a token that's not a value.
447 return nullptr;
448 }
449 *valueTokenEnd = tokenEnd;
450 return result;
451}
452
453inline void appendDoubleQuotedString(StringBuilder& builder, StringView string)
454{
455 builder.append('"');
456 for (UChar codeUnit : string.codeUnits()) {
457 switch (codeUnit) {
458 case '\b':
459 builder.appendLiteral("\\b");
460 continue;
461 case '\f':
462 builder.appendLiteral("\\f");
463 continue;
464 case '\n':
465 builder.appendLiteral("\\n");
466 continue;
467 case '\r':
468 builder.appendLiteral("\\r");
469 continue;
470 case '\t':
471 builder.appendLiteral("\\t");
472 continue;
473 case '\\':
474 builder.appendLiteral("\\\\");
475 continue;
476 case '"':
477 builder.appendLiteral("\\\"");
478 continue;
479 }
480 // We escape < and > to prevent script execution.
481 if (codeUnit >= 32 && codeUnit < 127 && codeUnit != '<' && codeUnit != '>') {
482 builder.append(codeUnit);
483 continue;
484 }
485 // We could encode characters >= 127 as UTF-8 instead of \u escape sequences.
486 // We could handle surrogates here if callers wanted that; for now we just
487 // write them out as a \u sequence, so a surrogate pair appears as two of them.
488 builder.appendLiteral("\\u");
489 builder.append(upperNibbleToASCIIHexDigit(codeUnit >> 8));
490 builder.append(lowerNibbleToASCIIHexDigit(codeUnit >> 8));
491 builder.append(upperNibbleToASCIIHexDigit(codeUnit));
492 builder.append(lowerNibbleToASCIIHexDigit(codeUnit));
493 }
494 builder.append('"');
495}
496
497} // anonymous namespace
498
499Ref<Value> Value::null()
500{
501 return adoptRef(*new Value);
502}
503
504Ref<Value> Value::create(bool value)
505{
506 return adoptRef(*new Value(value));
507}
508
509Ref<Value> Value::create(int value)
510{
511 return adoptRef(*new Value(value));
512}
513
514Ref<Value> Value::create(double value)
515{
516 return adoptRef(*new Value(value));
517}
518
519Ref<Value> Value::create(const String& value)
520{
521 return adoptRef(*new Value(value));
522}
523
524Ref<Value> Value::create(const char* value)
525{
526 return adoptRef(*new Value(value));
527}
528
529bool Value::asValue(RefPtr<Value>& value)
530{
531 value = this;
532 return true;
533}
534
535bool Value::asObject(RefPtr<Object>&)
536{
537 return false;
538}
539
540bool Value::asArray(RefPtr<Array>&)
541{
542 return false;
543}
544
545bool Value::parseJSON(const String& jsonInput, RefPtr<Value>& output)
546{
547 // FIXME: This whole file should just use StringView instead of UChar/length and avoid upconverting.
548 auto characters = StringView(jsonInput).upconvertedCharacters();
549 const UChar* start = characters;
550 const UChar* end = start + jsonInput.length();
551 const UChar* tokenEnd;
552 auto result = buildValue(start, end, &tokenEnd, 0);
553 if (!result)
554 return false;
555
556 for (const UChar* valueEnd = tokenEnd; valueEnd < end; ++valueEnd) {
557 if (!isSpaceOrNewline(*valueEnd))
558 return false;
559 }
560
561 output = WTFMove(result);
562 return true;
563}
564
565String Value::toJSONString() const
566{
567 StringBuilder result;
568 result.reserveCapacity(512);
569 writeJSON(result);
570 return result.toString();
571}
572
573bool Value::asBoolean(bool& output) const
574{
575 if (type() != Type::Boolean)
576 return false;
577
578 output = m_value.boolean;
579 return true;
580}
581
582bool Value::asDouble(double& output) const
583{
584 if (type() != Type::Double)
585 return false;
586
587 output = m_value.number;
588 return true;
589}
590
591bool Value::asDouble(float& output) const
592{
593 if (type() != Type::Double)
594 return false;
595
596 output = static_cast<float>(m_value.number);
597 return true;
598}
599
600bool Value::asInteger(int& output) const
601{
602 if (type() != Type::Integer && type() != Type::Double)
603 return false;
604
605 output = static_cast<int>(m_value.number);
606 return true;
607}
608
609bool Value::asInteger(unsigned& output) const
610{
611 if (type() != Type::Integer && type() != Type::Double)
612 return false;
613
614 output = static_cast<unsigned>(m_value.number);
615 return true;
616}
617
618bool Value::asInteger(long& output) const
619{
620 if (type() != Type::Integer && type() != Type::Double)
621 return false;
622
623 output = static_cast<long>(m_value.number);
624 return true;
625}
626
627bool Value::asInteger(long long& output) const
628{
629 if (type() != Type::Integer && type() != Type::Double)
630 return false;
631
632 output = static_cast<long long>(m_value.number);
633 return true;
634}
635
636bool Value::asInteger(unsigned long& output) const
637{
638 if (type() != Type::Integer && type() != Type::Double)
639 return false;
640
641 output = static_cast<unsigned long>(m_value.number);
642 return true;
643}
644
645bool Value::asInteger(unsigned long long& output) const
646{
647 if (type() != Type::Integer && type() != Type::Double)
648 return false;
649
650 output = static_cast<unsigned long long>(m_value.number);
651 return true;
652}
653
654bool Value::asString(String& output) const
655{
656 if (type() != Type::String)
657 return false;
658
659 output = m_value.string;
660 return true;
661}
662
663void Value::writeJSON(StringBuilder& output) const
664{
665 switch (m_type) {
666 case Type::Null:
667 output.appendLiteral("null");
668 break;
669 case Type::Boolean:
670 if (m_value.boolean)
671 output.appendLiteral("true");
672 else
673 output.appendLiteral("false");
674 break;
675 case Type::String:
676 appendDoubleQuotedString(output, m_value.string);
677 break;
678 case Type::Double:
679 case Type::Integer: {
680 if (!std::isfinite(m_value.number))
681 output.appendLiteral("null");
682 else
683 output.appendNumber(m_value.number);
684 break;
685 }
686 default:
687 ASSERT_NOT_REACHED();
688 }
689}
690
691size_t Value::memoryCost() const
692{
693 size_t memoryCost = sizeof(this);
694 if (m_type == Type::String && m_value.string)
695 memoryCost += m_value.string->sizeInBytes();
696 return memoryCost;
697}
698
699ObjectBase::~ObjectBase()
700{
701}
702
703bool ObjectBase::asObject(RefPtr<Object>& output)
704{
705 COMPILE_ASSERT(sizeof(Object) == sizeof(ObjectBase), cannot_cast);
706
707 output = static_cast<Object*>(this);
708 return true;
709}
710
711Object* ObjectBase::openAccessors()
712{
713 COMPILE_ASSERT(sizeof(Object) == sizeof(ObjectBase), cannot_cast);
714
715 return static_cast<Object*>(this);
716}
717
718size_t ObjectBase::memoryCost() const
719{
720 size_t memoryCost = Value::memoryCost();
721 for (const auto& entry : m_map) {
722 memoryCost += entry.key.sizeInBytes();
723 if (entry.value)
724 memoryCost += entry.value->memoryCost();
725 }
726 return memoryCost;
727}
728
729bool ObjectBase::getBoolean(const String& name, bool& output) const
730{
731 RefPtr<Value> value;
732 if (!getValue(name, value))
733 return false;
734
735 return value->asBoolean(output);
736}
737
738bool ObjectBase::getString(const String& name, String& output) const
739{
740 RefPtr<Value> value;
741 if (!getValue(name, value))
742 return false;
743
744 return value->asString(output);
745}
746
747bool ObjectBase::getObject(const String& name, RefPtr<Object>& output) const
748{
749 RefPtr<Value> value;
750 if (!getValue(name, value))
751 return false;
752
753 return value->asObject(output);
754}
755
756bool ObjectBase::getArray(const String& name, RefPtr<Array>& output) const
757{
758 RefPtr<Value> value;
759 if (!getValue(name, value))
760 return false;
761
762 return value->asArray(output);
763}
764
765bool ObjectBase::getValue(const String& name, RefPtr<Value>& output) const
766{
767 Dictionary::const_iterator findResult = m_map.find(name);
768 if (findResult == m_map.end())
769 return false;
770
771 output = findResult->value;
772 return true;
773}
774
775void ObjectBase::remove(const String& name)
776{
777 m_map.remove(name);
778 m_order.removeFirst(name);
779}
780
781void ObjectBase::writeJSON(StringBuilder& output) const
782{
783 output.append('{');
784 for (size_t i = 0; i < m_order.size(); ++i) {
785 auto findResult = m_map.find(m_order[i]);
786 ASSERT(findResult != m_map.end());
787 if (i)
788 output.append(',');
789 appendDoubleQuotedString(output, findResult->key);
790 output.append(':');
791 findResult->value->writeJSON(output);
792 }
793 output.append('}');
794}
795
796ObjectBase::ObjectBase()
797 : Value(Type::Object)
798 , m_map()
799 , m_order()
800{
801}
802
803ArrayBase::~ArrayBase()
804{
805}
806
807bool ArrayBase::asArray(RefPtr<Array>& output)
808{
809 COMPILE_ASSERT(sizeof(ArrayBase) == sizeof(Array), cannot_cast);
810 output = static_cast<Array*>(this);
811 return true;
812}
813
814void ArrayBase::writeJSON(StringBuilder& output) const
815{
816 output.append('[');
817 for (Vector<RefPtr<Value>>::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
818 if (it != m_map.begin())
819 output.append(',');
820 (*it)->writeJSON(output);
821 }
822 output.append(']');
823}
824
825ArrayBase::ArrayBase()
826 : Value(Type::Array)
827 , m_map()
828{
829}
830
831RefPtr<Value> ArrayBase::get(size_t index) const
832{
833 RELEASE_ASSERT_WITH_SECURITY_IMPLICATION(index < m_map.size());
834 return m_map[index];
835}
836
837Ref<Object> Object::create()
838{
839 return adoptRef(*new Object);
840}
841
842Ref<Array> Array::create()
843{
844 return adoptRef(*new Array);
845}
846
847size_t ArrayBase::memoryCost() const
848{
849 size_t memoryCost = Value::memoryCost();
850 for (const auto& item : m_map) {
851 if (item)
852 memoryCost += item->memoryCost();
853 }
854 return memoryCost;
855}
856
857} // namespace JSONImpl
858} // namespace WTF
859