1/*
2 * (C) 1999 Lars Knoll ([email protected])
3 * Copyright (C) 2004-2019 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 */
21
22#include "config.h"
23#include <wtf/text/WTFString.h>
24
25#include <stdarg.h>
26#include <wtf/ASCIICType.h>
27#include <wtf/DataLog.h>
28#include <wtf/HexNumber.h>
29#include <wtf/MathExtras.h>
30#include <wtf/NeverDestroyed.h>
31#include <wtf/Vector.h>
32#include <wtf/dtoa.h>
33#include <wtf/text/CString.h>
34#include <wtf/text/IntegerToStringConversion.h>
35#include <wtf/text/StringToIntegerConversion.h>
36#include <wtf/unicode/CharacterNames.h>
37#include <wtf/unicode/UTF8Conversion.h>
38
39namespace WTF {
40
41using namespace Unicode;
42
43// Construct a string with UTF-16 data.
44String::String(const UChar* characters, unsigned length)
45{
46 if (characters)
47 m_impl = StringImpl::create(characters, length);
48}
49
50// Construct a string with UTF-16 data, from a null-terminated source.
51String::String(const UChar* nullTerminatedString)
52{
53 if (nullTerminatedString)
54 m_impl = StringImpl::create(nullTerminatedString, lengthOfNullTerminatedString(nullTerminatedString));
55}
56
57// Construct a string with latin1 data.
58String::String(const LChar* characters, unsigned length)
59{
60 if (characters)
61 m_impl = StringImpl::create(characters, length);
62}
63
64String::String(const char* characters, unsigned length)
65{
66 if (characters)
67 m_impl = StringImpl::create(reinterpret_cast<const LChar*>(characters), length);
68}
69
70// Construct a string with Latin-1 data, from a null-terminated source.
71String::String(const LChar* nullTerminatedString)
72{
73 if (nullTerminatedString)
74 m_impl = StringImpl::create(nullTerminatedString);
75}
76
77String::String(const char* nullTerminatedString)
78{
79 if (nullTerminatedString)
80 m_impl = StringImpl::create(reinterpret_cast<const LChar*>(nullTerminatedString));
81}
82
83String::String(ASCIILiteral characters)
84 : m_impl(StringImpl::createFromLiteral(characters))
85{
86}
87
88void String::append(const String& otherString)
89{
90 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
91
92 if (!m_impl) {
93 m_impl = otherString.m_impl;
94 return;
95 }
96
97 if (otherString.isEmpty())
98 return;
99
100 auto length = m_impl->length();
101 auto otherLength = otherString.m_impl->length();
102 if (otherLength > MaxLength - length)
103 CRASH();
104
105 if (m_impl->is8Bit() && otherString.m_impl->is8Bit()) {
106 LChar* data;
107 auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
108 StringImpl::copyCharacters(data, m_impl->characters8(), length);
109 StringImpl::copyCharacters(data + length, otherString.m_impl->characters8(), otherLength);
110 m_impl = WTFMove(newImpl);
111 return;
112 }
113 UChar* data;
114 auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
115 StringView(*m_impl).getCharactersWithUpconvert(data);
116 StringView(*otherString.m_impl).getCharactersWithUpconvert(data + length);
117 m_impl = WTFMove(newImpl);
118}
119
120void String::append(LChar character)
121{
122 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
123
124 if (!m_impl) {
125 m_impl = StringImpl::create(&character, 1);
126 return;
127 }
128 if (!is8Bit()) {
129 append(static_cast<UChar>(character));
130 return;
131 }
132 if (m_impl->length() >= MaxLength)
133 CRASH();
134 LChar* data;
135 auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
136 StringImpl::copyCharacters(data, m_impl->characters8(), m_impl->length());
137 data[m_impl->length()] = character;
138 m_impl = WTFMove(newImpl);
139}
140
141void String::append(UChar character)
142{
143 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
144
145 if (!m_impl) {
146 m_impl = StringImpl::create(&character, 1);
147 return;
148 }
149 if (isLatin1(character) && is8Bit()) {
150 append(static_cast<LChar>(character));
151 return;
152 }
153 if (m_impl->length() >= MaxLength)
154 CRASH();
155 UChar* data;
156 auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
157 StringView(*m_impl).getCharactersWithUpconvert(data);
158 data[m_impl->length()] = character;
159 m_impl = WTFMove(newImpl);
160}
161
162int codePointCompare(const String& a, const String& b)
163{
164 return codePointCompare(a.impl(), b.impl());
165}
166
167void String::insert(const String& string, unsigned position)
168{
169 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
170
171 unsigned lengthToInsert = string.length();
172
173 if (!lengthToInsert) {
174 if (string.isNull())
175 return;
176 if (isNull())
177 m_impl = string.impl();
178 return;
179 }
180
181 if (position >= length()) {
182 append(string);
183 return;
184 }
185
186 if (lengthToInsert > MaxLength - length())
187 CRASH();
188
189 if (is8Bit() && string.is8Bit()) {
190 LChar* data;
191 auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
192 StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
193 StringView(string).getCharactersWithUpconvert(data + position);
194 StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
195 m_impl = WTFMove(newString);
196 } else {
197 UChar* data;
198 auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
199 StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data);
200 StringView(string).getCharactersWithUpconvert(data + position);
201 StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
202 m_impl = WTFMove(newString);
203 }
204}
205
206void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
207{
208 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
209
210 if (!m_impl) {
211 if (!charactersToAppend)
212 return;
213 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
214 return;
215 }
216
217 if (!lengthToAppend)
218 return;
219
220 ASSERT(charactersToAppend);
221
222 unsigned strLength = m_impl->length();
223
224 if (m_impl->is8Bit()) {
225 if (lengthToAppend > MaxLength - strLength)
226 CRASH();
227 LChar* data;
228 auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
229 StringImpl::copyCharacters(data, m_impl->characters8(), strLength);
230 StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
231 m_impl = WTFMove(newImpl);
232 return;
233 }
234
235 if (lengthToAppend > MaxLength - strLength)
236 CRASH();
237 UChar* data;
238 auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
239 StringImpl::copyCharacters(data, m_impl->characters16(), strLength);
240 StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
241 m_impl = WTFMove(newImpl);
242}
243
244void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
245{
246 // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
247
248 if (!m_impl) {
249 if (!charactersToAppend)
250 return;
251 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
252 return;
253 }
254
255 if (!lengthToAppend)
256 return;
257
258 unsigned strLength = m_impl->length();
259
260 ASSERT(charactersToAppend);
261 if (lengthToAppend > MaxLength - strLength)
262 CRASH();
263 UChar* data;
264 auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
265 if (m_impl->is8Bit())
266 StringImpl::copyCharacters(data, characters8(), strLength);
267 else
268 StringImpl::copyCharacters(data, characters16(), strLength);
269 StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
270 m_impl = WTFMove(newImpl);
271}
272
273
274UChar32 String::characterStartingAt(unsigned i) const
275{
276 if (!m_impl || i >= m_impl->length())
277 return 0;
278 return m_impl->characterStartingAt(i);
279}
280
281void String::truncate(unsigned position)
282{
283 if (m_impl)
284 m_impl = m_impl->substring(0, position);
285}
286
287template<typename CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, unsigned lengthToRemove)
288{
289 CharacterType* data;
290 auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
291 StringImpl::copyCharacters(data, characters, position);
292 StringImpl::copyCharacters(data + position, characters + position + lengthToRemove, length() - lengthToRemove - position);
293 m_impl = WTFMove(newImpl);
294}
295
296void String::remove(unsigned position, unsigned lengthToRemove)
297{
298 if (!lengthToRemove)
299 return;
300 auto length = this->length();
301 if (position >= length)
302 return;
303 lengthToRemove = std::min(lengthToRemove, length - position);
304 if (is8Bit())
305 removeInternal(characters8(), position, lengthToRemove);
306 else
307 removeInternal(characters16(), position, lengthToRemove);
308}
309
310String String::substring(unsigned position, unsigned length) const
311{
312 // FIXME: Should this function, and the many others like it, be inlined?
313 return m_impl ? m_impl->substring(position, length) : String { };
314}
315
316String String::substringSharingImpl(unsigned offset, unsigned length) const
317{
318 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
319
320 unsigned stringLength = this->length();
321 offset = std::min(offset, stringLength);
322 length = std::min(length, stringLength - offset);
323
324 if (!offset && length == stringLength)
325 return *this;
326 return StringImpl::createSubstringSharingImpl(*m_impl, offset, length);
327}
328
329String String::convertToASCIILowercase() const
330{
331 // FIXME: Should this function, and the many others like it, be inlined?
332 return m_impl ? m_impl->convertToASCIILowercase() : String { };
333}
334
335String String::convertToASCIIUppercase() const
336{
337 // FIXME: Should this function, and the many others like it, be inlined?
338 return m_impl ? m_impl->convertToASCIIUppercase() : String { };
339}
340
341String String::convertToLowercaseWithoutLocale() const
342{
343 // FIXME: Should this function, and the many others like it, be inlined?
344 return m_impl ? m_impl->convertToLowercaseWithoutLocale() : String { };
345}
346
347String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const
348{
349 // FIXME: Should this function, and the many others like it, be inlined?
350 return m_impl ? m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex) : String { };
351}
352
353String String::convertToUppercaseWithoutLocale() const
354{
355 // FIXME: Should this function, and the many others like it, be inlined?
356 return m_impl ? m_impl->convertToUppercaseWithoutLocale() : String { };
357}
358
359String String::convertToLowercaseWithLocale(const AtomString& localeIdentifier) const
360{
361 // FIXME: Should this function, and the many others like it, be inlined?
362 return m_impl ? m_impl->convertToLowercaseWithLocale(localeIdentifier) : String { };
363}
364
365String String::convertToUppercaseWithLocale(const AtomString& localeIdentifier) const
366{
367 // FIXME: Should this function, and the many others like it, be inlined?
368 return m_impl ? m_impl->convertToUppercaseWithLocale(localeIdentifier) : String { };
369}
370
371String String::stripWhiteSpace() const
372{
373 // FIXME: Should this function, and the many others like it, be inlined?
374 // FIXME: This function needs a new name. For one thing, "whitespace" is a single
375 // word so the "s" should be lowercase. For another, it's not clear from this name
376 // that the function uses the Unicode definition of whitespace. Most WebKit callers
377 // don't want that and eventually we should consider deleting this.
378 return m_impl ? m_impl->stripWhiteSpace() : String { };
379}
380
381String String::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate) const
382{
383 // FIXME: Should this function, and the many others like it, be inlined?
384 return m_impl ? m_impl->stripLeadingAndTrailingCharacters(predicate) : String { };
385}
386
387String String::simplifyWhiteSpace() const
388{
389 // FIXME: Should this function, and the many others like it, be inlined?
390 // FIXME: This function needs a new name. For one thing, "whitespace" is a single
391 // word so the "s" should be lowercase. For another, it's not clear from this name
392 // that the function uses the Unicode definition of whitespace. Most WebKit callers
393 // don't want that and eventually we should consider deleting this.
394 return m_impl ? m_impl->simplifyWhiteSpace() : String { };
395}
396
397String String::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace) const
398{
399 // FIXME: Should this function, and the many others like it, be inlined?
400 return m_impl ? m_impl->simplifyWhiteSpace(isWhiteSpace) : String { };
401}
402
403String String::removeCharacters(CodeUnitMatchFunction findMatch) const
404{
405 // FIXME: Should this function, and the many others like it, be inlined?
406 return m_impl ? m_impl->removeCharacters(findMatch) : String { };
407}
408
409String String::foldCase() const
410{
411 // FIXME: Should this function, and the many others like it, be inlined?
412 return m_impl ? m_impl->foldCase() : String { };
413}
414
415bool String::percentage(int& result) const
416{
417 if (!m_impl || !m_impl->length())
418 return false;
419
420 if ((*m_impl)[m_impl->length() - 1] != '%')
421 return false;
422
423 if (m_impl->is8Bit())
424 result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1);
425 else
426 result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1);
427 return true;
428}
429
430Vector<UChar> String::charactersWithNullTermination() const
431{
432 Vector<UChar> result;
433
434 if (m_impl) {
435 result.reserveInitialCapacity(length() + 1);
436
437 if (is8Bit()) {
438 const LChar* characters8 = m_impl->characters8();
439 for (size_t i = 0; i < length(); ++i)
440 result.uncheckedAppend(characters8[i]);
441 } else {
442 const UChar* characters16 = m_impl->characters16();
443 result.append(characters16, m_impl->length());
444 }
445
446 result.append(0);
447 }
448
449 return result;
450}
451
452String String::number(int number)
453{
454 return numberToStringSigned<String>(number);
455}
456
457String String::number(unsigned number)
458{
459 return numberToStringUnsigned<String>(number);
460}
461
462String String::number(long number)
463{
464 return numberToStringSigned<String>(number);
465}
466
467String String::number(unsigned long number)
468{
469 return numberToStringUnsigned<String>(number);
470}
471
472String String::number(long long number)
473{
474 return numberToStringSigned<String>(number);
475}
476
477String String::number(unsigned long long number)
478{
479 return numberToStringUnsigned<String>(number);
480}
481
482String String::numberToStringFixedPrecision(float number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
483{
484 NumberToStringBuffer buffer;
485 return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros);
486}
487
488String String::numberToStringFixedPrecision(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
489{
490 NumberToStringBuffer buffer;
491 return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros);
492}
493
494String String::number(float number)
495{
496 NumberToStringBuffer buffer;
497 return numberToString(number, buffer);
498}
499
500String String::number(double number)
501{
502 NumberToStringBuffer buffer;
503 return numberToString(number, buffer);
504}
505
506String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
507{
508 NumberToStringBuffer buffer;
509 return numberToFixedWidthString(number, decimalPlaces, buffer);
510}
511
512int String::toIntStrict(bool* ok, int base) const
513{
514 if (!m_impl) {
515 if (ok)
516 *ok = false;
517 return 0;
518 }
519 return m_impl->toIntStrict(ok, base);
520}
521
522unsigned String::toUIntStrict(bool* ok, int base) const
523{
524 if (!m_impl) {
525 if (ok)
526 *ok = false;
527 return 0;
528 }
529 return m_impl->toUIntStrict(ok, base);
530}
531
532int64_t String::toInt64Strict(bool* ok, int base) const
533{
534 if (!m_impl) {
535 if (ok)
536 *ok = false;
537 return 0;
538 }
539 return m_impl->toInt64Strict(ok, base);
540}
541
542uint64_t String::toUInt64Strict(bool* ok, int base) const
543{
544 if (!m_impl) {
545 if (ok)
546 *ok = false;
547 return 0;
548 }
549 return m_impl->toUInt64Strict(ok, base);
550}
551
552intptr_t String::toIntPtrStrict(bool* ok, int base) const
553{
554 if (!m_impl) {
555 if (ok)
556 *ok = false;
557 return 0;
558 }
559 return m_impl->toIntPtrStrict(ok, base);
560}
561
562int String::toInt(bool* ok) const
563{
564 if (!m_impl) {
565 if (ok)
566 *ok = false;
567 return 0;
568 }
569 return m_impl->toInt(ok);
570}
571
572unsigned String::toUInt(bool* ok) const
573{
574 if (!m_impl) {
575 if (ok)
576 *ok = false;
577 return 0;
578 }
579 return m_impl->toUInt(ok);
580}
581
582int64_t String::toInt64(bool* ok) const
583{
584 if (!m_impl) {
585 if (ok)
586 *ok = false;
587 return 0;
588 }
589 return m_impl->toInt64(ok);
590}
591
592uint64_t String::toUInt64(bool* ok) const
593{
594 if (!m_impl) {
595 if (ok)
596 *ok = false;
597 return 0;
598 }
599 return m_impl->toUInt64(ok);
600}
601
602intptr_t String::toIntPtr(bool* ok) const
603{
604 if (!m_impl) {
605 if (ok)
606 *ok = false;
607 return 0;
608 }
609 return m_impl->toIntPtr(ok);
610}
611
612double String::toDouble(bool* ok) const
613{
614 if (!m_impl) {
615 if (ok)
616 *ok = false;
617 return 0.0;
618 }
619 return m_impl->toDouble(ok);
620}
621
622float String::toFloat(bool* ok) const
623{
624 if (!m_impl) {
625 if (ok)
626 *ok = false;
627 return 0.0f;
628 }
629 return m_impl->toFloat(ok);
630}
631
632String String::isolatedCopy() const &
633{
634 // FIXME: Should this function, and the many others like it, be inlined?
635 return m_impl ? m_impl->isolatedCopy() : String { };
636}
637
638String String::isolatedCopy() &&
639{
640 if (isSafeToSendToAnotherThread()) {
641 // Since we know that our string is a temporary that will be destroyed
642 // we can just steal the m_impl from it, thus avoiding a copy.
643 return { WTFMove(*this) };
644 }
645
646 return m_impl ? m_impl->isolatedCopy() : String { };
647}
648
649bool String::isSafeToSendToAnotherThread() const
650{
651 // AtomStrings are not safe to send between threads, as ~StringImpl()
652 // will try to remove them from the wrong AtomStringTable.
653 return isEmpty() || (m_impl->hasOneRef() && !m_impl->isAtom());
654}
655
656template<bool allowEmptyEntries>
657inline Vector<String> String::splitInternal(const String& separator) const
658{
659 Vector<String> result;
660
661 unsigned startPos = 0;
662 size_t endPos;
663 while ((endPos = find(separator, startPos)) != notFound) {
664 if (allowEmptyEntries || startPos != endPos)
665 result.append(substring(startPos, endPos - startPos));
666 startPos = endPos + separator.length();
667 }
668 if (allowEmptyEntries || startPos != length())
669 result.append(substring(startPos));
670
671 return result;
672}
673
674template<bool allowEmptyEntries>
675inline void String::splitInternal(UChar separator, const SplitFunctor& functor) const
676{
677 StringView view(*this);
678
679 unsigned startPos = 0;
680 size_t endPos;
681 while ((endPos = find(separator, startPos)) != notFound) {
682 if (allowEmptyEntries || startPos != endPos)
683 functor(view.substring(startPos, endPos - startPos));
684 startPos = endPos + 1;
685 }
686 if (allowEmptyEntries || startPos != length())
687 functor(view.substring(startPos));
688}
689
690template<bool allowEmptyEntries>
691inline Vector<String> String::splitInternal(UChar separator) const
692{
693 Vector<String> result;
694 splitInternal<allowEmptyEntries>(separator, [&result](StringView item) {
695 result.append(item.toString());
696 });
697
698 return result;
699}
700
701void String::split(UChar separator, const SplitFunctor& functor) const
702{
703 splitInternal<false>(separator, functor);
704}
705
706Vector<String> String::split(UChar separator) const
707{
708 return splitInternal<false>(separator);
709}
710
711Vector<String> String::split(const String& separator) const
712{
713 return splitInternal<false>(separator);
714}
715
716void String::splitAllowingEmptyEntries(UChar separator, const SplitFunctor& functor) const
717{
718 splitInternal<true>(separator, functor);
719}
720
721Vector<String> String::splitAllowingEmptyEntries(UChar separator) const
722{
723 return splitInternal<true>(separator);
724}
725
726Vector<String> String::splitAllowingEmptyEntries(const String& separator) const
727{
728 return splitInternal<true>(separator);
729}
730
731CString String::ascii() const
732{
733 // Printable ASCII characters 32..127 and the null character are
734 // preserved, characters outside of this range are converted to '?'.
735
736 unsigned length = this->length();
737 if (!length) {
738 char* characterBuffer;
739 return CString::newUninitialized(length, characterBuffer);
740 }
741
742 if (this->is8Bit()) {
743 const LChar* characters = this->characters8();
744
745 char* characterBuffer;
746 CString result = CString::newUninitialized(length, characterBuffer);
747
748 for (unsigned i = 0; i < length; ++i) {
749 LChar ch = characters[i];
750 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
751 }
752
753 return result;
754 }
755
756 const UChar* characters = this->characters16();
757
758 char* characterBuffer;
759 CString result = CString::newUninitialized(length, characterBuffer);
760
761 for (unsigned i = 0; i < length; ++i) {
762 UChar ch = characters[i];
763 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
764 }
765
766 return result;
767}
768
769CString String::latin1() const
770{
771 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
772 // preserved, characters outside of this range are converted to '?'.
773
774 unsigned length = this->length();
775
776 if (!length)
777 return CString("", 0);
778
779 if (is8Bit())
780 return CString(reinterpret_cast<const char*>(this->characters8()), length);
781
782 const UChar* characters = this->characters16();
783
784 char* characterBuffer;
785 CString result = CString::newUninitialized(length, characterBuffer);
786
787 for (unsigned i = 0; i < length; ++i) {
788 UChar ch = characters[i];
789 characterBuffer[i] = !isLatin1(ch) ? '?' : ch;
790 }
791
792 return result;
793}
794
795Expected<CString, UTF8ConversionError> String::tryGetUtf8(ConversionMode mode) const
796{
797 return m_impl ? m_impl->tryGetUtf8(mode) : CString { "", 0 };
798}
799
800Expected<CString, UTF8ConversionError> String::tryGetUtf8() const
801{
802 return tryGetUtf8(LenientConversion);
803}
804
805CString String::utf8(ConversionMode mode) const
806{
807 Expected<CString, UTF8ConversionError> expectedString = tryGetUtf8(mode);
808 RELEASE_ASSERT(expectedString);
809 return expectedString.value();
810}
811
812CString String::utf8() const
813{
814 return utf8(LenientConversion);
815}
816
817String String::make8BitFrom16BitSource(const UChar* source, size_t length)
818{
819 if (!length)
820 return String();
821
822 LChar* destination;
823 String result = String::createUninitialized(length, destination);
824
825 copyLCharsFromUCharSource(destination, source, length);
826
827 return result;
828}
829
830String String::make16BitFrom8BitSource(const LChar* source, size_t length)
831{
832 if (!length)
833 return String();
834
835 UChar* destination;
836 String result = String::createUninitialized(length, destination);
837
838 StringImpl::copyCharacters(destination, source, length);
839
840 return result;
841}
842
843String String::fromUTF8(const LChar* stringStart, size_t length)
844{
845 if (length > MaxLength)
846 CRASH();
847
848 if (!stringStart)
849 return String();
850
851 if (!length)
852 return emptyString();
853
854 if (charactersAreAllASCII(stringStart, length))
855 return StringImpl::create(stringStart, length);
856
857 Vector<UChar, 1024> buffer(length);
858 UChar* bufferStart = buffer.data();
859
860 UChar* bufferCurrent = bufferStart;
861 const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
862 if (!convertUTF8ToUTF16(stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()))
863 return String();
864
865 unsigned utf16Length = bufferCurrent - bufferStart;
866 ASSERT_WITH_SECURITY_IMPLICATION(utf16Length < length);
867 return StringImpl::create(bufferStart, utf16Length);
868}
869
870String String::fromUTF8(const LChar* string)
871{
872 if (!string)
873 return String();
874 return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
875}
876
877String String::fromUTF8(const CString& s)
878{
879 return fromUTF8(s.data());
880}
881
882String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
883{
884 String utf8 = fromUTF8(string, size);
885 if (!utf8)
886 return String(string, size);
887 return utf8;
888}
889
890// String Operations
// Returns the length of the leading part of `data` that looks like an integer:
// optional characters accepted by isSpaceOrNewline(), an optional single '+' or '-',
// then a run of ASCII digits. Scanning stops at the first character that does not fit.
// The result is a size_t so the index is not narrowed on its way back to callers,
// which pass it on as a length.
template<typename CharacterType>
static size_t lengthOfCharactersAsInteger(const CharacterType* data, size_t length)
{
    size_t i = 0;

    // Allow leading spaces.
    while (i != length && isSpaceOrNewline(data[i]))
        ++i;

    // Allow sign.
    if (i != length && (data[i] == '+' || data[i] == '-'))
        ++i;

    // Allow digits.
    while (i != length && isASCIIDigit(data[i]))
        ++i;

    return i;
}
914
915int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
916{
917 return toIntegralType<int, LChar>(data, length, ok, base);
918}
919
920int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
921{
922 return toIntegralType<int, UChar>(data, length, ok, base);
923}
924
925unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
926{
927 return toIntegralType<unsigned, LChar>(data, length, ok, base);
928}
929
930unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
931{
932 return toIntegralType<unsigned, UChar>(data, length, ok, base);
933}
934
935int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
936{
937 return toIntegralType<int64_t, LChar>(data, length, ok, base);
938}
939
940int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
941{
942 return toIntegralType<int64_t, UChar>(data, length, ok, base);
943}
944
945uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
946{
947 return toIntegralType<uint64_t, LChar>(data, length, ok, base);
948}
949
950uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
951{
952 return toIntegralType<uint64_t, UChar>(data, length, ok, base);
953}
954
955intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
956{
957 return toIntegralType<intptr_t, LChar>(data, length, ok, base);
958}
959
960intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
961{
962 return toIntegralType<intptr_t, UChar>(data, length, ok, base);
963}
964
965int charactersToInt(const LChar* data, size_t length, bool* ok)
966{
967 return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
968}
969
970int charactersToInt(const UChar* data, size_t length, bool* ok)
971{
972 return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
973}
974
975unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
976{
977 return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
978}
979
980unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
981{
982 return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
983}
984
985int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
986{
987 return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
988}
989
990int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
991{
992 return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
993}
994
995uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
996{
997 return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
998}
999
1000uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
1001{
1002 return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
1003}
1004
1005intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
1006{
1007 return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
1008}
1009
1010intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
1011{
1012 return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
1013}
1014
// Selects whether toDoubleType() reports success when characters remain after the parsed number.
enum TrailingJunkPolicy {
    DisallowTrailingJunk,
    AllowTrailingJunk
};
1016
1017template<typename CharacterType, TrailingJunkPolicy policy>
1018static inline double toDoubleType(const CharacterType* data, size_t length, bool* ok, size_t& parsedLength)
1019{
1020 size_t leadingSpacesLength = 0;
1021 while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength]))
1022 ++leadingSpacesLength;
1023
1024 double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength);
1025 if (!parsedLength) {
1026 if (ok)
1027 *ok = false;
1028 return 0.0;
1029 }
1030
1031 parsedLength += leadingSpacesLength;
1032 if (ok)
1033 *ok = policy == AllowTrailingJunk || parsedLength == length;
1034 return number;
1035}
1036
1037double charactersToDouble(const LChar* data, size_t length, bool* ok)
1038{
1039 size_t parsedLength;
1040 return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
1041}
1042
1043double charactersToDouble(const UChar* data, size_t length, bool* ok)
1044{
1045 size_t parsedLength;
1046 return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
1047}
1048
1049float charactersToFloat(const LChar* data, size_t length, bool* ok)
1050{
1051 // FIXME: This will return ok even when the string fits into a double but not a float.
1052 size_t parsedLength;
1053 return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
1054}
1055
1056float charactersToFloat(const UChar* data, size_t length, bool* ok)
1057{
1058 // FIXME: This will return ok even when the string fits into a double but not a float.
1059 size_t parsedLength;
1060 return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
1061}
1062
1063float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
1064{
1065 // FIXME: This will return ok even when the string fits into a double but not a float.
1066 return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength));
1067}
1068
1069float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
1070{
1071 // FIXME: This will return ok even when the string fits into a double but not a float.
1072 return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength));
1073}
1074
1075const String& emptyString()
1076{
1077 static NeverDestroyed<String> emptyString(StringImpl::empty());
1078 return emptyString;
1079}
1080
1081const String& nullString()
1082{
1083 static NeverDestroyed<String> nullString;
1084 return nullString;
1085}
1086
1087} // namespace WTF
1088
1089#ifndef NDEBUG
1090
1091// For use in the debugger.
1092String* string(const char*);
1093Vector<char> asciiDebug(StringImpl* impl);
1094Vector<char> asciiDebug(String& string);
1095
1096void String::show() const
1097{
1098 dataLogF("%s\n", asciiDebug(impl()).data());
1099}
1100
1101String* string(const char* s)
1102{
1103 // Intentionally leaks memory!
1104 return new String(s);
1105}
1106
1107Vector<char> asciiDebug(StringImpl* impl)
1108{
1109 if (!impl)
1110 return asciiDebug(String("[null]"_s).impl());
1111
1112 Vector<char> buffer;
1113 for (unsigned i = 0; i < impl->length(); ++i) {
1114 UChar ch = (*impl)[i];
1115 if (isASCIIPrintable(ch)) {
1116 if (ch == '\\')
1117 buffer.append(ch);
1118 buffer.append(ch);
1119 } else {
1120 buffer.append('\\');
1121 buffer.append('u');
1122 appendUnsignedAsHexFixedSize(ch, buffer, 4);
1123 }
1124 }
1125 buffer.append('\0');
1126 return buffer;
1127}
1128
1129Vector<char> asciiDebug(String& string)
1130{
1131 return asciiDebug(string.impl());
1132}
1133
1134#endif
1135