WTFString.cpp source code [jsc/Source/WTF/wtf/text/WTFString.cpp]

1	/*
2	* (C) 1999 Lars Knoll (knoll@kde.org)
3	* Copyright (C) 2004-2019 Apple Inc. All rights reserved.
4	* Copyright (C) 2007-2009 Torch Mobile, Inc.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*/
21
22	#include "config.h"
23	#include <wtf/text/WTFString.h>
24
25	#include <stdarg.h>
26	#include <wtf/ASCIICType.h>
27	#include <wtf/DataLog.h>
28	#include <wtf/HexNumber.h>
29	#include <wtf/MathExtras.h>
30	#include <wtf/NeverDestroyed.h>
31	#include <wtf/Vector.h>
32	#include <wtf/dtoa.h>
33	#include <wtf/text/CString.h>
34	#include <wtf/text/IntegerToStringConversion.h>
35	#include <wtf/text/StringToIntegerConversion.h>
36	#include <wtf/unicode/CharacterNames.h>
37	#include <wtf/unicode/UTF8Conversion.h>
38
39	namespace WTF {
40
41	using namespace Unicode;
42
43	// Construct a string with UTF-16 data.
44	String::String(const UChar* characters, unsigned length)
45	{
46	if (characters)
47	m_impl = StringImpl::create(characters, length);
48	}
49
50	// Construct a string with UTF-16 data, from a null-terminated source.
51	String::String(const UChar* nullTerminatedString)
52	{
53	if (nullTerminatedString)
54	m_impl = StringImpl::create(nullTerminatedString, lengthOfNullTerminatedString(nullTerminatedString));
55	}
56
57	// Construct a string with latin1 data.
58	String::String(const LChar* characters, unsigned length)
59	{
60	if (characters)
61	m_impl = StringImpl::create(characters, length);
62	}
63
64	String::String(const char* characters, unsigned length)
65	{
66	if (characters)
67	m_impl = StringImpl::create(reinterpret_cast<const LChar*>(characters), length);
68	}
69
70	// Construct a string with Latin-1 data, from a null-terminated source.
71	String::String(const LChar* nullTerminatedString)
72	{
73	if (nullTerminatedString)
74	m_impl = StringImpl::create(nullTerminatedString);
75	}
76
77	String::String(const char* nullTerminatedString)
78	{
79	if (nullTerminatedString)
80	m_impl = StringImpl::create(reinterpret_cast<const LChar*>(nullTerminatedString));
81	}
82
83	String::String(ASCIILiteral characters)
84	: m_impl (StringImpl::createFromLiteral(characters))
85	{
86	}
87
88	void String::append(const String& otherString)
89	{
90	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
91
92	if (!m_impl) {
93	m_impl = otherString.m_impl;
94	return;
95	}
96
97	if (otherString.isEmpty())
98	return;
99
100	auto length = m_impl ->length();
101	auto otherLength = otherString.m_impl ->length();
102	if (otherLength > MaxLength - length)
103	CRASH();
104
105	if (m_impl ->is8Bit() && otherString.m_impl ->is8Bit()) {
106	LChar* data;
107	auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
108	StringImpl::copyCharacters(data, m_impl ->characters8(), length);
109	StringImpl::copyCharacters(data + length, otherString.m_impl ->characters8(), otherLength);
110	m_impl = WTFMove(newImpl);
111	return;
112	}
113	UChar* data;
114	auto newImpl = StringImpl::createUninitialized(length + otherLength, data);
115	StringView (*m_impl).getCharactersWithUpconvert(data);
116	StringView (*otherString.m_impl).getCharactersWithUpconvert(data + length);
117	m_impl = WTFMove(newImpl);
118	}
119
120	void String::append(LChar character)
121	{
122	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
123
124	if (!m_impl) {
125	m_impl = StringImpl::create(&character, `1`);
126	return;
127	}
128	if (!is8Bit()) {
129	append(static_cast<UChar>(character));
130	return;
131	}
132	if (m_impl ->length() >= MaxLength)
133	CRASH();
134	LChar* data;
135	auto newImpl = StringImpl::createUninitialized(m_impl ->length() + `1`, data);
136	StringImpl::copyCharacters(data, m_impl ->characters8(), m_impl ->length());
137	data[m_impl ->length()] = character;
138	m_impl = WTFMove(newImpl);
139	}
140
141	void String::append(UChar character)
142	{
143	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
144
145	if (!m_impl) {
146	m_impl = StringImpl::create(&character, `1`);
147	return;
148	}
149	if (isLatin1(character) && is8Bit()) {
150	append(static_cast<LChar>(character));
151	return;
152	}
153	if (m_impl ->length() >= MaxLength)
154	CRASH();
155	UChar* data;
156	auto newImpl = StringImpl::createUninitialized(m_impl ->length() + `1`, data);
157	StringView (*m_impl).getCharactersWithUpconvert(data);
158	data[m_impl ->length()] = character;
159	m_impl = WTFMove(newImpl);
160	}
161
162	int codePointCompare(const String& a, const String& b)
163	{
164	return codePointCompare(a.impl(), b.impl());
165	}
166
167	void String::insert(const String& string, unsigned position)
168	{
169	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
170
171	unsigned lengthToInsert = string.length();
172
173	if (!lengthToInsert) {
174	if (string.isNull())
175	return;
176	if (isNull())
177	m_impl = string.impl();
178	return;
179	}
180
181	if (position >= length()) {
182	append(string);
183	return;
184	}
185
186	if (lengthToInsert > MaxLength - length())
187	CRASH();
188
189	if (is8Bit() && string.is8Bit()) {
190	LChar* data;
191	auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
192	StringView (*m_impl).substring(`0`, position).getCharactersWithUpconvert(data);
193	StringView (string).getCharactersWithUpconvert(data + position);
194	StringView (*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
195	m_impl = WTFMove(newString);
196	} else {
197	UChar* data;
198	auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data);
199	StringView (*m_impl).substring(`0`, position).getCharactersWithUpconvert(data);
200	StringView (string).getCharactersWithUpconvert(data + position);
201	StringView (*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert);
202	m_impl = WTFMove(newString);
203	}
204	}
205
206	void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
207	{
208	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
209
210	if (!m_impl) {
211	if (!charactersToAppend)
212	return;
213	m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
214	return;
215	}
216
217	if (!lengthToAppend)
218	return;
219
220	ASSERT(charactersToAppend);
221
222	unsigned strLength = m_impl ->length();
223
224	if (m_impl ->is8Bit()) {
225	if (lengthToAppend > MaxLength - strLength)
226	CRASH();
227	LChar* data;
228	auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
229	StringImpl::copyCharacters(data, m_impl ->characters8(), strLength);
230	StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
231	m_impl = WTFMove(newImpl);
232	return;
233	}
234
235	if (lengthToAppend > MaxLength - strLength)
236	CRASH();
237	UChar* data;
238	auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
239	StringImpl::copyCharacters(data, m_impl ->characters16(), strLength);
240	StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
241	m_impl = WTFMove(newImpl);
242	}
243
244	void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
245	{
246	// FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API.
247
248	if (!m_impl) {
249	if (!charactersToAppend)
250	return;
251	m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
252	return;
253	}
254
255	if (!lengthToAppend)
256	return;
257
258	unsigned strLength = m_impl ->length();
259
260	ASSERT(charactersToAppend);
261	if (lengthToAppend > MaxLength - strLength)
262	CRASH();
263	UChar* data;
264	auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
265	if (m_impl ->is8Bit())
266	StringImpl::copyCharacters(data, characters8(), strLength);
267	else
268	StringImpl::copyCharacters(data, characters16(), strLength);
269	StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend);
270	m_impl = WTFMove(newImpl);
271	}
272
273
274	UChar32 String::characterStartingAt(unsigned i) const
275	{
276	if (!m_impl \|\| i >= m_impl ->length())
277	return `0`;
278	return m_impl ->characterStartingAt(i);
279	}
280
281	void String::truncate(unsigned position)
282	{
283	if (m_impl)
284	m_impl = m_impl ->substring(`0`, position);
285	}
286
287	template<typename CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, unsigned lengthToRemove)
288	{
289	CharacterType* data;
290	auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
291	StringImpl::copyCharacters(data, characters, position);
292	StringImpl::copyCharacters(data + position, characters + position + lengthToRemove, length() - lengthToRemove - position);
293	m_impl = WTFMove(newImpl);
294	}
295
296	void String::remove(unsigned position, unsigned lengthToRemove)
297	{
298	if (!lengthToRemove)
299	return;
300	auto length = this->length();
301	if (position >= length)
302	return;
303	lengthToRemove = std::min(lengthToRemove, length - position);
304	if (is8Bit())
305	removeInternal(characters8(), position, lengthToRemove);
306	else
307	removeInternal(characters16(), position, lengthToRemove);
308	}
309
310	String String::substring(unsigned position, unsigned length) const
311	{
312	// FIXME: Should this function, and the many others like it, be inlined?
313	return m_impl ? m_impl ->substring(position, length) : String { };
314	}
315
316	String String::substringSharingImpl(unsigned offset, unsigned length) const
317	{
318	// FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
319
320	unsigned stringLength = this->length();
321	offset = std::min(offset, stringLength);
322	length = std::min(length, stringLength - offset);
323
324	if (!offset && length == stringLength)
325	return *this;
326	return StringImpl::createSubstringSharingImpl(*m_impl, offset, length);
327	}
328
329	String String::convertToASCIILowercase() const
330	{
331	// FIXME: Should this function, and the many others like it, be inlined?
332	return m_impl ? m_impl ->convertToASCIILowercase() : String { };
333	}
334
335	String String::convertToASCIIUppercase() const
336	{
337	// FIXME: Should this function, and the many others like it, be inlined?
338	return m_impl ? m_impl ->convertToASCIIUppercase() : String { };
339	}
340
341	String String::convertToLowercaseWithoutLocale() const
342	{
343	// FIXME: Should this function, and the many others like it, be inlined?
344	return m_impl ? m_impl ->convertToLowercaseWithoutLocale() : String { };
345	}
346
347	String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const
348	{
349	// FIXME: Should this function, and the many others like it, be inlined?
350	return m_impl ? m_impl ->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex) : String { };
351	}
352
353	String String::convertToUppercaseWithoutLocale() const
354	{
355	// FIXME: Should this function, and the many others like it, be inlined?
356	return m_impl ? m_impl ->convertToUppercaseWithoutLocale() : String { };
357	}
358
359	String String::convertToLowercaseWithLocale(const AtomString& localeIdentifier) const
360	{
361	// FIXME: Should this function, and the many others like it, be inlined?
362	return m_impl ? m_impl ->convertToLowercaseWithLocale(localeIdentifier) : String { };
363	}
364
365	String String::convertToUppercaseWithLocale(const AtomString& localeIdentifier) const
366	{
367	// FIXME: Should this function, and the many others like it, be inlined?
368	return m_impl ? m_impl ->convertToUppercaseWithLocale(localeIdentifier) : String { };
369	}
370
371	String String::stripWhiteSpace() const
372	{
373	// FIXME: Should this function, and the many others like it, be inlined?
374	// FIXME: This function needs a new name. For one thing, "whitespace" is a single
375	// word so the "s" should be lowercase. For another, it's not clear from this name
376	// that the function uses the Unicode definition of whitespace. Most WebKit callers
377	// don't want that and eventually we should consider deleting this.
378	return m_impl ? m_impl ->stripWhiteSpace() : String { };
379	}
380
381	String String::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate) const
382	{
383	// FIXME: Should this function, and the many others like it, be inlined?
384	return m_impl ? m_impl ->stripLeadingAndTrailingCharacters(predicate) : String { };
385	}
386
387	String String::simplifyWhiteSpace() const
388	{
389	// FIXME: Should this function, and the many others like it, be inlined?
390	// FIXME: This function needs a new name. For one thing, "whitespace" is a single
391	// word so the "s" should be lowercase. For another, it's not clear from this name
392	// that the function uses the Unicode definition of whitespace. Most WebKit callers
393	// don't want that and eventually we should consider deleting this.
394	return m_impl ? m_impl ->simplifyWhiteSpace() : String { };
395	}
396
397	String String::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace) const
398	{
399	// FIXME: Should this function, and the many others like it, be inlined?
400	return m_impl ? m_impl ->simplifyWhiteSpace(isWhiteSpace) : String { };
401	}
402
403	String String::removeCharacters(CodeUnitMatchFunction findMatch) const
404	{
405	// FIXME: Should this function, and the many others like it, be inlined?
406	return m_impl ? m_impl ->removeCharacters(findMatch) : String { };
407	}
408
409	String String::foldCase() const
410	{
411	// FIXME: Should this function, and the many others like it, be inlined?
412	return m_impl ? m_impl ->foldCase() : String { };
413	}
414
415	bool String::percentage(int& result) const
416	{
417	if (!m_impl \|\| !m_impl ->length())
418	return false;
419
420	if ((*m_impl)[m_impl ->length() - `1`] != `'%'`)
421	return false;
422
423	if (m_impl ->is8Bit())
424	result = charactersToIntStrict(m_impl ->characters8(), m_impl ->length() - `1`);
425	else
426	result = charactersToIntStrict(m_impl ->characters16(), m_impl ->length() - `1`);
427	return true;
428	}
429
430	Vector<UChar> String::charactersWithNullTermination() const
431	{
432	Vector<UChar> result;
433
434	if (m_impl) {
435	result.reserveInitialCapacity(length() + `1`);
436
437	if (is8Bit()) {
438	const LChar* characters8 = m_impl ->characters8();
439	for (size_t i = `0`; i < length(); ++i)
440	result.uncheckedAppend(characters8[i]);
441	} else {
442	const UChar* characters16 = m_impl ->characters16();
443	result.append(characters16, m_impl ->length());
444	}
445
446	result.append(`0`);
447	}
448
449	return result;
450	}
451
452	String String::number(int number)
453	{
454	return numberToStringSigned<String>(number);
455	}
456
457	String String::number(unsigned number)
458	{
459	return numberToStringUnsigned<String>(number);
460	}
461
462	String String::number(long number)
463	{
464	return numberToStringSigned<String>(number);
465	}
466
467	String String::number(unsigned long number)
468	{
469	return numberToStringUnsigned<String>(number);
470	}
471
472	String String::number(long long number)
473	{
474	return numberToStringSigned<String>(number);
475	}
476
477	String String::number(unsigned long long number)
478	{
479	return numberToStringUnsigned<String>(number);
480	}
481
482	String String::numberToStringFixedPrecision(float number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
483	{
484	NumberToStringBuffer buffer;
485	return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros);
486	}
487
488	String String::numberToStringFixedPrecision(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
489	{
490	NumberToStringBuffer buffer;
491	return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros);
492	}
493
494	String String::number(float number)
495	{
496	NumberToStringBuffer buffer;
497	return numberToString(number, buffer);
498	}
499
500	String String::number(double number)
501	{
502	NumberToStringBuffer buffer;
503	return numberToString(number, buffer);
504	}
505
506	String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
507	{
508	NumberToStringBuffer buffer;
509	return numberToFixedWidthString(number, decimalPlaces, buffer);
510	}
511
512	int String::toIntStrict(bool* ok, int base) const
513	{
514	if (!m_impl) {
515	if (ok)
516	ok = false*;
517	return `0`;
518	}
519	return m_impl ->toIntStrict(ok, base);
520	}
521
522	unsigned String::toUIntStrict(bool* ok, int base) const
523	{
524	if (!m_impl) {
525	if (ok)
526	ok = false*;
527	return `0`;
528	}
529	return m_impl ->toUIntStrict(ok, base);
530	}
531
532	int64_t String::toInt64Strict(bool* ok, int base) const
533	{
534	if (!m_impl) {
535	if (ok)
536	ok = false*;
537	return `0`;
538	}
539	return m_impl ->toInt64Strict(ok, base);
540	}
541
542	uint64_t String::toUInt64Strict(bool* ok, int base) const
543	{
544	if (!m_impl) {
545	if (ok)
546	ok = false*;
547	return `0`;
548	}
549	return m_impl ->toUInt64Strict(ok, base);
550	}
551
552	intptr_t String::toIntPtrStrict(bool* ok, int base) const
553	{
554	if (!m_impl) {
555	if (ok)
556	ok = false*;
557	return `0`;
558	}
559	return m_impl ->toIntPtrStrict(ok, base);
560	}
561
562	int String::toInt(bool* ok) const
563	{
564	if (!m_impl) {
565	if (ok)
566	ok = false*;
567	return `0`;
568	}
569	return m_impl ->toInt(ok);
570	}
571
572	unsigned String::toUInt(bool* ok) const
573	{
574	if (!m_impl) {
575	if (ok)
576	ok = false*;
577	return `0`;
578	}
579	return m_impl ->toUInt(ok);
580	}
581
582	int64_t String::toInt64(bool* ok) const
583	{
584	if (!m_impl) {
585	if (ok)
586	ok = false*;
587	return `0`;
588	}
589	return m_impl ->toInt64(ok);
590	}
591
592	uint64_t String::toUInt64(bool* ok) const
593	{
594	if (!m_impl) {
595	if (ok)
596	ok = false*;
597	return `0`;
598	}
599	return m_impl ->toUInt64(ok);
600	}
601
602	intptr_t String::toIntPtr(bool* ok) const
603	{
604	if (!m_impl) {
605	if (ok)
606	ok = false*;
607	return `0`;
608	}
609	return m_impl ->toIntPtr(ok);
610	}
611
612	double String::toDouble(bool* ok) const
613	{
614	if (!m_impl) {
615	if (ok)
616	ok = false*;
617	return `0.0`;
618	}
619	return m_impl ->toDouble(ok);
620	}
621
622	float String::toFloat(bool* ok) const
623	{
624	if (!m_impl) {
625	if (ok)
626	ok = false*;
627	return `0.0f`;
628	}
629	return m_impl ->toFloat(ok);
630	}
631
632	String String::isolatedCopy() const &
633	{
634	// FIXME: Should this function, and the many others like it, be inlined?
635	return m_impl ? m_impl ->isolatedCopy() : String { };
636	}
637
638	String String::isolatedCopy() &&
639	{
640	if (isSafeToSendToAnotherThread()) {
641	// Since we know that our string is a temporary that will be destroyed
642	// we can just steal the m_impl from it, thus avoiding a copy.
643	return { WTFMove(*this) };
644	}
645
646	return m_impl ? m_impl ->isolatedCopy() : String { };
647	}
648
649	bool String::isSafeToSendToAnotherThread() const
650	{
651	// AtomStrings are not safe to send between threads, as ~StringImpl()
652	// will try to remove them from the wrong AtomStringTable.
653	return isEmpty() \|\| (m_impl ->hasOneRef() && !m_impl ->isAtom());
654	}
655
656	template<bool allowEmptyEntries>
657	inline Vector<String> String::splitInternal(const String& separator) const
658	{
659	Vector<String> result;
660
661	unsigned startPos = `0`;
662	size_t endPos;
663	while ((endPos = find(separator, startPos)) != notFound) {
664	if (allowEmptyEntries \|\| startPos != endPos)
665	result.append(substring(startPos, endPos - startPos));
666	startPos = endPos + separator.length();
667	}
668	if (allowEmptyEntries \|\| startPos != length())
669	result.append(substring(startPos));
670
671	return result;
672	}
673
674	template<bool allowEmptyEntries>
675	inline void String::splitInternal(UChar separator, const SplitFunctor& functor) const
676	{
677	StringView view(*this);
678
679	unsigned startPos = `0`;
680	size_t endPos;
681	while ((endPos = find(separator, startPos)) != notFound) {
682	if (allowEmptyEntries \|\| startPos != endPos)
683	functor (view.substring(startPos, endPos - startPos));
684	startPos = endPos + `1`;
685	}
686	if (allowEmptyEntries \|\| startPos != length())
687	functor (view.substring(startPos));
688	}
689
690	template<bool allowEmptyEntries>
691	inline Vector<String> String::splitInternal(UChar separator) const
692	{
693	Vector<String> result;
694	splitInternal<allowEmptyEntries>(separator, [&result](StringView item) {
695	result.append(item.toString());
696	});
697
698	return result;
699	}
700
701	void String::split(UChar separator, const SplitFunctor& functor) const
702	{
703	splitInternal<false>(separator, functor);
704	}
705
706	Vector<String> String::split(UChar separator) const
707	{
708	return splitInternal<false>(separator);
709	}
710
711	Vector<String> String::split(const String& separator) const
712	{
713	return splitInternal<false>(separator);
714	}
715
716	void String::splitAllowingEmptyEntries(UChar separator, const SplitFunctor& functor) const
717	{
718	splitInternal<true>(separator, functor);
719	}
720
721	Vector<String> String::splitAllowingEmptyEntries(UChar separator) const
722	{
723	return splitInternal<true>(separator);
724	}
725
726	Vector<String> String::splitAllowingEmptyEntries(const String& separator) const
727	{
728	return splitInternal<true>(separator);
729	}
730
731	CString String::ascii() const
732	{
733	// Printable ASCII characters 32..127 and the null character are
734	// preserved, characters outside of this range are converted to '?'.
735
736	unsigned length = this->length();
737	if (!length) {
738	char* characterBuffer;
739	return CString::newUninitialized(length, characterBuffer);
740	}
741
742	if (this->is8Bit()) {
743	const LChar* characters = this->characters8();
744
745	char* characterBuffer;
746	CString result = CString::newUninitialized(length, characterBuffer);
747
748	for (unsigned i = `0`; i < length; ++i) {
749	LChar ch = characters[i];
750	characterBuffer[i] = ch && (ch < `0x20` \|\| ch > `0x7f`) ? `'?'` : ch;
751	}
752
753	return result;
754	}
755
756	const UChar* characters = this->characters16();
757
758	char* characterBuffer;
759	CString result = CString::newUninitialized(length, characterBuffer);
760
761	for (unsigned i = `0`; i < length; ++i) {
762	UChar ch = characters[i];
763	characterBuffer[i] = ch && (ch < `0x20` \|\| ch > `0x7f`) ? `'?'` : ch;
764	}
765
766	return result;
767	}
768
769	CString String::latin1() const
770	{
771	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
772	// preserved, characters outside of this range are converted to '?'.
773
774	unsigned length = this->length();
775
776	if (!length)
777	return CString ("", `0`);
778
779	if (is8Bit())
780	return CString (reinterpret_cast<const char>(this*->characters8()), length);
781
782	const UChar* characters = this->characters16();
783
784	char* characterBuffer;
785	CString result = CString::newUninitialized(length, characterBuffer);
786
787	for (unsigned i = `0`; i < length; ++i) {
788	UChar ch = characters[i];
789	characterBuffer[i] = !isLatin1(ch) ? `'?'` : ch;
790	}
791
792	return result;
793	}
794
795	Expected<CString, UTF8ConversionError> String::tryGetUtf8(ConversionMode mode) const
796	{
797	return m_impl ? m_impl ->tryGetUtf8(mode) : CString { "", `0` };
798	}
799
800	Expected<CString, UTF8ConversionError> String::tryGetUtf8() const
801	{
802	return tryGetUtf8(LenientConversion);
803	}
804
805	CString String::utf8(ConversionMode mode) const
806	{
807	Expected<CString, UTF8ConversionError> expectedString = tryGetUtf8(mode);
808	RELEASE_ASSERT(expectedString);
809	return expectedString.value();
810	}
811
812	CString String::utf8() const
813	{
814	return utf8(LenientConversion);
815	}
816
817	String String::make8BitFrom16BitSource(const UChar* source, size_t length)
818	{
819	if (!length)
820	return String ();
821
822	LChar* destination;
823	String result = String::createUninitialized(length, destination);
824
825	copyLCharsFromUCharSource(destination, source, length);
826
827	return result;
828	}
829
830	String String::make16BitFrom8BitSource(const LChar* source, size_t length)
831	{
832	if (!length)
833	return String ();
834
835	UChar* destination;
836	String result = String::createUninitialized(length, destination);
837
838	StringImpl::copyCharacters(destination, source, length);
839
840	return result;
841	}
842
843	String String::fromUTF8(const LChar* stringStart, size_t length)
844	{
845	if (length > MaxLength)
846	CRASH();
847
848	if (!stringStart)
849	return String ();
850
851	if (!length)
852	return emptyString();
853
854	if (charactersAreAllASCII(stringStart, length))
855	return StringImpl::create(stringStart, length);
856
857	Vector<UChar, `1024`> buffer(length);
858	UChar* bufferStart = buffer.data();
859
860	UChar* bufferCurrent = bufferStart;
861	const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
862	if (!convertUTF8ToUTF16(stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()))
863	return String ();
864
865	unsigned utf16Length = bufferCurrent - bufferStart;
866	ASSERT_WITH_SECURITY_IMPLICATION(utf16Length < length);
867	return StringImpl::create(bufferStart, utf16Length);
868	}
869
870	String String::fromUTF8(const LChar* string)
871	{
872	if (!string)
873	return String ();
874	return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
875	}
876
877	String String::fromUTF8(const CString& s)
878	{
879	return fromUTF8(s.data());
880	}
881
882	String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
883	{
884	String utf8 = fromUTF8(string, size);
885	if (!utf8)
886	return String (string, size);
887	return utf8;
888	}
889
890	// String Operations
891	template<typename CharacterType>
892	static unsigned lengthOfCharactersAsInteger(const CharacterType* data, size_t length)
893	{
894	size_t i = `0`;
895
896	// Allow leading spaces.
897	for (; i != length; ++i) {
898	if (!isSpaceOrNewline(data[i]))
899	break;
900	}
901
902	// Allow sign.
903	if (i != length && (data[i] == `'+'` \|\| data[i] == `'-'`))
904	++i;
905
906	// Allow digits.
907	for (; i != length; ++i) {
908	if (!isASCIIDigit(data[i]))
909	break;
910	}
911
912	return i;
913	}
914
915	int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
916	{
917	return toIntegralType<int, LChar>(data, length, ok, base);
918	}
919
920	int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
921	{
922	return toIntegralType<int, UChar>(data, length, ok, base);
923	}
924
925	unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
926	{
927	return toIntegralType<unsigned, LChar>(data, length, ok, base);
928	}
929
930	unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
931	{
932	return toIntegralType<unsigned, UChar>(data, length, ok, base);
933	}
934
935	int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
936	{
937	return toIntegralType<int64_t, LChar>(data, length, ok, base);
938	}
939
940	int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
941	{
942	return toIntegralType<int64_t, UChar>(data, length, ok, base);
943	}
944
945	uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
946	{
947	return toIntegralType<uint64_t, LChar>(data, length, ok, base);
948	}
949
950	uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
951	{
952	return toIntegralType<uint64_t, UChar>(data, length, ok, base);
953	}
954
955	intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
956	{
957	return toIntegralType<intptr_t, LChar>(data, length, ok, base);
958	}
959
960	intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
961	{
962	return toIntegralType<intptr_t, UChar>(data, length, ok, base);
963	}
964
965	int charactersToInt(const LChar* data, size_t length, bool* ok)
966	{
967	return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, `10`);
968	}
969
970	int charactersToInt(const UChar* data, size_t length, bool* ok)
971	{
972	return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, `10`);
973	}
974
975	unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
976	{
977	return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, `10`);
978	}
979
980	unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
981	{
982	return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, `10`);
983	}
984
985	int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
986	{
987	return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, `10`);
988	}
989
990	int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
991	{
992	return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, `10`);
993	}
994
995	uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
996	{
997	return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, `10`);
998	}
999
1000	uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
1001	{
1002	return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, `10`);
1003	}
1004
1005	intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
1006	{
1007	return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, `10`);
1008	}
1009
1010	intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
1011	{
1012	return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, `10`);
1013	}
1014
// Selects whether toDoubleType() reports success when unparsed characters
// remain after the number.
enum TrailingJunkPolicy {
    DisallowTrailingJunk,
    AllowTrailingJunk
};
1016
1017	template<typename CharacterType, TrailingJunkPolicy policy>
1018	static inline double toDoubleType(const CharacterType* data, size_t length, bool* ok, size_t& parsedLength)
1019	{
1020	size_t leadingSpacesLength = `0`;
1021	while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength]))
1022	++leadingSpacesLength;
1023
1024	double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength);
1025	if (!parsedLength) {
1026	if (ok)
1027	ok = false*;
1028	return `0.0`;
1029	}
1030
1031	parsedLength += leadingSpacesLength;
1032	if (ok)
1033	*ok = policy == AllowTrailingJunk \|\| parsedLength == length;
1034	return number;
1035	}
1036
1037	double charactersToDouble(const LChar* data, size_t length, bool* ok)
1038	{
1039	size_t parsedLength;
1040	return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
1041	}
1042
1043	double charactersToDouble(const UChar* data, size_t length, bool* ok)
1044	{
1045	size_t parsedLength;
1046	return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
1047	}
1048
1049	float charactersToFloat(const LChar* data, size_t length, bool* ok)
1050	{
1051	// FIXME: This will return ok even when the string fits into a double but not a float.
1052	size_t parsedLength;
1053	return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
1054	}
1055
1056	float charactersToFloat(const UChar* data, size_t length, bool* ok)
1057	{
1058	// FIXME: This will return ok even when the string fits into a double but not a float.
1059	size_t parsedLength;
1060	return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
1061	}
1062
1063	float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
1064	{
1065	// FIXME: This will return ok even when the string fits into a double but not a float.
1066	return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, `0`, parsedLength));
1067	}
1068
1069	float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
1070	{
1071	// FIXME: This will return ok even when the string fits into a double but not a float.
1072	return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, `0`, parsedLength));
1073	}
1074
1075	const String& emptyString()
1076	{
1077	static NeverDestroyed<String> emptyString(StringImpl::empty());
1078	return emptyString;
1079	}
1080
1081	const String& nullString()
1082	{
1083	static NeverDestroyed<String> nullString;
1084	return nullString;
1085	}
1086
1087	} // namespace WTF
1088
1089	#ifndef NDEBUG
1090
1091	// For use in the debugger.
1092	String* string(const char*);
1093	Vector<char> asciiDebug(StringImpl* impl);
1094	Vector<char> asciiDebug(String& string);
1095
1096	void String::show() const
1097	{
1098	dataLogF("%s\n", asciiDebug(impl()).data());
1099	}
1100
1101	String* string(const char* s)
1102	{
1103	// Intentionally leaks memory!
1104	return new String (s);
1105	}
1106
1107	Vector<char> asciiDebug(StringImpl* impl)
1108	{
1109	if (!impl)
1110	return asciiDebug(String ("[null]"_s).impl());
1111
1112	Vector<char> buffer;
1113	for (unsigned i = `0`; i < impl->length(); ++i) {
1114	UChar ch = (*impl)[i];
1115	if (isASCIIPrintable(ch)) {
1116	if (ch == `'\\'`)
1117	buffer.append(ch);
1118	buffer.append(ch);
1119	} else {
1120	buffer.append(`'\\'`);
1121	buffer.append(`'u'`);
1122	appendUnsignedAsHexFixedSize(ch, buffer, `4`);
1123	}
1124	}
1125	buffer.append(`'\0'`);
1126	return buffer;
1127	}
1128
1129	Vector<char> asciiDebug(String& string)
1130	{
1131	return asciiDebug(string.impl());
1132	}
1133
1134	#endif
1135

Browse the source code of jsc/Source/WTF/wtf/text/WTFString.cpp