StringImpl.h source code [jsc/Source/WTF/wtf/text/StringImpl.h]

1	/*
2	* Copyright (C) 1999 Lars Knoll ([email protected])
3	* Copyright (C) 2005-2019 Apple Inc. All rights reserved.
4	* Copyright (C) 2009 Google Inc. All rights reserved.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#pragma once
24
25	#include <limits.h>
26	#include <unicode/ustring.h>
27	#include <wtf/ASCIICType.h>
28	#include <wtf/CheckedArithmetic.h>
29	#include <wtf/Expected.h>
30	#include <wtf/MathExtras.h>
31	#include <wtf/StdLibExtras.h>
32	#include <wtf/Vector.h>
33	#include <wtf/text/ASCIIFastPath.h>
34	#include <wtf/text/ConversionMode.h>
35	#include <wtf/text/StringCommon.h>
36	#include <wtf/text/StringHasher.h>
37	#include <wtf/text/UTF8ConversionError.h>
38
39	#if USE(CF)
40	typedef const struct __CFString * CFStringRef;
41	#endif
42
43	#ifdef __OBJC__
44	@class NSString;
45	#endif
46
47	namespace JSC {
48	namespace LLInt { class Data; }
49	class LLIntOffsetsExtractor;
50	}
51
52	namespace WTF {
53
54	class SymbolImpl;
55	class SymbolRegistry;
56
57	struct CStringTranslator;
58	struct HashAndUTF8CharactersTranslator;
59	struct LCharBufferTranslator;
60	struct StringHash;
61	struct SubstringTranslator;
62	struct UCharBufferTranslator;
63
64	template<typename> class RetainPtr;
65
66	template<typename> struct BufferFromStaticDataTranslator;
67	template<typename> struct HashAndCharactersTranslator;
68
// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
70	#define STRING_STATS 0
71
72	template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length);
73
74	#if STRING_STATS
75
76	struct StringStats {
77	WTF_MAKE_STRUCT_FAST_ALLOCATED;
78	void add8BitString(unsigned length, bool isSubString = false)
79	{
80	++m_totalNumberStrings;
81	++m_number8BitStrings;
82	if (!isSubString)
83	m_total8BitData += length;
84	}
85
86	void add16BitString(unsigned length, bool isSubString = false)
87	{
88	++m_totalNumberStrings;
89	++m_number16BitStrings;
90	if (!isSubString)
91	m_total16BitData += length;
92	}
93
94	void removeString(StringImpl&);
95	void printStats();
96
97	static constexpr unsigned s_printStringStatsFrequency = `5000`;
98	static std::atomic<unsigned> s_stringRemovesTillPrintStats;
99
100	std::atomic<unsigned> m_refCalls;
101	std::atomic<unsigned> m_derefCalls;
102
103	std::atomic<unsigned> m_totalNumberStrings;
104	std::atomic<unsigned> m_number8BitStrings;
105	std::atomic<unsigned> m_number16BitStrings;
106	std::atomic<unsigned long long> m_total8BitData;
107	std::atomic<unsigned long long> m_total16BitData;
108	};
109
110	#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
111	#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
112	#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
113	#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
114	#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
115	#define STRING_STATS_REF_STRING(string) ++StringImpl::stringStats().m_refCalls;
116	#define STRING_STATS_DEREF_STRING(string) ++StringImpl::stringStats().m_derefCalls;
117
118	#else
119
120	#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
121	#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
122	#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
123	#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
124	#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
125	#define STRING_STATS_REMOVE_STRING(string) ((void)0)
126	#define STRING_STATS_REF_STRING(string) ((void)0)
127	#define STRING_STATS_DEREF_STRING(string) ((void)0)
128
129	#endif
130
131	class StringImplShape {
132	WTF_MAKE_NONCOPYABLE(StringImplShape);
133	public:
134	static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max();
135
136	protected:
137	StringImplShape(unsigned refCount, unsigned length, const LChar, unsigned* hashAndFlags);
138	StringImplShape(unsigned refCount, unsigned length, const UChar, unsigned* hashAndFlags);
139
140	enum ConstructWithConstExprTag { ConstructWithConstExpr };
141	template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
142	template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
143
144	unsigned m_refCount;
145	unsigned m_length;
146	union {
147	const LChar* m_data8;
148	const UChar* m_data16;
149	// It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++.
150	// These are needed to avoid reinterpret_cast.
151	const char* m_data8Char;
152	const char16_t* m_data16Char;
153	};
154	mutable unsigned m_hashAndFlags;
155	};
156
157	// FIXME: Use of StringImpl and const is rather confused.
158	// The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&.
// We could mark every member function const and always use "const StringImpl&" and "const StringImpl*".
// Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere.
161	// Right now we use a mix of both, which makes code more confusing and has no benefit.
162
163	class StringImpl : private StringImplShape {
164	WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
165
166	friend class AtomStringImpl;
167	friend class JSC::LLInt::Data;
168	friend class JSC::LLIntOffsetsExtractor;
169	friend class PrivateSymbolImpl;
170	friend class RegisteredSymbolImpl;
171	friend class SymbolImpl;
172	friend class ExternalStringImpl;
173
174	friend struct WTF::CStringTranslator;
175	friend struct WTF::HashAndUTF8CharactersTranslator;
176	friend struct WTF::LCharBufferTranslator;
177	friend struct WTF::SubstringTranslator;
178	friend struct WTF::UCharBufferTranslator;
179
180	template<typename> friend struct WTF::BufferFromStaticDataTranslator;
181	template<typename> friend struct WTF::HashAndCharactersTranslator;
182
183	public:
184	enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal };
185
186	static constexpr unsigned MaxLength = StringImplShape::MaxLength;
187
188	// The bottom 6 bits in the hash are flags.
189	static constexpr const unsigned s_flagCount = `6`;
190	private:
191	static constexpr const unsigned s_flagMask = (`1u` << s_flagCount) - `1`;
192	static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags");
193	static constexpr const unsigned s_flagStringKindCount = `4`;
194
195	static constexpr const unsigned s_hashFlagStringKindIsAtom = `1u` << (s_flagStringKindCount);
196	static constexpr const unsigned s_hashFlagStringKindIsSymbol = `1u` << (s_flagStringKindCount + `1`);
197	static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtom \| s_hashFlagStringKindIsSymbol;
198	static constexpr const unsigned s_hashFlagDidReportCost = `1u` << `3`;
199	static constexpr const unsigned s_hashFlag8BitBuffer = `1u` << `2`;
200	static constexpr const unsigned s_hashMaskBufferOwnership = (`1u` << `0`) \| (`1u` << `1`);
201
202	enum StringKind {
203	StringNormal = `0u`, // non-symbol, non-atomic
204	StringAtom = s_hashFlagStringKindIsAtom, // non-symbol, atomic
205	StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
206	};
207
208	// Create a normal 8-bit string with internal storage (BufferInternal).
209	enum Force8Bit { Force8BitConstructor };
210	StringImpl(unsigned length, Force8Bit);
211
212	// Create a normal 16-bit string with internal storage (BufferInternal).
213	explicit StringImpl(unsigned length);
214
215	// Create a StringImpl adopting ownership of the provided buffer (BufferOwned).
216	StringImpl(MallocPtr<LChar>, unsigned length);
217	StringImpl(MallocPtr<UChar>, unsigned length);
218	enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
219	StringImpl(const UChar, unsigned* length, ConstructWithoutCopyingTag);
220	StringImpl(const LChar, unsigned* length, ConstructWithoutCopyingTag);
221
222	// Used to create new strings that are a substring of an existing StringImpl (BufferSubstring).
223	StringImpl(const LChar, unsigned* length, Ref<StringImpl>&&);
224	StringImpl(const UChar, unsigned* length, Ref<StringImpl>&&);
225
226	public:
227	WTF_EXPORT_PRIVATE static void destroy(StringImpl*);
228
229	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar, unsigned* length);
230	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar, unsigned* length);
231	WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar, unsigned* length);
232	template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&);
233	WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*);
234
235	ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); }
236	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*);
237	ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); }
238
239	static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length);
240
241	template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]);
242
243	// FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead.
244	WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char, unsigned* length);
245	WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*);
246
247	WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar, unsigned* length);
248	WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar, unsigned* length);
249	WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&);
250	WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&);
251	template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&);
252
253	// Reallocate the StringImpl. The originalString must be only owned by the Ref,
254	// and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
255	// the originalString can't be used after this function.
256	static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
257	static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
258	static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
259	static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
260
261	static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
262	static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
263	static constexpr unsigned flagIsAtom() { return s_hashFlagStringKindIsAtom; }
264	static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
265	static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; }
266	static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }
267
268	template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
269	static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&);
270
271	WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&);
272	WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&);
273
274	unsigned length() const { return m_length; }
275	static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
276	bool isEmpty() const { return !m_length; }
277
278	bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
279	ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
280	ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }
281
282	template<typename CharacterType> const CharacterType* characters() const;
283
284	size_t cost() const;
285	size_t costDuringGC();
286
287	WTF_EXPORT_PRIVATE size_t sizeInBytes() const;
288
289	bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
290	bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtom; }
291	void setIsAtom(bool);
292
293	bool isExternal() const { return bufferOwnership() == BufferExternal; }
294
295	bool isSubString() const { return bufferOwnership() == BufferSubstring; }
296
297	static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length);
298	static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
299
300	WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
301	WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const;
302	WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const;
303
304	private:
305	static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);
306
307	// The high bits of 'hash' are always empty, but we prefer to store our flags
308	// in the low bits because it makes them slightly more efficient to access.
309	// So, we shift left and right when setting and getting our hash code.
310	void setHash(unsigned) const;
311
312	unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; }
313
314	public:
315	bool hasHash() const { return !!rawHash(); }
316
317	unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); }
318	unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); }
319
320	WTF_EXPORT_PRIVATE unsigned concurrentHash() const;
321
322	unsigned symbolAwareHash() const;
323	unsigned existingSymbolAwareHash() const;
324
325	bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
326
327	size_t refCount() const { return m_refCount / s_refCountIncrement; }
328	bool hasOneRef() const { return m_refCount == s_refCountIncrement; }
329	bool hasAtLeastOneRef() const { return m_refCount; } // For assertions.
330
331	void ref();
332	void deref();
333
334	class StaticStringImpl : private StringImplShape {
335	WTF_MAKE_NONCOPYABLE(StaticStringImpl);
336	public:
337	// Used to construct static strings, which have an special refCount that can never hit zero.
338	// This means that the static string will never be destroyed, which is important because
339	// static strings will be shared across threads & ref-counted in a non-threadsafe manner.
340	//
341	// In order to make StaticStringImpl thread safe, we also need to ensure that the rest of
342	// the fields are never mutated by threads. We have this guarantee because:
343	//
344	// 1. m_length is only set on construction and never mutated thereafter.
345	//
346	// 2. m_data8 and m_data16 are only set on construction and never mutated thereafter.
347	// We also know that a StringImpl never changes from 8 bit to 16 bit because there
348	// is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction.
349	//
350	// 3. m_hashAndFlags will not be mutated by different threads because:
351	//
352	// a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure
353	// that StringImpl::cost() returns early.
354	// This means StaticStringImpl costs are not counted. But since there should only
355	// be a finite set of StaticStringImpls, their cost can be aggregated into a single
356	// system cost if needed.
357	// b. setIsAtom() is never called on a StaticStringImpl.
358	// setIsAtom() asserts !isStatic().
359	// c. setHash() is never called on a StaticStringImpl.
360	// StaticStringImpl's constructor sets the hash on construction.
361	// StringImpl::hash() only sets a new hash iff !hasHash().
362	// Additionally, StringImpl::setHash() asserts hasHash() and !isStatic().
363
364	template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal);
365	template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal);
366	operator StringImpl&();
367	};
368
369	WTF_EXPORT_PRIVATE static StaticStringImpl s_emptyAtomString;
370	ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_emptyAtomString); }
371
372	// FIXME: Does this really belong in StringImpl?
373	template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters);
374	static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters);
375
376	// Some string features, like reference counting and the atomicity flag, are not
377	// thread-safe. We achieve thread safety by isolation, giving each thread
378	// its own copy of the string.
379	Ref<StringImpl> isolatedCopy() const;
380
381	WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength);
382
383	UChar at(unsigned) const;
384	UChar operator[](unsigned i) const { return at(i); }
385	WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned);
386
387	int toIntStrict(bool* ok = `0`, int base = `10`);
388	unsigned toUIntStrict(bool* ok = `0`, int base = `10`);
389	int64_t toInt64Strict(bool* ok = `0`, int base = `10`);
390	uint64_t toUInt64Strict(bool* ok = `0`, int base = `10`);
391	intptr_t toIntPtrStrict(bool* ok = `0`, int base = `10`);
392
393	WTF_EXPORT_PRIVATE int toInt(bool* ok = `0`); // ignores trailing garbage
394	unsigned toUInt(bool* ok = `0`); // ignores trailing garbage
395	int64_t toInt64(bool* ok = `0`); // ignores trailing garbage
396	uint64_t toUInt64(bool* ok = `0`); // ignores trailing garbage
397	intptr_t toIntPtr(bool* ok = `0`); // ignores trailing garbage
398
399	// FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
400	// Like the non-strict functions above, these return the value when there is trailing garbage.
401	// It would be better if these were more consistent with the above functions instead.
402	double toDouble(bool* ok = `0`);
403	float toFloat(bool* ok = `0`);
404
405	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase();
406	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase();
407	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale();
408	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned);
409	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale();
410	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier);
411	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier);
412
413	Ref<StringImpl> foldCase();
414
415	Ref<StringImpl> stripWhiteSpace();
416	WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace();
417	Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction);
418
419	Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction);
420	Ref<StringImpl> removeCharacters(CodeUnitMatchFunction);
421
422	bool isAllASCII() const;
423	bool isAllLatin1() const;
424	template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
425
426	size_t find(LChar character, unsigned start = `0`);
427	size_t find(char character, unsigned start = `0`);
428	size_t find(UChar character, unsigned start = `0`);
429	WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = `0`);
430	size_t find(const LChar, unsigned* index = `0`);
431	ALWAYS_INLINE size_t find(const char* string, unsigned index = `0`) { return find(reinterpret_cast<const LChar*>(string), index); }
432	WTF_EXPORT_PRIVATE size_t find(StringImpl*);
433	WTF_EXPORT_PRIVATE size_t find(StringImpl, unsigned* index);
434	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const;
435	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
436	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl) const*;
437	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl, unsigned* startOffset) const;
438
439	WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength);
440	WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl, unsigned* index = MaxLength);
441
442	WTF_EXPORT_PRIVATE bool startsWith(const StringImpl) const*;
443	WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const;
444	WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl) const*;
445	WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const;
446	WTF_EXPORT_PRIVATE bool startsWith(UChar) const;
447	WTF_EXPORT_PRIVATE bool startsWith(const char, unsigned* matchLength) const;
448	template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - `1`); }
449	WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;
450
451	WTF_EXPORT_PRIVATE bool endsWith(StringImpl*);
452	WTF_EXPORT_PRIVATE bool endsWith(StringImpl&);
453	WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl) const*;
454	WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const;
455	WTF_EXPORT_PRIVATE bool endsWith(UChar) const;
456	WTF_EXPORT_PRIVATE bool endsWith(const char, unsigned* matchLength) const;
457	template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - `1`); }
458	WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;
459
460	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar);
461	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*);
462	ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
463	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar, unsigned* replacementLength);
464	Ref<StringImpl> replace(UChar, const UChar, unsigned* replacementLength);
465	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl, StringImpl);
466	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*);
467
468	WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);
469
470	#if USE(CF)
471	RetainPtr<CFStringRef> createCFString();
472	#endif
473
474	#ifdef __OBJC__
475	WTF_EXPORT_PRIVATE operator NSString *();
476	#endif
477
478	#if STRING_STATS
479	ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
480	#endif
481
482	BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }
483
484	template<typename T> static size_t headerSize() { return tailOffset<T>(); }
485
486	protected:
487	~StringImpl();
488
489	// Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring).
490	enum CreateSymbolTag { CreateSymbol };
491	StringImpl(CreateSymbolTag, const LChar, unsigned* length);
492	StringImpl(CreateSymbolTag, const UChar, unsigned* length);
493
494	// Null symbol.
495	explicit StringImpl(CreateSymbolTag);
496
497	private:
498	template<typename> static size_t allocationSize(Checked<size_t> tailElementCount);
499	template<typename> static size_t maxInternalLength();
500	template<typename> static size_t tailOffset();
501
502	bool requiresCopy() const;
503	template<typename T> const T* tailPointer() const;
504	template<typename T> T* tailPointer();
505	StringImpl* const& substringBuffer() const;
506	StringImpl*& substringBuffer();
507
508	enum class CaseConvertType { Upper, Lower };
509	template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType, unsigned*);
510
511	template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate);
512	template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction);
513	template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate);
514	template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned);
515	template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&);
516	template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&);
517	template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&);
518	template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType, unsigned*);
519	WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
520
521	// The bottom bit in the ref count indicates a static (immortal) string.
522	static constexpr unsigned s_refCountFlagIsStaticString = `0x1`;
523	static constexpr unsigned s_refCountIncrement = `0x2`; // This allows us to ref / deref without disturbing the static string flag.
524
525	#if STRING_STATS
526	WTF_EXPORT_PRIVATE static StringStats m_stringStats;
527	#endif
528
529	public:
530	void assertHashIsCorrect() const;
531	};
532
533	using StaticStringImpl = StringImpl::StaticStringImpl;
534
535	static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "");
536
537	#if !ASSERT_DISABLED
538
539	// StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency
540	// as they are not allocated by fastMalloc. We don't currently have any way to detect that case
541	// so we ignore the consistency check for all StringImpl.*
542	template<> struct ValueCheck<StringImpl*> {
543	static void checkConsistency(const StringImpl*) { }
544	};
545
546	#endif
547
548	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* StringImpl*);
549	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* LChar*);
550	inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
551	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* LChar, unsigned*);
552	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* UChar, unsigned*);
553	inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
554	inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
555	inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
556	WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b);
557
558	WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl, StringImpl);
559	WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar, size_t length, StringImpl);
560
561	bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
562	WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl, const* StringImpl*);
563	bool equalIgnoringASCIICase(const StringImpl&, const char*);
564	bool equalIgnoringASCIICase(const StringImpl, const* char*);
565
566	WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl, const* StringImpl*);
567
568	template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
569	template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl, const* char (&lowercaseLetters)[length]);
570
571	size_t find(const LChar, unsigned* length, CodeUnitMatchFunction, unsigned index = `0`);
572	size_t find(const UChar, unsigned* length, CodeUnitMatchFunction, unsigned index = `0`);
573
574	template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType, unsigned* length, unsigned index = StringImpl::MaxLength);
575	template<typename CharacterType> size_t reverseFind(const CharacterType, unsigned* length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
576	size_t reverseFind(const UChar, unsigned* length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
577	size_t reverseFind(const LChar, unsigned* length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);
578
579	template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);
580
581	template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1, unsigned* length1, const CharacterType2, unsigned* length2);
582	int codePointCompare(const StringImpl, const* StringImpl*);
583
584	// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
585	// Most WebKit callers don't want that would use isASCIISpace or isHTMLSpace instead.
586	bool isSpaceOrNewline(UChar32);
587
588	template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
589
590	// StringHash is the default hash for StringImpl and RefPtr<StringImpl>*
591	template<typename T> struct DefaultHash;
592	template<> struct DefaultHash<StringImpl*> {
593	typedef StringHash Hash;
594	};
595	template<> struct DefaultHash<RefPtr<StringImpl>> {
596	typedef StringHash Hash;
597	};
598
// Expands to an immediately-invoked lambda that owns a function-local static
// StaticStringImpl built from `characters` and yields a pointer to it.
#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
        static StaticStringImpl staticImpl(characters); \
        return &staticImpl; \
    }())
603
604	template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length)
605	{
606	return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor });
607	}
608
609	template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length)
610	{
611	return adoptRef(*new (NotNull, &string) StringImpl { length });
612	}
613
614	template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
615	{
616	return characters8();
617	}
618
619	template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
620	{
621	return characters16();
622	}
623
624	inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
625	{
626	while (index < length) {
627	if (matchFunction(characters[index]))
628	return index;
629	++index;
630	}
631	return notFound;
632	}
633
634	inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
635	{
636	while (index < length) {
637	if (matchFunction(characters[index]))
638	return index;
639	++index;
640	}
641	return notFound;
642	}
643
644	template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index)
645	{
646	if (!length)
647	return notFound;
648	if (index >= length)
649	index = length - `1`;
650	auto character = characters[index];
651	while (character != `'\n'` && character != `'\r'`) {
652	if (!index--)
653	return notFound;
654	character = characters[index];
655	}
656	return index;
657	}
658
659	template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index)
660	{
661	if (!length)
662	return notFound;
663	if (index >= length)
664	index = length - `1`;
665	while (characters[index] != matchCharacter) {
666	if (!index--)
667	return notFound;
668	}
669	return index;
670	}
671
672	ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index)
673	{
674	return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
675	}
676
677	inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index)
678	{
679	if (!isLatin1(matchCharacter))
680	return notFound;
681	return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
682	}
683
684	inline size_t StringImpl::find(LChar character, unsigned start)
685	{
686	if (is8Bit())
687	return WTF::find(characters8(), m_length, character, start);
688	return WTF::find(characters16(), m_length, character, start);
689	}
690
691	ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
692	{
693	return find(static_cast<LChar>(character), start);
694	}
695
696	inline size_t StringImpl::find(UChar character, unsigned start)
697	{
698	if (is8Bit())
699	return WTF::find(characters8(), m_length, character, start);
700	return WTF::find(characters16(), m_length, character, start);
701	}
702
703	template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
704	{
705	return equalIgnoringNullity(a.data(), a.size(), b);
706	}
707
// Compares two character buffers code unit by code unit.
// Returns -1, 0 or 1 when the first buffer orders before, equal to, or after
// the second. If one buffer is a prefix of the other, the shorter one orders first.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    unsigned commonLength = std::min(length1, length2);

    // Skip the common prefix. The characters must be compared, not the pointers.
    unsigned position = 0;
    while (position < commonLength && *characters1 == *characters2) {
        ++characters1;
        ++characters2;
        ++position;
    }

    // A mismatch inside the common range decides the ordering.
    if (position < commonLength)
        return (characters1[0] > characters2[0]) ? 1 : -1;

    if (length1 == length2)
        return 0;
    return (length1 > length2) ? 1 : -1;
}
726
727	inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
728	{
729	// FIXME: Should null strings compare as less than empty strings rather than equal to them?
730	if (!string1)
731	return (string2 && string2->length()) ? -`1` : `0`;
732	if (!string2)
733	return string1->length() ? `1` : `0`;
734
735	bool string1Is8Bit = string1->is8Bit();
736	bool string2Is8Bit = string2->is8Bit();
737	if (string1Is8Bit) {
738	if (string2Is8Bit)
739	return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length());
740	return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length());
741	}
742	if (string2Is8Bit)
743	return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length());
744	return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length());
745	}
746
747	inline bool isSpaceOrNewline(UChar32 character)
748	{
749	// Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS.
750	return isLatin1(character) ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL;
751	}
752
753	template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
754	{
755	ASSERT(string);
756	size_t length = `0`;
757	while (string[length])
758	++length;
759
760	RELEASE_ASSERT(length < StringImpl::MaxLength);
761	return static_cast<unsigned>(length);
762	}
763
764	inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
765	: m_refCount(refCount)
766	, m_length(length)
767	, m_data8(data8)
768	, m_hashAndFlags(hashAndFlags)
769	{
770	}
771
772	inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
773	: m_refCount(refCount)
774	, m_length(length)
775	, m_data16(data16)
776	, m_hashAndFlags(hashAndFlags)
777	{
778	}
779
780	template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
781	: m_refCount(refCount)
782	, m_length(length)
783	, m_data8Char(characters)
784	, m_hashAndFlags(hashAndFlags)
785	{
786	}
787
788	template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
789	: m_refCount(refCount)
790	, m_length(length)
791	, m_data16Char(characters)
792	, m_hashAndFlags(hashAndFlags)
793	{
794	}
795
796	inline Ref<StringImpl> StringImpl::isolatedCopy() const
797	{
798	if (!requiresCopy()) {
799	if (is8Bit())
800	return StringImpl::createWithoutCopying(m_data8, m_length);
801	return StringImpl::createWithoutCopying(m_data16, m_length);
802	}
803
804	if (is8Bit())
805	return create(m_data8, m_length);
806	return create(m_data16, m_length);
807	}
808
809	inline bool StringImpl::isAllASCII() const
810	{
811	if (is8Bit())
812	return charactersAreAllASCII(characters8(), length());
813	return charactersAreAllASCII(characters16(), length());
814	}
815
816	inline bool StringImpl::isAllLatin1() const
817	{
818	if (is8Bit())
819	return true;
820	auto* characters = characters16();
821	UChar ored = `0`;
822	for (size_t i = `0`; i < length(); ++i)
823	ored \|= characters[i];
824	return !(ored & `0xFF00`);
825	}
826
827	template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length)
828	{
829	for (size_t i = `0`; i < length; ++i) {
830	if (!isSpecialCharacter(characters[i]))
831	return false;
832	}
833	return true;
834	}
835
836	template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const
837	{
838	if (is8Bit())
839	return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length());
840	return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length());
841	}
842
843	inline StringImpl::StringImpl(unsigned length, Force8Bit)
844	: StringImplShape (s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer \| StringNormal \| BufferInternal)
845	{
846	ASSERT(m_data8);
847	ASSERT(m_length);
848
849	STRING_STATS_ADD_8BIT_STRING(m_length);
850	}
851
852	inline StringImpl::StringImpl(unsigned length)
853	: StringImplShape (s_refCountIncrement, length, tailPointer<UChar>(), StringNormal \| BufferInternal)
854	{
855	ASSERT(m_data16);
856	ASSERT(m_length);
857
858	STRING_STATS_ADD_16BIT_STRING(m_length);
859	}
860
861	inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
862	: StringImplShape (s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer \| StringNormal \| BufferOwned)
863	{
864	ASSERT(m_data8);
865	ASSERT(m_length);
866
867	STRING_STATS_ADD_8BIT_STRING(m_length);
868	}
869
870	inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
871	: StringImplShape (s_refCountIncrement, length, characters, StringNormal \| BufferInternal)
872	{
873	ASSERT(m_data16);
874	ASSERT(m_length);
875
876	STRING_STATS_ADD_16BIT_STRING(m_length);
877	}
878
879	inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
880	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringNormal \| BufferInternal)
881	{
882	ASSERT(m_data8);
883	ASSERT(m_length);
884
885	STRING_STATS_ADD_8BIT_STRING(m_length);
886	}
887
888	inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
889	: StringImplShape (s_refCountIncrement, length, characters.leakPtr(), StringNormal \| BufferOwned)
890	{
891	ASSERT(m_data16);
892	ASSERT(m_length);
893
894	STRING_STATS_ADD_16BIT_STRING(m_length);
895	}
896
897	inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
898	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringNormal \| BufferSubstring)
899	{
900	ASSERT(is8Bit());
901	ASSERT(m_data8);
902	ASSERT(m_length);
903	ASSERT(base ->bufferOwnership() != BufferSubstring);
904
905	substringBuffer() = &base.leakRef();
906
907	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
908	}
909
910	inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
911	: StringImplShape (s_refCountIncrement, length, characters, StringNormal \| BufferSubstring)
912	{
913	ASSERT(!is8Bit());
914	ASSERT(m_data16);
915	ASSERT(m_length);
916	ASSERT(base ->bufferOwnership() != BufferSubstring);
917
918	substringBuffer() = &base.leakRef();
919
920	STRING_STATS_ADD_16BIT_STRING2(m_length, true);
921	}
922
923	template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
924	{
925	return create8BitIfPossible(vector.data(), vector.size());
926	}
927
928	ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length)
929	{
930	ASSERT(length <= rep.length());
931
932	if (!length)
933	return *empty();
934
935	// Coyping the thing would save more memory sometimes, largely due to the size of pointer.
936	size_t substringSize = allocationSize<StringImpl*>(`1`);
937	if (rep.is8Bit()) {
938	if (substringSize >= allocationSize<LChar>(length))
939	return create(rep.m_data8 + offset, length);
940	} else {
941	if (substringSize >= allocationSize<UChar>(length))
942	return create(rep.m_data16 + offset, length);
943	}
944
945	auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep);
946
947	// We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
948	auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize));
949	if (rep.is8Bit())
950	return adoptRef(*new (NotNull, stringImpl) StringImpl (rep.m_data8 + offset, length, *ownerRep));
951	return adoptRef(*new (NotNull, stringImpl) StringImpl (rep.m_data16 + offset, length, *ownerRep));
952	}
953
954	template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
955	{
956	COMPILE_ASSERT(characterCount > `1`, StringImplFromLiteralNotEmpty);
957	COMPILE_ASSERT((characterCount - `1` <= ((unsigned(~`0`) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);
958
959	return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - `1`);
960	}
961
962	template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output)
963	{
964	if (!length) {
965	output = nullptr;
966	return empty();
967	}
968
969	if (length > maxInternalLength<CharacterType>()) {
970	output = nullptr;
971	return nullptr;
972	}
973	StringImpl* result;
974	if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) {
975	output = nullptr;
976	return nullptr;
977	}
978	output = result->tailPointer<CharacterType>();
979
980	return constructInternal<CharacterType>(*result, length);
981	}
982
983	template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
984	inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector)
985	{
986	if (size_t size = vector.size()) {
987	ASSERT(vector.data());
988	if (size > MaxLength)
989	CRASH();
990	return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
991	}
992	return *empty();
993	}
994
995	inline size_t StringImpl::cost() const
996	{
997	// For substrings, return the cost of the base string.
998	if (bufferOwnership() == BufferSubstring)
999	return substringBuffer()->cost();
1000
1001	// Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl.
1002	// We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of
1003	// StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of
1004	// 0 here and avoid modifying m_hashAndFlags.
1005	if (m_hashAndFlags & s_hashFlagDidReportCost)
1006	return `0`;
1007
1008	m_hashAndFlags \|= s_hashFlagDidReportCost;
1009	size_t result = m_length;
1010	if (!is8Bit())
1011	result <<= `1`;
1012	return result;
1013	}
1014
1015	inline size_t StringImpl::costDuringGC()
1016	{
1017	if (isStatic())
1018	return `0`;
1019
1020	if (bufferOwnership() == BufferSubstring)
1021	return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
1022
1023	size_t result = m_length;
1024	if (!is8Bit())
1025	result <<= `1`;
1026	return divideRoundedUp(result, refCount());
1027	}
1028
1029	inline void StringImpl::setIsAtom(bool isAtom)
1030	{
1031	ASSERT(!isStatic());
1032	ASSERT(!isSymbol());
1033	if (isAtom)
1034	m_hashAndFlags \|= s_hashFlagStringKindIsAtom;
1035	else
1036	m_hashAndFlags &= ~s_hashFlagStringKindIsAtom;
1037	}
1038
1039	inline void StringImpl::setHash(unsigned hash) const
1040	{
1041	// The high bits of 'hash' are always empty, but we prefer to store our flags
1042	// in the low bits because it makes them slightly more efficient to access.
1043	// So, we shift left and right when setting and getting our hash code.
1044
1045	ASSERT(!hasHash());
1046	ASSERT(!isStatic());
1047	// Multiple clients assume that StringHasher is the canonical string hash function.
1048	ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
1049	ASSERT(!(hash & (s_flagMask << (`8` * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.
1050
1051	hash <<= s_flagCount;
1052	ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
1053	ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
1054
1055	m_hashAndFlags \|= hash; // Store hash with flags in low bits.
1056	}
1057
1058	inline void StringImpl::ref()
1059	{
1060	STRING_STATS_REF_STRING(*this);
1061
1062	m_refCount += s_refCountIncrement;
1063	}
1064
1065	inline void StringImpl::deref()
1066	{
1067	STRING_STATS_DEREF_STRING(*this);
1068
1069	unsigned tempRefCount = m_refCount - s_refCountIncrement;
1070	if (!tempRefCount) {
1071	StringImpl::destroy(this);
1072	return;
1073	}
1074	m_refCount = tempRefCount;
1075	}
1076
1077	template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters)
1078	{
1079	if (numCharacters == `1`) {
1080	destination = source;
1081	return;
1082	}
1083	memcpy(destination, source, numCharacters * sizeof(CharacterType));
1084	}
1085
1086	ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters)
1087	{
1088	for (unsigned i = `0`; i < numCharacters; ++i)
1089	destination[i] = source[i];
1090	}
1091
1092	inline UChar StringImpl::at(unsigned i) const
1093	{
1094	ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
1095	return is8Bit() ? m_data8[i] : m_data16[i];
1096	}
1097
1098	inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
1099	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringSymbol \| BufferSubstring)
1100	{
1101	ASSERT(is8Bit());
1102	ASSERT(m_data8);
1103	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
1104	}
1105
1106	inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
1107	: StringImplShape (s_refCountIncrement, length, characters, StringSymbol \| BufferSubstring)
1108	{
1109	ASSERT(!is8Bit());
1110	ASSERT(m_data16);
1111	STRING_STATS_ADD_16BIT_STRING2(m_length, true);
1112	}
1113
1114	inline StringImpl::StringImpl(CreateSymbolTag)
1115	: StringImplShape (s_refCountIncrement, `0`, empty()->characters8(), s_hashFlag8BitBuffer \| StringSymbol \| BufferSubstring)
1116	{
1117	ASSERT(is8Bit());
1118	ASSERT(m_data8);
1119	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
1120	}
1121
1122	template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
1123	{
1124	return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
1125	}
1126
1127	template<typename CharacterType>
1128	inline size_t StringImpl::maxInternalLength()
1129	{
1130	// In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2.
1131	return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType));
1132	}
1133
1134	template<typename T> inline size_t StringImpl::tailOffset()
1135	{
1136	#if COMPILER(MSVC)
1137	// MSVC doesn't support alignof yet.
1138	return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
1139	#else
1140	return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
1141	#endif
1142	}
1143
1144	inline bool StringImpl::requiresCopy() const
1145	{
1146	if (bufferOwnership() != BufferInternal)
1147	return true;
1148
1149	if (is8Bit())
1150	return m_data8 == tailPointer<LChar>();
1151	return m_data16 == tailPointer<UChar>();
1152	}
1153
1154	template<typename T> inline const T* StringImpl::tailPointer() const
1155	{
1156	return reinterpret_cast_ptr<const T>(reinterpret_cast<const* uint8_t>(this*) + tailOffset<T>());
1157	}
1158
1159	template<typename T> inline T* StringImpl::tailPointer()
1160	{
1161	return reinterpret_cast_ptr<T>(reinterpret_cast<uint8_t>(this) + tailOffset<T>());
1162	}
1163
1164	inline StringImpl* const& StringImpl::substringBuffer() const
1165	{
1166	ASSERT(bufferOwnership() == BufferSubstring);
1167
1168	return tailPointer<StringImpl>();
1169	}
1170
1171	inline StringImpl*& StringImpl::substringBuffer()
1172	{
1173	ASSERT(bufferOwnership() == BufferSubstring);
1174
1175	return tailPointer<StringImpl>();
1176	}
1177
1178	inline void StringImpl::assertHashIsCorrect() const
1179	{
1180	ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
1181	}
1182
1183	template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
1184	: StringImplShape(s_refCountFlagIsStaticString, characterCount - `1`, characters,
1185	s_hashFlag8BitBuffer \| s_hashFlagDidReportCost \| stringKind \| BufferInternal \| (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
1186	{
1187	}
1188
1189	template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
1190	: StringImplShape(s_refCountFlagIsStaticString, characterCount - `1`, characters,
1191	s_hashFlagDidReportCost \| stringKind \| BufferInternal \| (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
1192	{
1193	}
1194
1195	inline StringImpl::StaticStringImpl::operator StringImpl&()
1196	{
1197	return *reinterpret_cast<StringImpl>(this*);
1198	}
1199
1200	inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
1201	{
1202	return equalIgnoringASCIICaseCommon(a, b);
1203	}
1204
1205	inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
1206	{
1207	return equalIgnoringASCIICaseCommon(a, b);
1208	}
1209
1210	inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
1211	{
1212	return a && equalIgnoringASCIICase(*a, b);
1213	}
1214
1215	template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
1216	{
1217	return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
1218	}
1219
1220	template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1221	{
1222	return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
1223	}
1224
1225	template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
1226	{
1227	return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
1228	}
1229
1230	template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1231	{
1232	return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
1233	}
1234
1235	} // namespace WTF
1236
1237	using WTF::StaticStringImpl;
1238	using WTF::StringImpl;
1239	using WTF::equal;
1240

// Source: jsc/Source/WTF/wtf/text/StringImpl.h