StringImpl.h source code [webcore/DerivedSources/ForwardingHeaders/wtf/text/StringImpl.h]

1	/*
2	* Copyright (C) 1999 Lars Knoll ([email protected])
3	* Copyright (C) 2005-2018 Apple Inc. All rights reserved.
4	* Copyright (C) 2009 Google Inc. All rights reserved.
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#pragma once
24
25	#include <limits.h>
26	#include <unicode/ustring.h>
27	#include <wtf/ASCIICType.h>
28	#include <wtf/CheckedArithmetic.h>
29	#include <wtf/Expected.h>
30	#include <wtf/MathExtras.h>
31	#include <wtf/StdLibExtras.h>
32	#include <wtf/Vector.h>
33	#include <wtf/text/ASCIIFastPath.h>
34	#include <wtf/text/ConversionMode.h>
35	#include <wtf/text/StringCommon.h>
36	#include <wtf/text/StringHasher.h>
37	#include <wtf/text/UTF8ConversionError.h>
38
39	#if USE(CF)
40	typedef const struct __CFString * CFStringRef;
41	#endif
42
43	#ifdef __OBJC__
44	@class NSString;
45	#endif
46
47	namespace JSC {
48	namespace LLInt { class Data; }
49	class LLIntOffsetsExtractor;
50	}
51
52	namespace WTF {
53
54	class SymbolImpl;
55	class SymbolRegistry;
56
57	struct CStringTranslator;
58	struct HashAndUTF8CharactersTranslator;
59	struct LCharBufferTranslator;
60	struct StringHash;
61	struct SubstringTranslator;
62	struct UCharBufferTranslator;
63
64	template<typename> class RetainPtr;
65
66	template<typename> struct BufferFromStaticDataTranslator;
67	template<typename> struct HashAndCharactersTranslator;
68
69	// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
70	#define STRING_STATS 0
71
72	template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length);
73
74	#if STRING_STATS
75
76	struct StringStats {
77	void add8BitString(unsigned length, bool isSubString = false)
78	{
79	++m_totalNumberStrings;
80	++m_number8BitStrings;
81	if (!isSubString)
82	m_total8BitData += length;
83	}
84
85	void add16BitString(unsigned length, bool isSubString = false)
86	{
87	++m_totalNumberStrings;
88	++m_number16BitStrings;
89	if (!isSubString)
90	m_total16BitData += length;
91	}
92
93	void removeString(StringImpl&);
94	void printStats();
95
96	static const unsigned s_printStringStatsFrequency = `5000`;
97	static std::atomic<unsigned> s_stringRemovesTillPrintStats;
98
99	std::atomic<unsigned> m_refCalls;
100	std::atomic<unsigned> m_derefCalls;
101
102	std::atomic<unsigned> m_totalNumberStrings;
103	std::atomic<unsigned> m_number8BitStrings;
104	std::atomic<unsigned> m_number16BitStrings;
105	std::atomic<unsigned long long> m_total8BitData;
106	std::atomic<unsigned long long> m_total16BitData;
107	};
108
109	#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
110	#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
111	#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
112	#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
113	#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
114	#define STRING_STATS_REF_STRING(string) ++StringImpl::stringStats().m_refCalls;
115	#define STRING_STATS_DEREF_STRING(string) ++StringImpl::stringStats().m_derefCalls;
116
117	#else
118
119	#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
120	#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
121	#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
122	#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
123	#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
124	#define STRING_STATS_REMOVE_STRING(string) ((void)0)
125	#define STRING_STATS_REF_STRING(string) ((void)0)
126	#define STRING_STATS_DEREF_STRING(string) ((void)0)
127
128	#endif
129
// Returns true when the character's value, converted to the unsigned counterpart of
// CharacterType, is at most 0xFF. Converting to unsigned first means a negative value
// of a signed type wider than 8 bits compares as a large number and is rejected.
template<typename CharacterType> inline bool isLatin1(CharacterType character)
{
    using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
    return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
}
135
136	class StringImplShape {
137	WTF_MAKE_NONCOPYABLE(StringImplShape);
138	public:
139	static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max();
140
141	protected:
142	StringImplShape(unsigned refCount, unsigned length, const LChar, unsigned* hashAndFlags);
143	StringImplShape(unsigned refCount, unsigned length, const UChar, unsigned* hashAndFlags);
144
145	enum ConstructWithConstExprTag { ConstructWithConstExpr };
146	template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
147	template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
148
149	unsigned m_refCount;
150	unsigned m_length;
151	union {
152	const LChar* m_data8;
153	const UChar* m_data16;
154	// It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++.
155	// These are needed to avoid reinterpret_cast.
156	const char* m_data8Char;
157	const char16_t* m_data16Char;
158	};
159	mutable unsigned m_hashAndFlags;
160	};
161
162	// FIXME: Use of StringImpl and const is rather confused.
163	// The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&.
164	// We could mark every member function const and always use "const StringImpl&" and "const StringImpl*".
165	// Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere.
166	// Right now we use a mix of both, which makes code more confusing and has no benefit.
167
168	class StringImpl : private StringImplShape {
169	WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
170
171	friend class AtomStringImpl;
172	friend class JSC::LLInt::Data;
173	friend class JSC::LLIntOffsetsExtractor;
174	friend class PrivateSymbolImpl;
175	friend class RegisteredSymbolImpl;
176	friend class SymbolImpl;
177	friend class ExternalStringImpl;
178
179	friend struct WTF::CStringTranslator;
180	friend struct WTF::HashAndUTF8CharactersTranslator;
181	friend struct WTF::LCharBufferTranslator;
182	friend struct WTF::SubstringTranslator;
183	friend struct WTF::UCharBufferTranslator;
184
185	template<typename> friend struct WTF::BufferFromStaticDataTranslator;
186	template<typename> friend struct WTF::HashAndCharactersTranslator;
187
188	public:
189	enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal };
190
191	static constexpr unsigned MaxLength = StringImplShape::MaxLength;
192
193	// The bottom 6 bits in the hash are flags.
194	static constexpr const unsigned s_flagCount = `6`;
195	private:
196	static constexpr const unsigned s_flagMask = (`1u` << s_flagCount) - `1`;
197	static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags");
198	static constexpr const unsigned s_flagStringKindCount = `4`;
199
200	static constexpr const unsigned s_hashFlagStringKindIsAtomic = `1u` << (s_flagStringKindCount);
201	static constexpr const unsigned s_hashFlagStringKindIsSymbol = `1u` << (s_flagStringKindCount + `1`);
202	static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic \| s_hashFlagStringKindIsSymbol;
203	static constexpr const unsigned s_hashFlagDidReportCost = `1u` << `3`;
204	static constexpr const unsigned s_hashFlag8BitBuffer = `1u` << `2`;
205	static constexpr const unsigned s_hashMaskBufferOwnership = (`1u` << `0`) \| (`1u` << `1`);
206
207	enum StringKind {
208	StringNormal = `0u`, // non-symbol, non-atomic
209	StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic
210	StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
211	};
212
213	// Create a normal 8-bit string with internal storage (BufferInternal).
214	enum Force8Bit { Force8BitConstructor };
215	StringImpl(unsigned length, Force8Bit);
216
217	// Create a normal 16-bit string with internal storage (BufferInternal).
218	explicit StringImpl(unsigned length);
219
220	// Create a StringImpl adopting ownership of the provided buffer (BufferOwned).
221	StringImpl(MallocPtr<LChar>, unsigned length);
222	StringImpl(MallocPtr<UChar>, unsigned length);
223	enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
224	StringImpl(const UChar, unsigned* length, ConstructWithoutCopyingTag);
225	StringImpl(const LChar, unsigned* length, ConstructWithoutCopyingTag);
226
227	// Used to create new strings that are a substring of an existing StringImpl (BufferSubstring).
228	StringImpl(const LChar, unsigned* length, Ref<StringImpl>&&);
229	StringImpl(const UChar, unsigned* length, Ref<StringImpl>&&);
230
231	public:
232	WTF_EXPORT_PRIVATE static void destroy(StringImpl*);
233
234	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar, unsigned* length);
235	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar, unsigned* length);
236	WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar, unsigned* length);
237	template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&);
238	WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*);
239
240	ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); }
241	WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*);
242	ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); }
243
244	static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length);
245
246	template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]);
247
248	// FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead.
249	WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char, unsigned* length);
250	WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*);
251
252	WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar, unsigned* length);
253	WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar, unsigned* length);
254	WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&);
255	WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&);
256	template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&);
257
258	// Reallocate the StringImpl. The originalString must be only owned by the Ref,
259	// and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
260	// the originalString can't be used after this function.
261	static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
262	static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
263	static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
264	static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
265
266	static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
267	static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
268	static constexpr unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; }
269	static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
270	static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; }
271	static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }
272
273	template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
274	static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&);
275
276	WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&);
277	WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&);
278
279	unsigned length() const { return m_length; }
280	static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
281	bool isEmpty() const { return !m_length; }
282
283	bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
284	ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
285	ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }
286
287	template<typename CharacterType> const CharacterType* characters() const;
288
289	size_t cost() const;
290	size_t costDuringGC();
291
292	WTF_EXPORT_PRIVATE size_t sizeInBytes() const;
293
294	bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
295	bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; }
296	void setIsAtomic(bool);
297
298	bool isExternal() const { return bufferOwnership() == BufferExternal; }
299
300	bool isSubString() const { return bufferOwnership() == BufferSubstring; }
301
302	static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length);
303	static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
304
305	WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
306	WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const;
307	WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const;
308
309	private:
310	static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);
311
312	// The high bits of 'hash' are always empty, but we prefer to store our flags
313	// in the low bits because it makes them slightly more efficient to access.
314	// So, we shift left and right when setting and getting our hash code.
315	void setHash(unsigned) const;
316
317	unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; }
318
319	public:
320	bool hasHash() const { return !!rawHash(); }
321
322	unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); }
323	unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); }
324
325	WTF_EXPORT_PRIVATE unsigned concurrentHash() const;
326
327	unsigned symbolAwareHash() const;
328	unsigned existingSymbolAwareHash() const;
329
330	bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
331
332	size_t refCount() const { return m_refCount / s_refCountIncrement; }
333	bool hasOneRef() const { return m_refCount == s_refCountIncrement; }
334	bool hasAtLeastOneRef() const { return m_refCount; } // For assertions.
335
336	void ref();
337	void deref();
338
339	class StaticStringImpl : private StringImplShape {
340	WTF_MAKE_NONCOPYABLE(StaticStringImpl);
341	public:
342	// Used to construct static strings, which have an special refCount that can never hit zero.
343	// This means that the static string will never be destroyed, which is important because
344	// static strings will be shared across threads & ref-counted in a non-threadsafe manner.
345	//
346	// In order to make StaticStringImpl thread safe, we also need to ensure that the rest of
347	// the fields are never mutated by threads. We have this guarantee because:
348	//
349	// 1. m_length is only set on construction and never mutated thereafter.
350	//
351	// 2. m_data8 and m_data16 are only set on construction and never mutated thereafter.
352	// We also know that a StringImpl never changes from 8 bit to 16 bit because there
353	// is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction.
354	//
355	// 3. m_hashAndFlags will not be mutated by different threads because:
356	//
357	// a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure
358	// that StringImpl::cost() returns early.
359	// This means StaticStringImpl costs are not counted. But since there should only
360	// be a finite set of StaticStringImpls, their cost can be aggregated into a single
361	// system cost if needed.
362	// b. setIsAtomic() is never called on a StaticStringImpl.
363	// setIsAtomic() asserts !isStatic().
364	// c. setHash() is never called on a StaticStringImpl.
365	// StaticStringImpl's constructor sets the hash on construction.
366	// StringImpl::hash() only sets a new hash iff !hasHash().
367	// Additionally, StringImpl::setHash() asserts hasHash() and !isStatic().
368
369	template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal);
370	template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal);
371	operator StringImpl&();
372	};
373
374	WTF_EXPORT_PRIVATE static StaticStringImpl s_atomicEmptyString;
375	ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); }
376
377	// FIXME: Does this really belong in StringImpl?
378	template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters);
379	static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters);
380
381	// Some string features, like reference counting and the atomicity flag, are not
382	// thread-safe. We achieve thread safety by isolation, giving each thread
383	// its own copy of the string.
384	Ref<StringImpl> isolatedCopy() const;
385
386	WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength);
387
388	UChar at(unsigned) const;
389	UChar operator[](unsigned i) const { return at(i); }
390	WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned);
391
392	int toIntStrict(bool* ok = `0`, int base = `10`);
393	unsigned toUIntStrict(bool* ok = `0`, int base = `10`);
394	int64_t toInt64Strict(bool* ok = `0`, int base = `10`);
395	uint64_t toUInt64Strict(bool* ok = `0`, int base = `10`);
396	intptr_t toIntPtrStrict(bool* ok = `0`, int base = `10`);
397
398	WTF_EXPORT_PRIVATE int toInt(bool* ok = `0`); // ignores trailing garbage
399	unsigned toUInt(bool* ok = `0`); // ignores trailing garbage
400	int64_t toInt64(bool* ok = `0`); // ignores trailing garbage
401	uint64_t toUInt64(bool* ok = `0`); // ignores trailing garbage
402	intptr_t toIntPtr(bool* ok = `0`); // ignores trailing garbage
403
404	// FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
405	// Like the non-strict functions above, these return the value when there is trailing garbage.
406	// It would be better if these were more consistent with the above functions instead.
407	double toDouble(bool* ok = `0`);
408	float toFloat(bool* ok = `0`);
409
410	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase();
411	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase();
412	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale();
413	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned);
414	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale();
415	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier);
416	WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier);
417
418	Ref<StringImpl> foldCase();
419
420	Ref<StringImpl> stripWhiteSpace();
421	WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace();
422	Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction);
423
424	Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction);
425	Ref<StringImpl> removeCharacters(CodeUnitMatchFunction);
426
427	bool isAllASCII() const;
428	bool isAllLatin1() const;
429	template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
430
431	size_t find(LChar character, unsigned start = `0`);
432	size_t find(char character, unsigned start = `0`);
433	size_t find(UChar character, unsigned start = `0`);
434	WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = `0`);
435	size_t find(const LChar, unsigned* index = `0`);
436	ALWAYS_INLINE size_t find(const char* string, unsigned index = `0`) { return find(reinterpret_cast<const LChar*>(string), index); }
437	WTF_EXPORT_PRIVATE size_t find(StringImpl*);
438	WTF_EXPORT_PRIVATE size_t find(StringImpl, unsigned* index);
439	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const;
440	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
441	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl) const*;
442	WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl, unsigned* startOffset) const;
443
444	WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength);
445	WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl, unsigned* index = MaxLength);
446
447	WTF_EXPORT_PRIVATE bool startsWith(const StringImpl) const*;
448	WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const;
449	WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl) const*;
450	WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const;
451	WTF_EXPORT_PRIVATE bool startsWith(UChar) const;
452	WTF_EXPORT_PRIVATE bool startsWith(const char, unsigned* matchLength) const;
453	template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - `1`); }
454	WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;
455
456	WTF_EXPORT_PRIVATE bool endsWith(StringImpl*);
457	WTF_EXPORT_PRIVATE bool endsWith(StringImpl&);
458	WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl) const*;
459	WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const;
460	WTF_EXPORT_PRIVATE bool endsWith(UChar) const;
461	WTF_EXPORT_PRIVATE bool endsWith(const char, unsigned* matchLength) const;
462	template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - `1`); }
463	WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;
464
465	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar);
466	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*);
467	ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
468	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar, unsigned* replacementLength);
469	Ref<StringImpl> replace(UChar, const UChar, unsigned* replacementLength);
470	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl, StringImpl);
471	WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*);
472
473	WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);
474
475	#if USE(CF)
476	RetainPtr<CFStringRef> createCFString();
477	#endif
478
479	#ifdef __OBJC__
480	WTF_EXPORT_PRIVATE operator NSString *();
481	#endif
482
483	#if STRING_STATS
484	ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
485	#endif
486
487	BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }
488
489	template<typename T> static size_t headerSize() { return tailOffset<T>(); }
490
491	protected:
492	~StringImpl();
493
494	// Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring).
495	enum CreateSymbolTag { CreateSymbol };
496	StringImpl(CreateSymbolTag, const LChar, unsigned* length);
497	StringImpl(CreateSymbolTag, const UChar, unsigned* length);
498
499	// Null symbol.
500	explicit StringImpl(CreateSymbolTag);
501
502	private:
503	template<typename> static size_t allocationSize(Checked<size_t> tailElementCount);
504	template<typename> static size_t maxInternalLength();
505	template<typename> static size_t tailOffset();
506
507	bool requiresCopy() const;
508	template<typename T> const T* tailPointer() const;
509	template<typename T> T* tailPointer();
510	StringImpl* const& substringBuffer() const;
511	StringImpl*& substringBuffer();
512
513	enum class CaseConvertType { Upper, Lower };
514	template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType, unsigned*);
515
516	template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate);
517	template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction);
518	template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate);
519	template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned);
520	template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&);
521	template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&);
522	template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&);
523	template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType, unsigned*);
524	WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
525
526	// The bottom bit in the ref count indicates a static (immortal) string.
527	static const unsigned s_refCountFlagIsStaticString = `0x1`;
528	static const unsigned s_refCountIncrement = `0x2`; // This allows us to ref / deref without disturbing the static string flag.
529
530	#if STRING_STATS
531	WTF_EXPORT_PRIVATE static StringStats m_stringStats;
532	#endif
533
534	public:
535	void assertHashIsCorrect() const;
536	};
537
538	using StaticStringImpl = StringImpl::StaticStringImpl;
539
540	static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "");
541
542	#if !ASSERT_DISABLED
543
544	// StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency
545	// as they are not allocated by fastMalloc. We don't currently have any way to detect that case
546	// so we ignore the consistency check for all StringImpl*.
// ValueCheck specialization for StringImpl pointers whose checkConsistency() does nothing.
547	template<> struct ValueCheck<StringImpl*> {
// Intentionally empty: the pointer is accepted without any check.
548	static void checkConsistency(const StringImpl*) { }
549	};
550
551	#endif
552
553	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* StringImpl*);
554	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* LChar*);
555	inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
556	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* LChar, unsigned*);
557	WTF_EXPORT_PRIVATE bool equal(const StringImpl, const* UChar, unsigned*);
558	inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
559	inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
560	inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
561	WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b);
562
563	WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl, StringImpl);
564	WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar, size_t length, StringImpl);
565
566	bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
567	WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl, const* StringImpl*);
568	bool equalIgnoringASCIICase(const StringImpl&, const char*);
569	bool equalIgnoringASCIICase(const StringImpl, const* char*);
570
571	WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl, const* StringImpl*);
572
573	template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
574	template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl, const* char (&lowercaseLetters)[length]);
575
576	size_t find(const LChar, unsigned* length, CodeUnitMatchFunction, unsigned index = `0`);
577	size_t find(const UChar, unsigned* length, CodeUnitMatchFunction, unsigned index = `0`);
578
579	template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType, unsigned* length, unsigned index = StringImpl::MaxLength);
580	template<typename CharacterType> size_t reverseFind(const CharacterType, unsigned* length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
581	size_t reverseFind(const UChar, unsigned* length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
582	size_t reverseFind(const LChar, unsigned* length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);
583
584	template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);
585
586	template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1, unsigned* length1, const CharacterType2, unsigned* length2);
587	int codePointCompare(const StringImpl, const* StringImpl*);
588
589	// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
590	// Most WebKit callers don't want that would use isASCIISpace or isHTMLSpace instead.
591	bool isSpaceOrNewline(UChar32);
592
593	template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
594
595	// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
596	template<typename T> struct DefaultHash;
597	template<> struct DefaultHash<StringImpl*> {
598	typedef StringHash Hash;
599	};
600	template<> struct DefaultHash<RefPtr<StringImpl>> {
601	typedef StringHash Hash;
602	};
603
// Expands to an immediately-invoked lambda that defines a function-local static
// StaticStringImpl constructed from `characters` and evaluates to its address.
604	#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
605	static StaticStringImpl impl(characters); \
606	return &impl; \
607	}())
608
609	template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length)
610	{
611	return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor });
612	}
613
614	template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length)
615	{
616	return adoptRef(*new (NotNull, &string) StringImpl { length });
617	}
618
619	template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
620	{
621	return characters8();
622	}
623
624	template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
625	{
626	return characters16();
627	}
628
629	inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
630	{
631	while (index < length) {
632	if (matchFunction(characters[index]))
633	return index;
634	++index;
635	}
636	return notFound;
637	}
638
639	inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
640	{
641	while (index < length) {
642	if (matchFunction(characters[index]))
643	return index;
644	++index;
645	}
646	return notFound;
647	}
648
649	template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index)
650	{
651	if (!length)
652	return notFound;
653	if (index >= length)
654	index = length - `1`;
655	auto character = characters[index];
656	while (character != `'\n'` && character != `'\r'`) {
657	if (!index--)
658	return notFound;
659	character = characters[index];
660	}
661	return index;
662	}
663
664	template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index)
665	{
666	if (!length)
667	return notFound;
668	if (index >= length)
669	index = length - `1`;
670	while (characters[index] != matchCharacter) {
671	if (!index--)
672	return notFound;
673	}
674	return index;
675	}
676
677	ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index)
678	{
679	return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
680	}
681
682	inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index)
683	{
684	if (matchCharacter & ~`0xFF`)
685	return notFound;
686	return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
687	}
688
689	inline size_t StringImpl::find(LChar character, unsigned start)
690	{
691	if (is8Bit())
692	return WTF::find(characters8(), m_length, character, start);
693	return WTF::find(characters16(), m_length, character, start);
694	}
695
696	ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
697	{
698	return find(static_cast<LChar>(character), start);
699	}
700
701	inline size_t StringImpl::find(UChar character, unsigned start)
702	{
703	if (is8Bit())
704	return WTF::find(characters8(), m_length, character, start);
705	return WTF::find(characters16(), m_length, character, start);
706	}
707
708	template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
709	{
710	return equalIgnoringNullity(a.data(), a.size(), b);
711	}
712
// Three-way comparison of two character buffers by code unit.
//
// Returns -1, 0 or 1. The buffers are compared over their common prefix; the first differing
// code unit decides the result. If one buffer is a prefix of the other, the shorter one
// orders first. The two buffers may use different character widths.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    const unsigned commonLength = std::min(length1, length2);

    // Skip the shared prefix. The comparison is on the code units themselves, not on the
    // buffer addresses, so equal content in distinct buffers is treated as equal.
    unsigned position = 0;
    while (position < commonLength && *characters1 == *characters2) {
        ++characters1;
        ++characters2;
        ++position;
    }

    // Stopped inside the common range: the current code units differ.
    if (position < commonLength)
        return (characters1[0] > characters2[0]) ? 1 : -1;

    // One buffer is a prefix of the other (or they are identical).
    if (length1 == length2)
        return 0;
    return (length1 > length2) ? 1 : -1;
}
731
732	inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
733	{
734	// FIXME: Should null strings compare as less than empty strings rather than equal to them?
735	if (!string1)
736	return (string2 && string2->length()) ? -`1` : `0`;
737	if (!string2)
738	return string1->length() ? `1` : `0`;
739
740	bool string1Is8Bit = string1->is8Bit();
741	bool string2Is8Bit = string2->is8Bit();
742	if (string1Is8Bit) {
743	if (string2Is8Bit)
744	return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length());
745	return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length());
746	}
747	if (string2Is8Bit)
748	return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length());
749	return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length());
750	}
751
752	inline bool isSpaceOrNewline(UChar32 character)
753	{
754	// Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS.
755	return character <= `0xFF` ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL;
756	}
757
758	template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
759	{
760	ASSERT(string);
761	size_t length = `0`;
762	while (string[length])
763	++length;
764
765	RELEASE_ASSERT(length < StringImpl::MaxLength);
766	return static_cast<unsigned>(length);
767	}
768
769	inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
770	: m_refCount(refCount)
771	, m_length(length)
772	, m_data8(data8)
773	, m_hashAndFlags(hashAndFlags)
774	{
775	}
776
777	inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
778	: m_refCount(refCount)
779	, m_length(length)
780	, m_data16(data16)
781	, m_hashAndFlags(hashAndFlags)
782	{
783	}
784
785	template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
786	: m_refCount(refCount)
787	, m_length(length)
788	, m_data8Char(characters)
789	, m_hashAndFlags(hashAndFlags)
790	{
791	}
792
793	template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
794	: m_refCount(refCount)
795	, m_length(length)
796	, m_data16Char(characters)
797	, m_hashAndFlags(hashAndFlags)
798	{
799	}
800
801	inline Ref<StringImpl> StringImpl::isolatedCopy() const
802	{
803	if (!requiresCopy()) {
804	if (is8Bit())
805	return StringImpl::createWithoutCopying(m_data8, m_length);
806	return StringImpl::createWithoutCopying(m_data16, m_length);
807	}
808
809	if (is8Bit())
810	return create(m_data8, m_length);
811	return create(m_data16, m_length);
812	}
813
814	inline bool StringImpl::isAllASCII() const
815	{
816	if (is8Bit())
817	return charactersAreAllASCII(characters8(), length());
818	return charactersAreAllASCII(characters16(), length());
819	}
820
821	inline bool StringImpl::isAllLatin1() const
822	{
823	if (is8Bit())
824	return true;
825	auto* characters = characters16();
826	UChar ored = `0`;
827	for (size_t i = `0`; i < length(); ++i)
828	ored \|= characters[i];
829	return !(ored & `0xFF00`);
830	}
831
832	template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length)
833	{
834	for (size_t i = `0`; i < length; ++i) {
835	if (!isSpecialCharacter(characters[i]))
836	return false;
837	}
838	return true;
839	}
840
841	template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const
842	{
843	if (is8Bit())
844	return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length());
845	return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length());
846	}
847
848	inline StringImpl::StringImpl(unsigned length, Force8Bit)
849	: StringImplShape (s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer \| StringNormal \| BufferInternal)
850	{
851	ASSERT(m_data8);
852	ASSERT(m_length);
853
854	STRING_STATS_ADD_8BIT_STRING(m_length);
855	}
856
857	inline StringImpl::StringImpl(unsigned length)
858	: StringImplShape (s_refCountIncrement, length, tailPointer<UChar>(), StringNormal \| BufferInternal)
859	{
860	ASSERT(m_data16);
861	ASSERT(m_length);
862
863	STRING_STATS_ADD_16BIT_STRING(m_length);
864	}
865
866	inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
867	: StringImplShape (s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer \| StringNormal \| BufferOwned)
868	{
869	ASSERT(m_data8);
870	ASSERT(m_length);
871
872	STRING_STATS_ADD_8BIT_STRING(m_length);
873	}
874
875	inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
876	: StringImplShape (s_refCountIncrement, length, characters, StringNormal \| BufferInternal)
877	{
878	ASSERT(m_data16);
879	ASSERT(m_length);
880
881	STRING_STATS_ADD_16BIT_STRING(m_length);
882	}
883
884	inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
885	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringNormal \| BufferInternal)
886	{
887	ASSERT(m_data8);
888	ASSERT(m_length);
889
890	STRING_STATS_ADD_8BIT_STRING(m_length);
891	}
892
893	inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
894	: StringImplShape (s_refCountIncrement, length, characters.leakPtr(), StringNormal \| BufferOwned)
895	{
896	ASSERT(m_data16);
897	ASSERT(m_length);
898
899	STRING_STATS_ADD_16BIT_STRING(m_length);
900	}
901
902	inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
903	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringNormal \| BufferSubstring)
904	{
905	ASSERT(is8Bit());
906	ASSERT(m_data8);
907	ASSERT(m_length);
908	ASSERT(base->bufferOwnership() != BufferSubstring);
909
910	substringBuffer() = &base.leakRef();
911
912	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
913	}
914
915	inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
916	: StringImplShape (s_refCountIncrement, length, characters, StringNormal \| BufferSubstring)
917	{
918	ASSERT(!is8Bit());
919	ASSERT(m_data16);
920	ASSERT(m_length);
921	ASSERT(base->bufferOwnership() != BufferSubstring);
922
923	substringBuffer() = &base.leakRef();
924
925	STRING_STATS_ADD_16BIT_STRING2(m_length, true);
926	}
927
928	template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
929	{
930	return create8BitIfPossible(vector.data(), vector.size());
931	}
932
933	ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length)
934	{
935	ASSERT(length <= rep.length());
936
937	if (!length)
938	return *empty();
939
940	// Coyping the thing would save more memory sometimes, largely due to the size of pointer.
941	size_t substringSize = allocationSize<StringImpl*>(`1`);
942	if (rep.is8Bit()) {
943	if (substringSize >= allocationSize<LChar>(length))
944	return create(rep.m_data8 + offset, length);
945	} else {
946	if (substringSize >= allocationSize<UChar>(length))
947	return create(rep.m_data16 + offset, length);
948	}
949
950	auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep);
951
952	// We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
953	auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize));
954	if (rep.is8Bit())
955	return adoptRef(*new (NotNull, stringImpl) StringImpl (rep.m_data8 + offset, length, *ownerRep));
956	return adoptRef(*new (NotNull, stringImpl) StringImpl (rep.m_data16 + offset, length, *ownerRep));
957	}
958
959	template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
960	{
961	COMPILE_ASSERT(characterCount > `1`, StringImplFromLiteralNotEmpty);
962	COMPILE_ASSERT((characterCount - `1` <= ((unsigned(~`0`) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);
963
964	return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - `1`);
965	}
966
967	template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output)
968	{
969	if (!length) {
970	output = nullptr;
971	return empty();
972	}
973
974	if (length > maxInternalLength<CharacterType>()) {
975	output = nullptr;
976	return nullptr;
977	}
978	StringImpl* result;
979	if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) {
980	output = nullptr;
981	return nullptr;
982	}
983	output = result->tailPointer<CharacterType>();
984
985	return constructInternal<CharacterType>(*result, length);
986	}
987
988	template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
989	inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector)
990	{
991	if (size_t size = vector.size()) {
992	ASSERT(vector.data());
993	if (size > MaxLength)
994	CRASH();
995	return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
996	}
997	return *empty();
998	}
999
1000	inline size_t StringImpl::cost() const
1001	{
1002	// For substrings, return the cost of the base string.
1003	if (bufferOwnership() == BufferSubstring)
1004	return substringBuffer()->cost();
1005
1006	// Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl.
1007	// We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of
1008	// StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of
1009	// 0 here and avoid modifying m_hashAndFlags.
1010	if (m_hashAndFlags & s_hashFlagDidReportCost)
1011	return `0`;
1012
1013	m_hashAndFlags \|= s_hashFlagDidReportCost;
1014	size_t result = m_length;
1015	if (!is8Bit())
1016	result <<= `1`;
1017	return result;
1018	}
1019
1020	inline size_t StringImpl::costDuringGC()
1021	{
1022	if (isStatic())
1023	return `0`;
1024
1025	if (bufferOwnership() == BufferSubstring)
1026	return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
1027
1028	size_t result = m_length;
1029	if (!is8Bit())
1030	result <<= `1`;
1031	return divideRoundedUp(result, refCount());
1032	}
1033
1034	inline void StringImpl::setIsAtomic(bool isAtom)
1035	{
1036	ASSERT(!isStatic());
1037	ASSERT(!isSymbol());
1038	if (isAtom)
1039	m_hashAndFlags \|= s_hashFlagStringKindIsAtomic;
1040	else
1041	m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic;
1042	}
1043
1044	inline void StringImpl::setHash(unsigned hash) const
1045	{
1046	// The high bits of 'hash' are always empty, but we prefer to store our flags
1047	// in the low bits because it makes them slightly more efficient to access.
1048	// So, we shift left and right when setting and getting our hash code.
1049
1050	ASSERT(!hasHash());
1051	ASSERT(!isStatic());
1052	// Multiple clients assume that StringHasher is the canonical string hash function.
1053	ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
1054	ASSERT(!(hash & (s_flagMask << (`8` * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.
1055
1056	hash <<= s_flagCount;
1057	ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
1058	ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
1059
1060	m_hashAndFlags \|= hash; // Store hash with flags in low bits.
1061	}
1062
1063	inline void StringImpl::ref()
1064	{
1065	STRING_STATS_REF_STRING(*this);
1066
1067	m_refCount += s_refCountIncrement;
1068	}
1069
1070	inline void StringImpl::deref()
1071	{
1072	STRING_STATS_DEREF_STRING(*this);
1073
1074	unsigned tempRefCount = m_refCount - s_refCountIncrement;
1075	if (!tempRefCount) {
1076	StringImpl::destroy(this);
1077	return;
1078	}
1079	m_refCount = tempRefCount;
1080	}
1081
1082	template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters)
1083	{
1084	if (numCharacters == `1`) {
1085	destination = source;
1086	return;
1087	}
1088	memcpy(destination, source, numCharacters * sizeof(CharacterType));
1089	}
1090
1091	ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters)
1092	{
1093	for (unsigned i = `0`; i < numCharacters; ++i)
1094	destination[i] = source[i];
1095	}
1096
1097	inline UChar StringImpl::at(unsigned i) const
1098	{
1099	ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
1100	return is8Bit() ? m_data8[i] : m_data16[i];
1101	}
1102
1103	inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
1104	: StringImplShape (s_refCountIncrement, length, characters, s_hashFlag8BitBuffer \| StringSymbol \| BufferSubstring)
1105	{
1106	ASSERT(is8Bit());
1107	ASSERT(m_data8);
1108	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
1109	}
1110
1111	inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
1112	: StringImplShape (s_refCountIncrement, length, characters, StringSymbol \| BufferSubstring)
1113	{
1114	ASSERT(!is8Bit());
1115	ASSERT(m_data16);
1116	STRING_STATS_ADD_16BIT_STRING2(m_length, true);
1117	}
1118
1119	inline StringImpl::StringImpl(CreateSymbolTag)
1120	: StringImplShape (s_refCountIncrement, `0`, empty()->characters8(), s_hashFlag8BitBuffer \| StringSymbol \| BufferSubstring)
1121	{
1122	ASSERT(is8Bit());
1123	ASSERT(m_data8);
1124	STRING_STATS_ADD_8BIT_STRING2(m_length, true);
1125	}
1126
1127	template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
1128	{
1129	return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
1130	}
1131
1132	template<typename CharacterType>
1133	inline size_t StringImpl::maxInternalLength()
1134	{
1135	// In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2.
1136	return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType));
1137	}
1138
1139	template<typename T> inline size_t StringImpl::tailOffset()
1140	{
1141	#if COMPILER(MSVC)
1142	// MSVC doesn't support alignof yet.
1143	return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
1144	#else
1145	return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
1146	#endif
1147	}
1148
1149	inline bool StringImpl::requiresCopy() const
1150	{
1151	if (bufferOwnership() != BufferInternal)
1152	return true;
1153
1154	if (is8Bit())
1155	return m_data8 == tailPointer<LChar>();
1156	return m_data16 == tailPointer<UChar>();
1157	}
1158
1159	template<typename T> inline const T* StringImpl::tailPointer() const
1160	{
1161	return reinterpret_cast_ptr<const T>(reinterpret_cast<const* uint8_t>(this*) + tailOffset<T>());
1162	}
1163
1164	template<typename T> inline T* StringImpl::tailPointer()
1165	{
1166	return reinterpret_cast_ptr<T>(reinterpret_cast<uint8_t>(this) + tailOffset<T>());
1167	}
1168
1169	inline StringImpl* const& StringImpl::substringBuffer() const
1170	{
1171	ASSERT(bufferOwnership() == BufferSubstring);
1172
1173	return tailPointer<StringImpl>();
1174	}
1175
1176	inline StringImpl*& StringImpl::substringBuffer()
1177	{
1178	ASSERT(bufferOwnership() == BufferSubstring);
1179
1180	return tailPointer<StringImpl>();
1181	}
1182
1183	inline void StringImpl::assertHashIsCorrect() const
1184	{
1185	ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
1186	}
1187
1188	template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
1189	: StringImplShape(s_refCountFlagIsStaticString, characterCount - `1`, characters,
1190	s_hashFlag8BitBuffer \| s_hashFlagDidReportCost \| stringKind \| BufferInternal \| (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
1191	{
1192	}
1193
1194	template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
1195	: StringImplShape(s_refCountFlagIsStaticString, characterCount - `1`, characters,
1196	s_hashFlagDidReportCost \| stringKind \| BufferInternal \| (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
1197	{
1198	}
1199
1200	inline StringImpl::StaticStringImpl::operator StringImpl&()
1201	{
1202	return *reinterpret_cast<StringImpl>(this*);
1203	}
1204
1205	inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
1206	{
1207	return equalIgnoringASCIICaseCommon(a, b);
1208	}
1209
1210	inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
1211	{
1212	return equalIgnoringASCIICaseCommon(a, b);
1213	}
1214
1215	inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
1216	{
1217	return a && equalIgnoringASCIICase(*a, b);
1218	}
1219
1220	template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
1221	{
1222	return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
1223	}
1224
1225	template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1226	{
1227	return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
1228	}
1229
1230	template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
1231	{
1232	return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
1233	}
1234
1235	template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1236	{
1237	return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
1238	}
1239
1240	} // namespace WTF
1241
1242	using WTF::StaticStringImpl;
1243	using WTF::StringImpl;
1244	using WTF::equal;
1245	using WTF::isLatin1;
1246

Browse the source code of webcore/DerivedSources/ForwardingHeaders/wtf/text/StringImpl.h