1/*
2 * Copyright (C) 1999 Lars Knoll ([email protected])
3 * Copyright (C) 2005-2018 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#pragma once
24
25#include <limits.h>
26#include <unicode/ustring.h>
27#include <wtf/ASCIICType.h>
28#include <wtf/CheckedArithmetic.h>
29#include <wtf/Expected.h>
30#include <wtf/MathExtras.h>
31#include <wtf/StdLibExtras.h>
32#include <wtf/Vector.h>
33#include <wtf/text/ASCIIFastPath.h>
34#include <wtf/text/ConversionMode.h>
35#include <wtf/text/StringCommon.h>
36#include <wtf/text/StringHasher.h>
37#include <wtf/text/UTF8ConversionError.h>
38
// Forward declaration of the CoreFoundation string handle, only when CF is in use.
// StringImpl::createCFString() in this header refers to it.
#if USE(CF)
typedef const struct __CFString * CFStringRef;
#endif

// Forward declaration of NSString, only when compiling as Objective-C(++).
// StringImpl's NSString conversion operator in this header refers to it.
#ifdef __OBJC__
@class NSString;
#endif
46
// Forward declarations of the JSC classes that StringImpl names as friends.
namespace JSC {
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
}
51
52namespace WTF {
53
// Forward declarations of WTF types referenced by this header.
class SymbolImpl;
class SymbolRegistry;

// Translator types that StringImpl declares as friends.
struct CStringTranslator;
struct HashAndUTF8CharactersTranslator;
struct LCharBufferTranslator;
struct StringHash;
struct SubstringTranslator;
struct UCharBufferTranslator;

template<typename> class RetainPtr;

template<typename> struct BufferFromStaticDataTranslator;
template<typename> struct HashAndCharactersTranslator;
68
// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
// When it is 0, the STRING_STATS_* macros in this header expand to no-ops.
#define STRING_STATS 0
71
// Declaration only; the definition is not in this part of the file.
// Judging by the name, this reports whether every one of the `length` characters satisfies
// the compile-time predicate isSpecialCharacter — confirm against the definition.
template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length);
73
#if STRING_STATS

// Counters describing how many strings have been created and how much character data they hold.
// Only compiled when STRING_STATS is non-zero; StringImpl exposes the shared instance through
// StringImpl::stringStats().
struct StringStats {
    // Records the creation of an 8-bit string of the given length.
    // A substring still counts as a string, but its length is not added to the 8-bit data total.
    void add8BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number8BitStrings;
        if (!isSubString)
            m_total8BitData += length;
    }

    // Records the creation of a 16-bit string of the given length.
    // A substring still counts as a string, but its length is not added to the 16-bit data total.
    void add16BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number16BitStrings;
        if (!isSubString)
            m_total16BitData += length;
    }

    // Defined out of line.
    void removeString(StringImpl&);
    void printStats();

    // NOTE(review): presumably statistics are printed once every s_printStringStatsFrequency
    // removals, with s_stringRemovesTillPrintStats as the countdown — confirm in the .cpp file.
    static const unsigned s_printStringStatsFrequency = 5000;
    static std::atomic<unsigned> s_stringRemovesTillPrintStats;

    // Incremented by STRING_STATS_REF_STRING / STRING_STATS_DEREF_STRING.
    std::atomic<unsigned> m_refCalls;
    std::atomic<unsigned> m_derefCalls;

    std::atomic<unsigned> m_totalNumberStrings;
    std::atomic<unsigned> m_number8BitStrings;
    std::atomic<unsigned> m_number16BitStrings;
    std::atomic<unsigned long long> m_total8BitData;
    std::atomic<unsigned long long> m_total16BitData;
};

// Every macro below expands to a single expression without a trailing semicolon, so that
// `MACRO(x);` is exactly one statement in both configurations (safe in unbraced if/else).
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
// StringStats keeps no up-conversion counter, so this is a no-op. It is defined here so that
// both configurations provide the same set of macros.
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
#define STRING_STATS_REF_STRING(string) ((void)++StringImpl::stringStats().m_refCalls)
#define STRING_STATS_DEREF_STRING(string) ((void)++StringImpl::stringStats().m_derefCalls)

#else

// Statistics disabled: every macro discards its arguments without evaluating them.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#define STRING_STATS_REF_STRING(string) ((void)0)
#define STRING_STATS_DEREF_STRING(string) ((void)0)

#endif
129
// Returns true when the code unit, reinterpreted as an unsigned value of the same width,
// is no greater than 0xFF.
template<typename CharacterType> inline bool isLatin1(CharacterType character)
{
    using UnsignedUnit = typename std::make_unsigned<CharacterType>::type;
    const UnsignedUnit largestLatin1 = static_cast<UnsignedUnit>(0xFF);
    const UnsignedUnit codeUnit = static_cast<UnsignedUnit>(character);
    return !(codeUnit > largestLatin1);
}
135
// Data members shared by StringImpl and StringImpl::StaticStringImpl, both of which derive
// privately from this class: a reference count, a length, a pointer to 8-bit or 16-bit
// character data, and a word that packs the hash together with flag bits.
class StringImplShape {
    WTF_MAKE_NONCOPYABLE(StringImplShape);
public:
    // Largest length a string may have.
    static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max();

protected:
    StringImplShape(unsigned refCount, unsigned length, const LChar*, unsigned hashAndFlags);
    StringImplShape(unsigned refCount, unsigned length, const UChar*, unsigned hashAndFlags);

    // Tag selecting the constexpr constructors, which take a character array by reference.
    enum ConstructWithConstExprTag { ConstructWithConstExpr };
    template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
    template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);

    unsigned m_refCount;
    unsigned m_length;
    union {
        const LChar* m_data8;
        const UChar* m_data16;
        // It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++.
        // These are needed to avoid reinterpret_cast.
        const char* m_data8Char;
        const char16_t* m_data16Char;
    };
    // Mutable so that const member functions of derived classes can update it.
    mutable unsigned m_hashAndFlags;
};
161
162// FIXME: Use of StringImpl and const is rather confused.
163// The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&.
164// We could mark every member function const and always use "const StringImpl&" and "const StringImpl*".
165// Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere.
166// Right now we use a mix of both, which makes code more confusing and has no benefit.
167
// Reference-counted string storage holding either 8-bit (LChar) or 16-bit (UChar) characters.
// The reference count, length, data pointer and the packed hash/flags word are inherited
// from StringImplShape.
class StringImpl : private StringImplShape {
    WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;

    friend class AtomStringImpl;
    friend class JSC::LLInt::Data;
    friend class JSC::LLIntOffsetsExtractor;
    friend class PrivateSymbolImpl;
    friend class RegisteredSymbolImpl;
    friend class SymbolImpl;
    friend class ExternalStringImpl;

    friend struct WTF::CStringTranslator;
    friend struct WTF::HashAndUTF8CharactersTranslator;
    friend struct WTF::LCharBufferTranslator;
    friend struct WTF::SubstringTranslator;
    friend struct WTF::UCharBufferTranslator;

    template<typename> friend struct WTF::BufferFromStaticDataTranslator;
    template<typename> friend struct WTF::HashAndCharactersTranslator;

public:
    // The enumerator values are stored in the two lowest bits of m_hashAndFlags
    // (see s_hashMaskBufferOwnership and bufferOwnership()).
    enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal };

    static constexpr unsigned MaxLength = StringImplShape::MaxLength;

    // The bottom 6 bits in the hash are flags.
    static constexpr const unsigned s_flagCount = 6;
private:
    static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1;
    static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags");
    static constexpr const unsigned s_flagStringKindCount = 4;

    // Flag layout within m_hashAndFlags, from the lowest bit up:
    //   bits 0-1: buffer ownership, bit 2: 8-bit buffer, bit 3: did report cost,
    //   bit 4: atomic, bit 5: symbol.
    static constexpr const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount);
    static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1);
    static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol;
    static constexpr const unsigned s_hashFlagDidReportCost = 1u << 3;
    static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 2;
    static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1);

    enum StringKind {
        StringNormal = 0u, // non-symbol, non-atomic
        StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic
        StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
    };

    // Create a normal 8-bit string with internal storage (BufferInternal).
    enum Force8Bit { Force8BitConstructor };
    StringImpl(unsigned length, Force8Bit);

    // Create a normal 16-bit string with internal storage (BufferInternal).
    explicit StringImpl(unsigned length);

    // Create a StringImpl adopting ownership of the provided buffer (BufferOwned).
    StringImpl(MallocPtr<LChar>, unsigned length);
    StringImpl(MallocPtr<UChar>, unsigned length);
    enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
    StringImpl(const UChar*, unsigned length, ConstructWithoutCopyingTag);
    StringImpl(const LChar*, unsigned length, ConstructWithoutCopyingTag);

    // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring).
    StringImpl(const LChar*, unsigned length, Ref<StringImpl>&&);
    StringImpl(const UChar*, unsigned length, Ref<StringImpl>&&);

public:
    WTF_EXPORT_PRIVATE static void destroy(StringImpl*);

    WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar*, unsigned length);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*, unsigned length);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
    template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*);

    // The const char* overloads forward to the LChar overloads, reinterpreting the bytes as LChar.
    ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); }
    WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*);
    ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); }

    static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length);

    template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]);

    // FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead.
    WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*, unsigned length);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*);

    WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar*, unsigned length);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar*, unsigned length);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&);
    template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&);

    // Reallocate the StringImpl. The originalString must be only owned by the Ref,
    // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
    // the originalString can't be used after this function.
    static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
    static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
    static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
    static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);

    // Member offsets and flag bit values exposed as static accessors.
    // NOTE(review): presumably for the JSC friends declared above — confirm.
    static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
    static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
    static constexpr unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; }
    static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
    static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; }
    static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }

    template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
    static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&);

    WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&);
    WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&);

    unsigned length() const { return m_length; }
    static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
    bool isEmpty() const { return !m_length; }

    // characters8() may only be called on 8-bit strings and characters16() only on 16-bit
    // strings; both assert the matching width.
    bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
    ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
    ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }

    template<typename CharacterType> const CharacterType* characters() const;

    size_t cost() const;
    size_t costDuringGC();

    WTF_EXPORT_PRIVATE size_t sizeInBytes() const;

    bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
    bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; }
    void setIsAtomic(bool);

    bool isExternal() const { return bufferOwnership() == BufferExternal; }

    bool isSubString() const { return bufferOwnership() == BufferSubstring; }

    static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length);
    static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);

    WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
    WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const;
    WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const;

private:
    static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);

    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.
    void setHash(unsigned) const;

    // The hash with the flag bits shifted out.
    unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; }

public:
    // A raw hash of zero is treated as "no hash stored yet".
    bool hasHash() const { return !!rawHash(); }

    unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); }
    // Returns the stored hash when there is one; otherwise defers to hashSlowCase().
    unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); }

    WTF_EXPORT_PRIVATE unsigned concurrentHash() const;

    unsigned symbolAwareHash() const;
    unsigned existingSymbolAwareHash() const;

    bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }

    // m_refCount advances in steps of s_refCountIncrement; its lowest bit is the static-string flag.
    size_t refCount() const { return m_refCount / s_refCountIncrement; }
    bool hasOneRef() const { return m_refCount == s_refCountIncrement; }
    bool hasAtLeastOneRef() const { return m_refCount; } // For assertions.

    void ref();
    void deref();

    class StaticStringImpl : private StringImplShape {
        WTF_MAKE_NONCOPYABLE(StaticStringImpl);
    public:
        // Used to construct static strings, which have a special refCount that can never hit zero.
        // This means that the static string will never be destroyed, which is important because
        // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
        //
        // In order to make StaticStringImpl thread safe, we also need to ensure that the rest of
        // the fields are never mutated by threads. We have this guarantee because:
        //
        // 1. m_length is only set on construction and never mutated thereafter.
        //
        // 2. m_data8 and m_data16 are only set on construction and never mutated thereafter.
        //    We also know that a StringImpl never changes from 8 bit to 16 bit because there
        //    is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction.
        //
        // 3. m_hashAndFlags will not be mutated by different threads because:
        //
        //    a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure
        //       that StringImpl::cost() returns early.
        //       This means StaticStringImpl costs are not counted. But since there should only
        //       be a finite set of StaticStringImpls, their cost can be aggregated into a single
        //       system cost if needed.
        //    b. setIsAtomic() is never called on a StaticStringImpl.
        //       setIsAtomic() asserts !isStatic().
        //    c. setHash() is never called on a StaticStringImpl.
        //       StaticStringImpl's constructor sets the hash on construction.
        //       StringImpl::hash() only sets a new hash iff !hasHash().
        //       Additionally, StringImpl::setHash() asserts hasHash() and !isStatic().

        template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal);
        template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal);
        operator StringImpl&();
    };

    WTF_EXPORT_PRIVATE static StaticStringImpl s_atomicEmptyString;
    // Views s_atomicEmptyString as a StringImpl.
    ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); }

    // FIXME: Does this really belong in StringImpl?
    template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters);
    static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters);

    // Some string features, like reference counting and the atomicity flag, are not
    // thread-safe. We achieve thread safety by isolation, giving each thread
    // its own copy of the string.
    Ref<StringImpl> isolatedCopy() const;

    WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength);

    UChar at(unsigned) const;
    UChar operator[](unsigned i) const { return at(i); }
    WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned);

    int toIntStrict(bool* ok = 0, int base = 10);
    unsigned toUIntStrict(bool* ok = 0, int base = 10);
    int64_t toInt64Strict(bool* ok = 0, int base = 10);
    uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);

    WTF_EXPORT_PRIVATE int toInt(bool* ok = 0); // ignores trailing garbage
    unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage

    // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
    // Like the non-strict functions above, these return the value when there is trailing garbage.
    // It would be better if these were more consistent with the above functions instead.
    double toDouble(bool* ok = 0);
    float toFloat(bool* ok = 0);

    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase();
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase();
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale();
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned);
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale();
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier);
    WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier);

    Ref<StringImpl> foldCase();

    Ref<StringImpl> stripWhiteSpace();
    WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace();
    Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction);

    Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction);
    Ref<StringImpl> removeCharacters(CodeUnitMatchFunction);

    bool isAllASCII() const;
    bool isAllLatin1() const;
    template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;

    size_t find(LChar character, unsigned start = 0);
    size_t find(char character, unsigned start = 0);
    size_t find(UChar character, unsigned start = 0);
    WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = 0);
    size_t find(const LChar*, unsigned index = 0);
    ALWAYS_INLINE size_t find(const char* string, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(string), index); }
    WTF_EXPORT_PRIVATE size_t find(StringImpl*);
    WTF_EXPORT_PRIVATE size_t find(StringImpl*, unsigned index);
    WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const;
    WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
    WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*) const;
    WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const;

    WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength);
    WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl*, unsigned index = MaxLength);

    WTF_EXPORT_PRIVATE bool startsWith(const StringImpl*) const;
    WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const;
    WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl*) const;
    WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const;
    WTF_EXPORT_PRIVATE bool startsWith(UChar) const;
    WTF_EXPORT_PRIVATE bool startsWith(const char*, unsigned matchLength) const;
    // The array extent is passed on minus one — presumably to exclude a string literal's
    // terminating null character.
    template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - 1); }
    WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;

    WTF_EXPORT_PRIVATE bool endsWith(StringImpl*);
    WTF_EXPORT_PRIVATE bool endsWith(StringImpl&);
    WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl*) const;
    WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const;
    WTF_EXPORT_PRIVATE bool endsWith(UChar) const;
    WTF_EXPORT_PRIVATE bool endsWith(const char*, unsigned matchLength) const;
    // NOTE: although the parameter is named "prefix", it is forwarded to endsWith(), i.e. it is
    // the suffix being tested. As with startsWith above, the array extent is passed on minus one.
    template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - 1); }
    WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;

    WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar);
    WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*);
    ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
    WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
    Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
    WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl*, StringImpl*);
    WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*);

    WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);

#if USE(CF)
    RetainPtr<CFStringRef> createCFString();
#endif

#ifdef __OBJC__
    WTF_EXPORT_PRIVATE operator NSString *();
#endif

#if STRING_STATS
    ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
#endif

    // Decodes the ownership kind from the two lowest bits of m_hashAndFlags.
    BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }

    template<typename T> static size_t headerSize() { return tailOffset<T>(); }

protected:
    ~StringImpl();

    // Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring).
    enum CreateSymbolTag { CreateSymbol };
    StringImpl(CreateSymbolTag, const LChar*, unsigned length);
    StringImpl(CreateSymbolTag, const UChar*, unsigned length);

    // Null symbol.
    explicit StringImpl(CreateSymbolTag);

private:
    template<typename> static size_t allocationSize(Checked<size_t> tailElementCount);
    template<typename> static size_t maxInternalLength();
    template<typename> static size_t tailOffset();

    bool requiresCopy() const;
    template<typename T> const T* tailPointer() const;
    template<typename T> T* tailPointer();
    StringImpl* const& substringBuffer() const;
    StringImpl*& substringBuffer();

    enum class CaseConvertType { Upper, Lower };
    template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned);

    template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate);
    template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction);
    template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate);
    template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned);
    template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&);
    template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&);
    template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&);
    template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType*, unsigned);
    // Called by hash() when no hash is stored yet.
    WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;

    // The bottom bit in the ref count indicates a static (immortal) string.
    static const unsigned s_refCountFlagIsStaticString = 0x1;
    static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.

#if STRING_STATS
    WTF_EXPORT_PRIVATE static StringStats m_stringStats;
#endif

public:
    void assertHashIsCorrect() const;
};
537
// Namespace-level shorthand for the nested class.
using StaticStringImpl = StringImpl::StaticStringImpl;

// StringImpl::empty() reinterprets a StaticStringImpl as a StringImpl, so the two types
// must at least have the same size.
static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "");
541
#if !ASSERT_DISABLED

// StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc. We don't currently have any way to detect that case
// so we ignore the consistency check for all StringImpl*.
template<> struct ValueCheck<StringImpl*> {
    // Intentionally empty: no consistency check is performed for StringImpl pointers.
    static void checkConsistency(const StringImpl*) { }
};

#endif
552
// Equality comparisons between a StringImpl and another StringImpl or a raw character buffer.
// The const char* overloads forward to the LChar overloads, and the overloads that take the
// raw buffer first forward to the ones that take the StringImpl first.
WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const StringImpl*);
WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*);
inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*, unsigned);
WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const UChar*, unsigned);
inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b);

WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl*, StringImpl*);
WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*);

bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*);
bool equalIgnoringASCIICase(const StringImpl&, const char*);
bool equalIgnoringASCIICase(const StringImpl*, const char*);

WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*);

template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]);

// Forward search for the first code unit accepted by the match function, starting at index.
size_t find(const LChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);
size_t find(const UChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);

// Backward searches. An index at or beyond length is clamped to the last character, so the
// default of MaxLength starts the search at the end of the buffer.
template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType*, unsigned length, unsigned index = StringImpl::MaxLength);
template<typename CharacterType> size_t reverseFind(const CharacterType*, unsigned length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
size_t reverseFind(const UChar*, unsigned length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
size_t reverseFind(const LChar*, unsigned length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);

template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);

template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1*, unsigned length1, const CharacterType2*, unsigned length2);
int codePointCompare(const StringImpl*, const StringImpl*);

// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
// Most WebKit callers don't want that and should use isASCIISpace or isHTMLSpace instead.
bool isSpaceOrNewline(UChar32);

template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
594
595// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
596template<typename T> struct DefaultHash;
597template<> struct DefaultHash<StringImpl*> {
598 typedef StringHash Hash;
599};
600template<> struct DefaultHash<RefPtr<StringImpl>> {
601 typedef StringHash Hash;
602};
603
// Expands to an immediately-invoked lambda that defines a function-local static
// StaticStringImpl built from `characters` and yields a pointer to it.
#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
    static StaticStringImpl impl(characters); \
    return &impl; \
    }())
608
609template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length)
610{
611 return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor });
612}
613
614template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length)
615{
616 return adoptRef(*new (NotNull, &string) StringImpl { length });
617}
618
619template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
620{
621 return characters8();
622}
623
624template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
625{
626 return characters16();
627}
628
629inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
630{
631 while (index < length) {
632 if (matchFunction(characters[index]))
633 return index;
634 ++index;
635 }
636 return notFound;
637}
638
639inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
640{
641 while (index < length) {
642 if (matchFunction(characters[index]))
643 return index;
644 ++index;
645 }
646 return notFound;
647}
648
649template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index)
650{
651 if (!length)
652 return notFound;
653 if (index >= length)
654 index = length - 1;
655 auto character = characters[index];
656 while (character != '\n' && character != '\r') {
657 if (!index--)
658 return notFound;
659 character = characters[index];
660 }
661 return index;
662}
663
664template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index)
665{
666 if (!length)
667 return notFound;
668 if (index >= length)
669 index = length - 1;
670 while (characters[index] != matchCharacter) {
671 if (!index--)
672 return notFound;
673 }
674 return index;
675}
676
// 16-bit buffer, 8-bit needle: widen the needle to UChar and use the same-type template.
ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index)
{
    return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
}
681
682inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index)
683{
684 if (matchCharacter & ~0xFF)
685 return notFound;
686 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
687}
688
689inline size_t StringImpl::find(LChar character, unsigned start)
690{
691 if (is8Bit())
692 return WTF::find(characters8(), m_length, character, start);
693 return WTF::find(characters16(), m_length, character, start);
694}
695
// char overload: reinterprets the character as LChar and forwards to find(LChar, unsigned).
ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
{
    return find(static_cast<LChar>(character), start);
}
700
701inline size_t StringImpl::find(UChar character, unsigned start)
702{
703 if (is8Bit())
704 return WTF::find(characters8(), m_length, character, start);
705 return WTF::find(characters16(), m_length, character, start);
706}
707
// Vector convenience overload: passes the vector's data pointer and size to the pointer/length overload.
template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{
    return equalIgnoringNullity(a.data(), a.size(), b);
}
712
// Orders two character sequences by comparing code units pairwise.
// Returns -1, 0 or 1. The first differing code unit decides; if one sequence is a
// prefix of the other, the shorter one orders first.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    unsigned sharedLength = std::min(length1, length2);

    for (unsigned i = 0; i < sharedLength; ++i) {
        if (characters1[i] != characters2[i])
            return (characters1[i] > characters2[i]) ? 1 : -1;
    }

    // Common prefix is identical; the lengths break the tie.
    if (length1 == length2)
        return 0;
    return (length1 > length2) ? 1 : -1;
}
731
732inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
733{
734 // FIXME: Should null strings compare as less than empty strings rather than equal to them?
735 if (!string1)
736 return (string2 && string2->length()) ? -1 : 0;
737 if (!string2)
738 return string1->length() ? 1 : 0;
739
740 bool string1Is8Bit = string1->is8Bit();
741 bool string2Is8Bit = string2->is8Bit();
742 if (string1Is8Bit) {
743 if (string2Is8Bit)
744 return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length());
745 return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length());
746 }
747 if (string2Is8Bit)
748 return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length());
749 return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length());
750}
751
752inline bool isSpaceOrNewline(UChar32 character)
753{
754 // Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS.
755 return character <= 0xFF ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL;
756}
757
758template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
759{
760 ASSERT(string);
761 size_t length = 0;
762 while (string[length])
763 ++length;
764
765 RELEASE_ASSERT(length < StringImpl::MaxLength);
766 return static_cast<unsigned>(length);
767}
768
// Initializes the header fields for a string whose characters are referenced through m_data8.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8(data8)
    , m_hashAndFlags(hashAndFlags)
{
}
776
// Initializes the header fields for a string whose characters are referenced through m_data16.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16(data16)
    , m_hashAndFlags(hashAndFlags)
{
}
784
// constexpr variant taking a char array reference; the array is stored through m_data8Char.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
792
// constexpr variant taking a char16_t array reference; the array is stored through m_data16Char.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
800
801inline Ref<StringImpl> StringImpl::isolatedCopy() const
802{
803 if (!requiresCopy()) {
804 if (is8Bit())
805 return StringImpl::createWithoutCopying(m_data8, m_length);
806 return StringImpl::createWithoutCopying(m_data16, m_length);
807 }
808
809 if (is8Bit())
810 return create(m_data8, m_length);
811 return create(m_data16, m_length);
812}
813
814inline bool StringImpl::isAllASCII() const
815{
816 if (is8Bit())
817 return charactersAreAllASCII(characters8(), length());
818 return charactersAreAllASCII(characters16(), length());
819}
820
821inline bool StringImpl::isAllLatin1() const
822{
823 if (is8Bit())
824 return true;
825 auto* characters = characters16();
826 UChar ored = 0;
827 for (size_t i = 0; i < length(); ++i)
828 ored |= characters[i];
829 return !(ored & 0xFF00);
830}
831
832template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length)
833{
834 for (size_t i = 0; i < length; ++i) {
835 if (!isSpecialCharacter(characters[i]))
836 return false;
837 }
838 return true;
839}
840
841template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const
842{
843 if (is8Bit())
844 return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length());
845 return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length());
846}
847
// Constructs an 8-bit normal string whose character data pointer is this object's own
// tail (tailPointer<LChar>()), flagged BufferInternal. Debug builds expect a non-zero length.
inline StringImpl::StringImpl(unsigned length, Force8Bit)
    : StringImplShape(s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
856
// Constructs a 16-bit normal string whose character data pointer is this object's own
// tail (tailPointer<UChar>()), flagged BufferInternal. Debug builds expect a non-zero length.
inline StringImpl::StringImpl(unsigned length)
    : StringImplShape(s_refCountIncrement, length, tailPointer<UChar>(), StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
865
// Constructs an 8-bit normal string around a buffer released from |characters| via
// leakPtr(); the string is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer | StringNormal | BufferOwned)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
874
// Constructs a 16-bit normal string that points directly at |characters| (no copy is made
// here); it is flagged BufferInternal even though the data is not in this object's tail.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
883
// Constructs an 8-bit normal string that points directly at |characters| (no copy is made
// here); it is flagged BufferInternal even though the data is not in this object's tail.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
892
// Constructs a 16-bit normal string around a buffer released from |characters| via
// leakPtr(); the string is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), StringNormal | BufferOwned)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
901
// Constructs an 8-bit substring (BufferSubstring) viewing |characters|, which belong to |base|.
// Debug builds assert that |base| is not itself a substring.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // Take over the caller's reference to the base string and store it in the tail slot.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
914
// Constructs a 16-bit substring (BufferSubstring) viewing |characters|, which belong to |base|.
// Debug builds assert that |base| is not itself a substring.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // Take over the caller's reference to the base string and store it in the tail slot.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
927
// Vector convenience overload: forwards the vector's data pointer and size to the pointer/length overload.
template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
{
    return create8BitIfPossible(vector.data(), vector.size());
}
932
933ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length)
934{
935 ASSERT(length <= rep.length());
936
937 if (!length)
938 return *empty();
939
940 // Coyping the thing would save more memory sometimes, largely due to the size of pointer.
941 size_t substringSize = allocationSize<StringImpl*>(1);
942 if (rep.is8Bit()) {
943 if (substringSize >= allocationSize<LChar>(length))
944 return create(rep.m_data8 + offset, length);
945 } else {
946 if (substringSize >= allocationSize<UChar>(length))
947 return create(rep.m_data16 + offset, length);
948 }
949
950 auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep);
951
952 // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
953 auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize));
954 if (rep.is8Bit())
955 return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep));
956 return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep));
957}
958
// Creates a string that references a char array literal without copying it.
// The array must hold at least one character besides the terminator, and its length
// is checked at compile time against an allocation-size overflow bound.
template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
{
    COMPILE_ASSERT(characterCount > 1, StringImplFromLiteralNotEmpty);
    COMPILE_ASSERT((characterCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);

    // characterCount includes the trailing terminator, which is not part of the string.
    return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - 1);
}
966
967template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output)
968{
969 if (!length) {
970 output = nullptr;
971 return empty();
972 }
973
974 if (length > maxInternalLength<CharacterType>()) {
975 output = nullptr;
976 return nullptr;
977 }
978 StringImpl* result;
979 if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) {
980 output = nullptr;
981 return nullptr;
982 }
983 output = result->tailPointer<CharacterType>();
984
985 return constructInternal<CharacterType>(*result, length);
986}
987
988template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
989inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector)
990{
991 if (size_t size = vector.size()) {
992 ASSERT(vector.data());
993 if (size > MaxLength)
994 CRASH();
995 return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
996 }
997 return *empty();
998}
999
1000inline size_t StringImpl::cost() const
1001{
1002 // For substrings, return the cost of the base string.
1003 if (bufferOwnership() == BufferSubstring)
1004 return substringBuffer()->cost();
1005
1006 // Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl.
1007 // We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of
1008 // StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of
1009 // 0 here and avoid modifying m_hashAndFlags.
1010 if (m_hashAndFlags & s_hashFlagDidReportCost)
1011 return 0;
1012
1013 m_hashAndFlags |= s_hashFlagDidReportCost;
1014 size_t result = m_length;
1015 if (!is8Bit())
1016 result <<= 1;
1017 return result;
1018}
1019
1020inline size_t StringImpl::costDuringGC()
1021{
1022 if (isStatic())
1023 return 0;
1024
1025 if (bufferOwnership() == BufferSubstring)
1026 return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
1027
1028 size_t result = m_length;
1029 if (!is8Bit())
1030 result <<= 1;
1031 return divideRoundedUp(result, refCount());
1032}
1033
1034inline void StringImpl::setIsAtomic(bool isAtom)
1035{
1036 ASSERT(!isStatic());
1037 ASSERT(!isSymbol());
1038 if (isAtom)
1039 m_hashAndFlags |= s_hashFlagStringKindIsAtomic;
1040 else
1041 m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic;
1042}
1043
// Stores |hash| in the bits of m_hashAndFlags above the flag bits. Debug builds assert
// that no hash is stored yet, that the string is not static, and that |hash| equals
// what StringHasher computes for the characters.
inline void StringImpl::setHash(unsigned hash) const
{
    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.

    ASSERT(!hasHash());
    ASSERT(!isStatic());
    // Multiple clients assume that StringHasher is the canonical string hash function.
    ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
    ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.

    hash <<= s_flagCount;
    ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
    ASSERT(hash); // Verify that 0 is a valid sentinel hash value.

    m_hashAndFlags |= hash; // Store hash with flags in low bits.
}
1062
// Adds one reference; the count advances in steps of s_refCountIncrement.
inline void StringImpl::ref()
{
    STRING_STATS_REF_STRING(*this);

    m_refCount += s_refCountIncrement;
}
1069
1070inline void StringImpl::deref()
1071{
1072 STRING_STATS_DEREF_STRING(*this);
1073
1074 unsigned tempRefCount = m_refCount - s_refCountIncrement;
1075 if (!tempRefCount) {
1076 StringImpl::destroy(this);
1077 return;
1078 }
1079 m_refCount = tempRefCount;
1080}
1081
1082template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters)
1083{
1084 if (numCharacters == 1) {
1085 *destination = *source;
1086 return;
1087 }
1088 memcpy(destination, source, numCharacters * sizeof(CharacterType));
1089}
1090
1091ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters)
1092{
1093 for (unsigned i = 0; i < numCharacters; ++i)
1094 destination[i] = source[i];
1095}
1096
1097inline UChar StringImpl::at(unsigned i) const
1098{
1099 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
1100 return is8Bit() ? m_data8[i] : m_data16[i];
1101}
1102
// Constructs an 8-bit symbol string (StringSymbol | BufferSubstring) viewing |characters|.
// NOTE(review): unlike the substring constructors, substringBuffer() is not assigned here —
// presumably the creator sets it afterwards; confirm at the call sites.
inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1110
// Constructs a 16-bit symbol string (StringSymbol | BufferSubstring) viewing |characters|.
// NOTE(review): unlike the substring constructors, substringBuffer() is not assigned here —
// presumably the creator sets it afterwards; confirm at the call sites.
inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, StringSymbol | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
1118
// Constructs a zero-length 8-bit symbol string whose data pointer is the empty string's characters.
inline StringImpl::StringImpl(CreateSymbolTag)
    : StringImplShape(s_refCountIncrement, 0, empty()->characters8(), s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1126
// Returns the number of bytes needed for a StringImpl followed by |tailElementCount|
// elements of type T. The arithmetic is carried out on a Checked<size_t>.
template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
{
    return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
}
1131
1132template<typename CharacterType>
1133inline size_t StringImpl::maxInternalLength()
1134{
1135 // In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2.
1136 return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType));
1137}
1138
// Returns the byte offset from the start of a StringImpl at which tail data of type T begins.
template<typename T> inline size_t StringImpl::tailOffset()
{
#if COMPILER(MSVC)
    // MSVC doesn't support alignof yet.
    return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
#else
    // The tail starts right after m_hashAndFlags, rounded up to T's alignment.
    return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
#endif
}
1148
1149inline bool StringImpl::requiresCopy() const
1150{
1151 if (bufferOwnership() != BufferInternal)
1152 return true;
1153
1154 if (is8Bit())
1155 return m_data8 == tailPointer<LChar>();
1156 return m_data16 == tailPointer<UChar>();
1157}
1158
// Const view of the tail: the address tailOffset<T>() bytes past the start of this object.
template<typename T> inline const T* StringImpl::tailPointer() const
{
    return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
}
1163
// Mutable view of the tail: the address tailOffset<T>() bytes past the start of this object.
template<typename T> inline T* StringImpl::tailPointer()
{
    return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
}
1168
// Read-only access to the owner-string pointer stored in the tail. Only valid for
// BufferSubstring strings (asserted in debug builds).
inline StringImpl* const& StringImpl::substringBuffer() const
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1175
// Writable access to the owner-string pointer stored in the tail. Only valid for
// BufferSubstring strings (asserted in debug builds).
inline StringImpl*& StringImpl::substringBuffer()
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1182
// Debug check that the stored hash equals the hash StringHasher computes for the characters.
// NOTE(review): only characters8() is hashed here, so this looks intended for 8-bit
// strings only — confirm against the callers.
inline void StringImpl::assertHashIsCorrect() const
{
    ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
}
1187
// constexpr constructor for an 8-bit static string built from a char array literal.
// The length excludes the trailing terminator, the ref count is the static-string flag,
// s_hashFlagDidReportCost is pre-set, and the literal's hash is stored above the flag bits.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlag8BitBuffer | s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1193
// constexpr constructor for a 16-bit static string built from a char16_t array literal.
// The length excludes the trailing terminator, the ref count is the static-string flag,
// s_hashFlagDidReportCost is pre-set, and the literal's hash is stored above the flag bits.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1199
// Views this StaticStringImpl as a StringImpl by reinterpreting the object's address.
inline StringImpl::StaticStringImpl::operator StringImpl&()
{
    return *reinterpret_cast<StringImpl*>(this);
}
1204
// StringImpl/StringImpl overload: forwards to equalIgnoringASCIICaseCommon.
inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1209
// StringImpl/C-string overload: forwards to equalIgnoringASCIICaseCommon.
inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1214
1215inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
1216{
1217 return a && equalIgnoringASCIICase(*a, b);
1218}
1219
// Reference overload: forwards to startsWithLettersIgnoringASCIICaseCommon.
template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1224
1225template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1226{
1227 return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
1228}
1229
// Reference overload: forwards to equalLettersIgnoringASCIICaseCommon.
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1234
1235template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1236{
1237 return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
1238}
1239
1240} // namespace WTF
1241
1242using WTF::StaticStringImpl;
1243using WTF::StringImpl;
1244using WTF::equal;
1245using WTF::isLatin1;
1246