1/*
2 * Copyright (C) 1999 Lars Knoll ([email protected])
3 * Copyright (C) 2005-2019 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#pragma once
24
25#include <limits.h>
26#include <unicode/ustring.h>
27#include <wtf/ASCIICType.h>
28#include <wtf/CheckedArithmetic.h>
29#include <wtf/Expected.h>
30#include <wtf/MathExtras.h>
31#include <wtf/StdLibExtras.h>
32#include <wtf/Vector.h>
33#include <wtf/text/ASCIIFastPath.h>
34#include <wtf/text/ConversionMode.h>
35#include <wtf/text/StringCommon.h>
36#include <wtf/text/StringHasher.h>
37#include <wtf/text/UTF8ConversionError.h>
38
39#if USE(CF)
40typedef const struct __CFString * CFStringRef;
41#endif
42
43#ifdef __OBJC__
44@class NSString;
45#endif
46
47namespace JSC {
48namespace LLInt { class Data; }
49class LLIntOffsetsExtractor;
50}
51
52namespace WTF {
53
54class SymbolImpl;
55class SymbolRegistry;
56
57struct CStringTranslator;
58struct HashAndUTF8CharactersTranslator;
59struct LCharBufferTranslator;
60struct StringHash;
61struct SubstringTranslator;
62struct UCharBufferTranslator;
63
64template<typename> class RetainPtr;
65
66template<typename> struct BufferFromStaticDataTranslator;
67template<typename> struct HashAndCharactersTranslator;
68
// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
70#define STRING_STATS 0
71
72template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length);
73
74#if STRING_STATS
75
76struct StringStats {
77 WTF_MAKE_STRUCT_FAST_ALLOCATED;
78 void add8BitString(unsigned length, bool isSubString = false)
79 {
80 ++m_totalNumberStrings;
81 ++m_number8BitStrings;
82 if (!isSubString)
83 m_total8BitData += length;
84 }
85
86 void add16BitString(unsigned length, bool isSubString = false)
87 {
88 ++m_totalNumberStrings;
89 ++m_number16BitStrings;
90 if (!isSubString)
91 m_total16BitData += length;
92 }
93
94 void removeString(StringImpl&);
95 void printStats();
96
97 static constexpr unsigned s_printStringStatsFrequency = 5000;
98 static std::atomic<unsigned> s_stringRemovesTillPrintStats;
99
100 std::atomic<unsigned> m_refCalls;
101 std::atomic<unsigned> m_derefCalls;
102
103 std::atomic<unsigned> m_totalNumberStrings;
104 std::atomic<unsigned> m_number8BitStrings;
105 std::atomic<unsigned> m_number16BitStrings;
106 std::atomic<unsigned long long> m_total8BitData;
107 std::atomic<unsigned long long> m_total16BitData;
108};
109
// Statistics hooks used when STRING_STATS is enabled. Every macro expands to a single
// expression with no trailing semicolon, so the call site supplies the ';' and the macro
// can be used as the sole statement of an unbraced if / else, exactly like the
// ((void)0) versions used when STRING_STATS is disabled.
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
// The string argument is intentionally unused: only the global call counters are bumped.
#define STRING_STATS_REF_STRING(string) ((void)++StringImpl::stringStats().m_refCalls)
#define STRING_STATS_DEREF_STRING(string) ((void)++StringImpl::stringStats().m_derefCalls)
117
118#else
119
// With STRING_STATS disabled every statistics hook collapses to a void expression that
// never evaluates its arguments.
#define STRING_STATS_ADD_8BIT_STRING(length) static_cast<void>(0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) static_cast<void>(0)
#define STRING_STATS_ADD_16BIT_STRING(length) static_cast<void>(0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) static_cast<void>(0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) static_cast<void>(0)
#define STRING_STATS_REMOVE_STRING(string) static_cast<void>(0)
#define STRING_STATS_REF_STRING(string) static_cast<void>(0)
#define STRING_STATS_DEREF_STRING(string) static_cast<void>(0)
128
129#endif
130
131class StringImplShape {
132 WTF_MAKE_NONCOPYABLE(StringImplShape);
133public:
134 static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max();
135
136protected:
137 StringImplShape(unsigned refCount, unsigned length, const LChar*, unsigned hashAndFlags);
138 StringImplShape(unsigned refCount, unsigned length, const UChar*, unsigned hashAndFlags);
139
140 enum ConstructWithConstExprTag { ConstructWithConstExpr };
141 template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
142 template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
143
144 unsigned m_refCount;
145 unsigned m_length;
146 union {
147 const LChar* m_data8;
148 const UChar* m_data16;
149 // It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++.
150 // These are needed to avoid reinterpret_cast.
151 const char* m_data8Char;
152 const char16_t* m_data16Char;
153 };
154 mutable unsigned m_hashAndFlags;
155};
156
157// FIXME: Use of StringImpl and const is rather confused.
158// The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&.
159// We could mark every member function const and always use "const StringImpl&" and "const StringImpl*".
160// Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere.
161// Right now we use a mix of both, which makes code more confusing and has no benefit.
162
163class StringImpl : private StringImplShape {
164 WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
165
166 friend class AtomStringImpl;
167 friend class JSC::LLInt::Data;
168 friend class JSC::LLIntOffsetsExtractor;
169 friend class PrivateSymbolImpl;
170 friend class RegisteredSymbolImpl;
171 friend class SymbolImpl;
172 friend class ExternalStringImpl;
173
174 friend struct WTF::CStringTranslator;
175 friend struct WTF::HashAndUTF8CharactersTranslator;
176 friend struct WTF::LCharBufferTranslator;
177 friend struct WTF::SubstringTranslator;
178 friend struct WTF::UCharBufferTranslator;
179
180 template<typename> friend struct WTF::BufferFromStaticDataTranslator;
181 template<typename> friend struct WTF::HashAndCharactersTranslator;
182
183public:
184 enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal };
185
186 static constexpr unsigned MaxLength = StringImplShape::MaxLength;
187
188 // The bottom 6 bits in the hash are flags.
189 static constexpr const unsigned s_flagCount = 6;
190private:
191 static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1;
192 static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags");
193 static constexpr const unsigned s_flagStringKindCount = 4;
194
195 static constexpr const unsigned s_hashFlagStringKindIsAtom = 1u << (s_flagStringKindCount);
196 static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1);
197 static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol;
198 static constexpr const unsigned s_hashFlagDidReportCost = 1u << 3;
199 static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 2;
200 static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1);
201
202 enum StringKind {
203 StringNormal = 0u, // non-symbol, non-atomic
204 StringAtom = s_hashFlagStringKindIsAtom, // non-symbol, atomic
205 StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
206 };
207
208 // Create a normal 8-bit string with internal storage (BufferInternal).
209 enum Force8Bit { Force8BitConstructor };
210 StringImpl(unsigned length, Force8Bit);
211
212 // Create a normal 16-bit string with internal storage (BufferInternal).
213 explicit StringImpl(unsigned length);
214
215 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned).
216 StringImpl(MallocPtr<LChar>, unsigned length);
217 StringImpl(MallocPtr<UChar>, unsigned length);
218 enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
219 StringImpl(const UChar*, unsigned length, ConstructWithoutCopyingTag);
220 StringImpl(const LChar*, unsigned length, ConstructWithoutCopyingTag);
221
222 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring).
223 StringImpl(const LChar*, unsigned length, Ref<StringImpl>&&);
224 StringImpl(const UChar*, unsigned length, Ref<StringImpl>&&);
225
226public:
227 WTF_EXPORT_PRIVATE static void destroy(StringImpl*);
228
229 WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar*, unsigned length);
230 WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*, unsigned length);
231 WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
232 template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&);
233 WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*);
234
235 ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); }
236 WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*);
237 ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); }
238
239 static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length);
240
241 template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]);
242
243 // FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead.
244 WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*, unsigned length);
245 WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*);
246
247 WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar*, unsigned length);
248 WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar*, unsigned length);
249 WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&);
250 WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&);
251 template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&);
252
253 // Reallocate the StringImpl. The originalString must be only owned by the Ref,
254 // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
255 // the originalString can't be used after this function.
256 static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
257 static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
258 static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data);
259 static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data);
260
261 static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
262 static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
263 static constexpr unsigned flagIsAtom() { return s_hashFlagStringKindIsAtom; }
264 static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
265 static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; }
266 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }
267
268 template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
269 static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&);
270
271 WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&);
272 WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&);
273
274 unsigned length() const { return m_length; }
275 static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
276 bool isEmpty() const { return !m_length; }
277
278 bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
279 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
280 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }
281
282 template<typename CharacterType> const CharacterType* characters() const;
283
284 size_t cost() const;
285 size_t costDuringGC();
286
287 WTF_EXPORT_PRIVATE size_t sizeInBytes() const;
288
289 bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
290 bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtom; }
291 void setIsAtom(bool);
292
293 bool isExternal() const { return bufferOwnership() == BufferExternal; }
294
295 bool isSubString() const { return bufferOwnership() == BufferSubstring; }
296
297 static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length);
298 static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
299
300 WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
301 WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const;
302 WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const;
303
304private:
305 static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);
306
307 // The high bits of 'hash' are always empty, but we prefer to store our flags
308 // in the low bits because it makes them slightly more efficient to access.
309 // So, we shift left and right when setting and getting our hash code.
310 void setHash(unsigned) const;
311
312 unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; }
313
314public:
315 bool hasHash() const { return !!rawHash(); }
316
317 unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); }
318 unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); }
319
320 WTF_EXPORT_PRIVATE unsigned concurrentHash() const;
321
322 unsigned symbolAwareHash() const;
323 unsigned existingSymbolAwareHash() const;
324
325 bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
326
327 size_t refCount() const { return m_refCount / s_refCountIncrement; }
328 bool hasOneRef() const { return m_refCount == s_refCountIncrement; }
329 bool hasAtLeastOneRef() const { return m_refCount; } // For assertions.
330
331 void ref();
332 void deref();
333
334 class StaticStringImpl : private StringImplShape {
335 WTF_MAKE_NONCOPYABLE(StaticStringImpl);
336 public:
337 // Used to construct static strings, which have an special refCount that can never hit zero.
338 // This means that the static string will never be destroyed, which is important because
339 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
340 //
341 // In order to make StaticStringImpl thread safe, we also need to ensure that the rest of
342 // the fields are never mutated by threads. We have this guarantee because:
343 //
344 // 1. m_length is only set on construction and never mutated thereafter.
345 //
346 // 2. m_data8 and m_data16 are only set on construction and never mutated thereafter.
347 // We also know that a StringImpl never changes from 8 bit to 16 bit because there
348 // is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction.
349 //
350 // 3. m_hashAndFlags will not be mutated by different threads because:
351 //
352 // a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure
353 // that StringImpl::cost() returns early.
354 // This means StaticStringImpl costs are not counted. But since there should only
355 // be a finite set of StaticStringImpls, their cost can be aggregated into a single
356 // system cost if needed.
357 // b. setIsAtom() is never called on a StaticStringImpl.
358 // setIsAtom() asserts !isStatic().
359 // c. setHash() is never called on a StaticStringImpl.
360 // StaticStringImpl's constructor sets the hash on construction.
361 // StringImpl::hash() only sets a new hash iff !hasHash().
362 // Additionally, StringImpl::setHash() asserts hasHash() and !isStatic().
363
364 template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal);
365 template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal);
366 operator StringImpl&();
367 };
368
369 WTF_EXPORT_PRIVATE static StaticStringImpl s_emptyAtomString;
370 ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_emptyAtomString); }
371
372 // FIXME: Does this really belong in StringImpl?
373 template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters);
374 static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters);
375
376 // Some string features, like reference counting and the atomicity flag, are not
377 // thread-safe. We achieve thread safety by isolation, giving each thread
378 // its own copy of the string.
379 Ref<StringImpl> isolatedCopy() const;
380
381 WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength);
382
383 UChar at(unsigned) const;
384 UChar operator[](unsigned i) const { return at(i); }
385 WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned);
386
387 int toIntStrict(bool* ok = 0, int base = 10);
388 unsigned toUIntStrict(bool* ok = 0, int base = 10);
389 int64_t toInt64Strict(bool* ok = 0, int base = 10);
390 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
391 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
392
393 WTF_EXPORT_PRIVATE int toInt(bool* ok = 0); // ignores trailing garbage
394 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
395 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
396 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
397 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
398
399 // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
400 // Like the non-strict functions above, these return the value when there is trailing garbage.
401 // It would be better if these were more consistent with the above functions instead.
402 double toDouble(bool* ok = 0);
403 float toFloat(bool* ok = 0);
404
405 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase();
406 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase();
407 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale();
408 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned);
409 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale();
410 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier);
411 WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier);
412
413 Ref<StringImpl> foldCase();
414
415 Ref<StringImpl> stripWhiteSpace();
416 WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace();
417 Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction);
418
419 Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction);
420 Ref<StringImpl> removeCharacters(CodeUnitMatchFunction);
421
422 bool isAllASCII() const;
423 bool isAllLatin1() const;
424 template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
425
426 size_t find(LChar character, unsigned start = 0);
427 size_t find(char character, unsigned start = 0);
428 size_t find(UChar character, unsigned start = 0);
429 WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = 0);
430 size_t find(const LChar*, unsigned index = 0);
431 ALWAYS_INLINE size_t find(const char* string, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(string), index); }
432 WTF_EXPORT_PRIVATE size_t find(StringImpl*);
433 WTF_EXPORT_PRIVATE size_t find(StringImpl*, unsigned index);
434 WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const;
435 WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
436 WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*) const;
437 WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const;
438
439 WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength);
440 WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl*, unsigned index = MaxLength);
441
442 WTF_EXPORT_PRIVATE bool startsWith(const StringImpl*) const;
443 WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const;
444 WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl*) const;
445 WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const;
446 WTF_EXPORT_PRIVATE bool startsWith(UChar) const;
447 WTF_EXPORT_PRIVATE bool startsWith(const char*, unsigned matchLength) const;
448 template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - 1); }
449 WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;
450
451 WTF_EXPORT_PRIVATE bool endsWith(StringImpl*);
452 WTF_EXPORT_PRIVATE bool endsWith(StringImpl&);
453 WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl*) const;
454 WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const;
455 WTF_EXPORT_PRIVATE bool endsWith(UChar) const;
456 WTF_EXPORT_PRIVATE bool endsWith(const char*, unsigned matchLength) const;
457 template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - 1); }
458 WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;
459
460 WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar);
461 WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*);
462 ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
463 WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
464 Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
465 WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl*, StringImpl*);
466 WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*);
467
468 WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);
469
470#if USE(CF)
471 RetainPtr<CFStringRef> createCFString();
472#endif
473
474#ifdef __OBJC__
475 WTF_EXPORT_PRIVATE operator NSString *();
476#endif
477
478#if STRING_STATS
479 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
480#endif
481
482 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }
483
484 template<typename T> static size_t headerSize() { return tailOffset<T>(); }
485
486protected:
487 ~StringImpl();
488
489 // Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring).
490 enum CreateSymbolTag { CreateSymbol };
491 StringImpl(CreateSymbolTag, const LChar*, unsigned length);
492 StringImpl(CreateSymbolTag, const UChar*, unsigned length);
493
494 // Null symbol.
495 explicit StringImpl(CreateSymbolTag);
496
497private:
498 template<typename> static size_t allocationSize(Checked<size_t> tailElementCount);
499 template<typename> static size_t maxInternalLength();
500 template<typename> static size_t tailOffset();
501
502 bool requiresCopy() const;
503 template<typename T> const T* tailPointer() const;
504 template<typename T> T* tailPointer();
505 StringImpl* const& substringBuffer() const;
506 StringImpl*& substringBuffer();
507
508 enum class CaseConvertType { Upper, Lower };
509 template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned);
510
511 template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate);
512 template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction);
513 template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate);
514 template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned);
515 template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&);
516 template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&);
517 template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&);
518 template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType*, unsigned);
519 WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
520
521 // The bottom bit in the ref count indicates a static (immortal) string.
522 static constexpr unsigned s_refCountFlagIsStaticString = 0x1;
523 static constexpr unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.
524
525#if STRING_STATS
526 WTF_EXPORT_PRIVATE static StringStats m_stringStats;
527#endif
528
529public:
530 void assertHashIsCorrect() const;
531};
532
533using StaticStringImpl = StringImpl::StaticStringImpl;
534
535static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "");
536
537#if !ASSERT_DISABLED
538
539// StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency
540// as they are not allocated by fastMalloc. We don't currently have any way to detect that case
541// so we ignore the consistency check for all StringImpl*.
542template<> struct ValueCheck<StringImpl*> {
543 static void checkConsistency(const StringImpl*) { }
544};
545
546#endif
547
548WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const StringImpl*);
549WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*);
550inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
551WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*, unsigned);
552WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const UChar*, unsigned);
553inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
554inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
555inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
556WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b);
557
558WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl*, StringImpl*);
559WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*);
560
561bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
562WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*);
563bool equalIgnoringASCIICase(const StringImpl&, const char*);
564bool equalIgnoringASCIICase(const StringImpl*, const char*);
565
566WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*);
567
568template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
569template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]);
570
571size_t find(const LChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);
572size_t find(const UChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);
573
574template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType*, unsigned length, unsigned index = StringImpl::MaxLength);
575template<typename CharacterType> size_t reverseFind(const CharacterType*, unsigned length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
576size_t reverseFind(const UChar*, unsigned length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
577size_t reverseFind(const LChar*, unsigned length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);
578
579template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);
580
581template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1*, unsigned length1, const CharacterType2*, unsigned length2);
582int codePointCompare(const StringImpl*, const StringImpl*);
583
// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
// Most WebKit callers don't want that and should use isASCIISpace or isHTMLSpace instead.
586bool isSpaceOrNewline(UChar32);
587
588template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
589
590// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
591template<typename T> struct DefaultHash;
592template<> struct DefaultHash<StringImpl*> {
593 typedef StringHash Hash;
594};
595template<> struct DefaultHash<RefPtr<StringImpl>> {
596 typedef StringHash Hash;
597};
598
// Expands to an immediately invoked lambda that owns a function-local static
// StaticStringImpl built from `characters` and yields a pointer to it.
#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
        static StaticStringImpl staticStringImpl(characters); \
        return &staticStringImpl; \
    }())
603
604template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length)
605{
606 return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor });
607}
608
609template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length)
610{
611 return adoptRef(*new (NotNull, &string) StringImpl { length });
612}
613
614template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
615{
616 return characters8();
617}
618
619template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
620{
621 return characters16();
622}
623
624inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
625{
626 while (index < length) {
627 if (matchFunction(characters[index]))
628 return index;
629 ++index;
630 }
631 return notFound;
632}
633
634inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index)
635{
636 while (index < length) {
637 if (matchFunction(characters[index]))
638 return index;
639 ++index;
640 }
641 return notFound;
642}
643
644template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index)
645{
646 if (!length)
647 return notFound;
648 if (index >= length)
649 index = length - 1;
650 auto character = characters[index];
651 while (character != '\n' && character != '\r') {
652 if (!index--)
653 return notFound;
654 character = characters[index];
655 }
656 return index;
657}
658
659template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index)
660{
661 if (!length)
662 return notFound;
663 if (index >= length)
664 index = length - 1;
665 while (characters[index] != matchCharacter) {
666 if (!index--)
667 return notFound;
668 }
669 return index;
670}
671
// Overload for searching a 16-bit buffer for an 8-bit character: widens the character to
// UChar and forwards to the same-type template overload.
ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index)
{
    return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
}
676
677inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index)
678{
679 if (!isLatin1(matchCharacter))
680 return notFound;
681 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
682}
683
684inline size_t StringImpl::find(LChar character, unsigned start)
685{
686 if (is8Bit())
687 return WTF::find(characters8(), m_length, character, start);
688 return WTF::find(characters16(), m_length, character, start);
689}
690
// Convenience overload for plain char: converts to LChar and forwards to find(LChar, unsigned).
ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
{
    return find(static_cast<LChar>(character), start);
}
695
696inline size_t StringImpl::find(UChar character, unsigned start)
697{
698 if (is8Bit())
699 return WTF::find(characters8(), m_length, character, start);
700 return WTF::find(characters16(), m_length, character, start);
701}
702
// Compares a UChar vector against a StringImpl by forwarding the vector's data pointer and
// size to the equalIgnoringNullity overload that takes a buffer, a length, and a StringImpl*.
template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{
    return equalIgnoringNullity(a.data(), a.size(), b);
}
707
// Three-way comparison of two character buffers. Returns -1, 0, or 1 depending on whether
// the first buffer orders before, equal to, or after the second one.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    unsigned sharedLength = std::min(length1, length2);

    // The first differing code unit within the shared prefix decides the ordering.
    for (unsigned i = 0; i < sharedLength; ++i) {
        if (characters1[i] == characters2[i])
            continue;
        return characters1[i] > characters2[i] ? 1 : -1;
    }

    // One buffer is a prefix of the other (or they are equal); the longer one orders later.
    if (length1 > length2)
        return 1;
    return length1 < length2 ? -1 : 0;
}
726
727inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
728{
729 // FIXME: Should null strings compare as less than empty strings rather than equal to them?
730 if (!string1)
731 return (string2 && string2->length()) ? -1 : 0;
732 if (!string2)
733 return string1->length() ? 1 : 0;
734
735 bool string1Is8Bit = string1->is8Bit();
736 bool string2Is8Bit = string2->is8Bit();
737 if (string1Is8Bit) {
738 if (string2Is8Bit)
739 return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length());
740 return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length());
741 }
742 if (string2Is8Bit)
743 return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length());
744 return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length());
745}
746
747inline bool isSpaceOrNewline(UChar32 character)
748{
749 // Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS.
750 return isLatin1(character) ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL;
751}
752
753template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
754{
755 ASSERT(string);
756 size_t length = 0;
757 while (string[length])
758 ++length;
759
760 RELEASE_ASSERT(length < StringImpl::MaxLength);
761 return static_cast<unsigned>(length);
762}
763
// Initializes the shape for an 8-bit string: stores the given reference count, length,
// LChar data pointer, and combined hash/flags word unchanged.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8(data8)
    , m_hashAndFlags(hashAndFlags)
{
}
771
// Initializes the shape for a 16-bit string: stores the given reference count, length,
// UChar data pointer, and combined hash/flags word unchanged.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16(data16)
    , m_hashAndFlags(hashAndFlags)
{
}
779
// constexpr constructor taking a reference to a char array; the array is stored through
// the m_data8Char member. The trailing tag parameter only selects this overload.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
787
// constexpr constructor taking a reference to a char16_t array; the array is stored through
// the m_data16Char member. The trailing tag parameter only selects this overload.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
795
796inline Ref<StringImpl> StringImpl::isolatedCopy() const
797{
798 if (!requiresCopy()) {
799 if (is8Bit())
800 return StringImpl::createWithoutCopying(m_data8, m_length);
801 return StringImpl::createWithoutCopying(m_data16, m_length);
802 }
803
804 if (is8Bit())
805 return create(m_data8, m_length);
806 return create(m_data16, m_length);
807}
808
809inline bool StringImpl::isAllASCII() const
810{
811 if (is8Bit())
812 return charactersAreAllASCII(characters8(), length());
813 return charactersAreAllASCII(characters16(), length());
814}
815
816inline bool StringImpl::isAllLatin1() const
817{
818 if (is8Bit())
819 return true;
820 auto* characters = characters16();
821 UChar ored = 0;
822 for (size_t i = 0; i < length(); ++i)
823 ored |= characters[i];
824 return !(ored & 0xFF00);
825}
826
827template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length)
828{
829 for (size_t i = 0; i < length; ++i) {
830 if (!isSpecialCharacter(characters[i]))
831 return false;
832 }
833 return true;
834}
835
836template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const
837{
838 if (is8Bit())
839 return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length());
840 return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length());
841}
842
// Constructs an 8-bit string whose data pointer refers to this object's own tail storage
// (tailPointer<LChar>()). Flagged as a normal string with an internal buffer; the
// reference count starts at s_refCountIncrement.
inline StringImpl::StringImpl(unsigned length, Force8Bit)
    : StringImplShape(s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
851
// Constructs a 16-bit string whose data pointer refers to this object's own tail storage
// (tailPointer<UChar>()). Flagged as a normal string with an internal buffer; the
// reference count starts at s_refCountIncrement.
inline StringImpl::StringImpl(unsigned length)
    : StringImplShape(s_refCountIncrement, length, tailPointer<UChar>(), StringNormal | BufferInternal)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
860
// Constructs an 8-bit string around a buffer released from the given MallocPtr (leakPtr()).
// The buffer is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer | StringNormal | BufferOwned)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
869
// Constructs a 16-bit string that stores the caller's character pointer directly; nothing
// is copied here. The buffer is flagged BufferInternal.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferInternal)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
878
// Constructs an 8-bit string that stores the caller's character pointer directly; nothing
// is copied here. The buffer is flagged BufferInternal.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
887
// Constructs a 16-bit string around a buffer released from the given MallocPtr (leakPtr()).
// The buffer is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), StringNormal | BufferOwned)
{
    // A non-null data pointer and a non-zero length are expected here.
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
896
// Constructs an 8-bit substring: `characters` is stored as the data pointer and `base` is
// recorded in substringBuffer(). The base must not itself be a substring.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // The reference held by `base` is leaked into the stored raw pointer.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
909
// Constructs a 16-bit substring: `characters` is stored as the data pointer and `base` is
// recorded in substringBuffer(). The base must not itself be a substring.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // The reference held by `base` is leaked into the stored raw pointer.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
922
// Vector convenience overload: forwards the vector's data pointer and size to the
// (pointer, length) overload of create8BitIfPossible.
template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
{
    return create8BitIfPossible(vector.data(), vector.size());
}
927
// Creates a string for `length` characters of `rep` starting at `offset`. Short ranges are
// copied into a fresh string; otherwise the result points into the existing characters and
// records the owning string so the character buffer is shared.
ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length)
{
    ASSERT(length <= rep.length());

    if (!length)
        return *empty();

    // Copying the characters sometimes saves more memory than sharing, largely due to the size of the owner pointer.
    size_t substringSize = allocationSize<StringImpl*>(1);
    if (rep.is8Bit()) {
        if (substringSize >= allocationSize<LChar>(length))
            return create(rep.m_data8 + offset, length);
    } else {
        if (substringSize >= allocationSize<UChar>(length))
            return create(rep.m_data16 + offset, length);
    }

    // If `rep` is itself a substring, use its owner instead, so the recorded base is never a substring.
    auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep);

    // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
    auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize));
    if (rep.is8Bit())
        return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep));
    return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep));
}
953
// Creates a string that refers to the characters of a char array without copying them.
// The last array element (the terminating null of a literal) is excluded from the length,
// and an array of size one or less is rejected at compile time.
template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
{
    COMPILE_ASSERT(characterCount > 1, StringImplFromLiteralNotEmpty);
    COMPILE_ASSERT((characterCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);

    return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - 1);
}
961
962template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output)
963{
964 if (!length) {
965 output = nullptr;
966 return empty();
967 }
968
969 if (length > maxInternalLength<CharacterType>()) {
970 output = nullptr;
971 return nullptr;
972 }
973 StringImpl* result;
974 if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) {
975 output = nullptr;
976 return nullptr;
977 }
978 output = result->tailPointer<CharacterType>();
979
980 return constructInternal<CharacterType>(*result, length);
981}
982
983template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity>
984inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector)
985{
986 if (size_t size = vector.size()) {
987 ASSERT(vector.data());
988 if (size > MaxLength)
989 CRASH();
990 return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
991 }
992 return *empty();
993}
994
995inline size_t StringImpl::cost() const
996{
997 // For substrings, return the cost of the base string.
998 if (bufferOwnership() == BufferSubstring)
999 return substringBuffer()->cost();
1000
1001 // Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl.
1002 // We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of
1003 // StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of
1004 // 0 here and avoid modifying m_hashAndFlags.
1005 if (m_hashAndFlags & s_hashFlagDidReportCost)
1006 return 0;
1007
1008 m_hashAndFlags |= s_hashFlagDidReportCost;
1009 size_t result = m_length;
1010 if (!is8Bit())
1011 result <<= 1;
1012 return result;
1013}
1014
1015inline size_t StringImpl::costDuringGC()
1016{
1017 if (isStatic())
1018 return 0;
1019
1020 if (bufferOwnership() == BufferSubstring)
1021 return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
1022
1023 size_t result = m_length;
1024 if (!is8Bit())
1025 result <<= 1;
1026 return divideRoundedUp(result, refCount());
1027}
1028
1029inline void StringImpl::setIsAtom(bool isAtom)
1030{
1031 ASSERT(!isStatic());
1032 ASSERT(!isSymbol());
1033 if (isAtom)
1034 m_hashAndFlags |= s_hashFlagStringKindIsAtom;
1035 else
1036 m_hashAndFlags &= ~s_hashFlagStringKindIsAtom;
1037}
1038
// Stores a freshly computed hash in the upper bits of m_hashAndFlags, leaving the flag
// bits in the low s_flagCount bits untouched. Must only be called once per string, and
// never on a static string.
inline void StringImpl::setHash(unsigned hash) const
{
    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.

    ASSERT(!hasHash());
    ASSERT(!isStatic());
    // Multiple clients assume that StringHasher is the canonical string hash function.
    ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
    ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.

    hash <<= s_flagCount;
    ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
    ASSERT(hash); // Verify that 0 is a valid sentinel hash value.

    m_hashAndFlags |= hash; // Store hash with flags in low bits.
}
1057
// Adds one reference by increasing m_refCount by s_refCountIncrement.
inline void StringImpl::ref()
{
    STRING_STATS_REF_STRING(*this);

    m_refCount += s_refCountIncrement;
}
1064
1065inline void StringImpl::deref()
1066{
1067 STRING_STATS_DEREF_STRING(*this);
1068
1069 unsigned tempRefCount = m_refCount - s_refCountIncrement;
1070 if (!tempRefCount) {
1071 StringImpl::destroy(this);
1072 return;
1073 }
1074 m_refCount = tempRefCount;
1075}
1076
1077template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters)
1078{
1079 if (numCharacters == 1) {
1080 *destination = *source;
1081 return;
1082 }
1083 memcpy(destination, source, numCharacters * sizeof(CharacterType));
1084}
1085
1086ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters)
1087{
1088 for (unsigned i = 0; i < numCharacters; ++i)
1089 destination[i] = source[i];
1090}
1091
1092inline UChar StringImpl::at(unsigned i) const
1093{
1094 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
1095 return is8Bit() ? m_data8[i] : m_data16[i];
1096}
1097
// Symbol constructor for 8-bit characters: stores the given pointer and length with the
// StringSymbol kind and BufferSubstring ownership flags. Unlike the normal-string
// constructors, the length is not asserted to be non-zero.
inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1105
// Symbol constructor for 16-bit characters: stores the given pointer and length with the
// StringSymbol kind and BufferSubstring ownership flags. Unlike the normal-string
// constructors, the length is not asserted to be non-zero.
inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, StringSymbol | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
1113
// Symbol constructor without characters: length is zero and the data pointer is taken
// from the shared empty string's 8-bit buffer.
inline StringImpl::StringImpl(CreateSymbolTag)
    : StringImplShape(s_refCountIncrement, 0, empty()->characters8(), s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1121
// Returns the number of bytes needed for a StringImpl followed by `tailElementCount`
// elements of type T. The sum is computed in Checked<size_t> arithmetic.
// NOTE(review): overflow behavior depends on Checked's default handler — confirm.
template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
{
    return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
}
1126
1127template<typename CharacterType>
1128inline size_t StringImpl::maxInternalLength()
1129{
1130 // In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2.
1131 return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType));
1132}
1133
// Returns the byte offset, from the start of a StringImpl, at which tail data of type T
// begins. Outside MSVC this is the end of m_hashAndFlags rounded up to T's alignment.
template<typename T> inline size_t StringImpl::tailOffset()
{
#if COMPILER(MSVC)
    // MSVC doesn't support alignof yet.
    return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
#else
    return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
#endif
}
1143
1144inline bool StringImpl::requiresCopy() const
1145{
1146 if (bufferOwnership() != BufferInternal)
1147 return true;
1148
1149 if (is8Bit())
1150 return m_data8 == tailPointer<LChar>();
1151 return m_data16 == tailPointer<UChar>();
1152}
1153
// Returns a const pointer to the tail storage: the address tailOffset<T>() bytes past
// the start of this object.
template<typename T> inline const T* StringImpl::tailPointer() const
{
    return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
}
1158
// Returns a mutable pointer to the tail storage: the address tailOffset<T>() bytes past
// the start of this object.
template<typename T> inline T* StringImpl::tailPointer()
{
    return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
}
1163
// Returns the base-string pointer kept in the tail storage of a substring (read-only
// access). Only valid when the buffer ownership is BufferSubstring.
inline StringImpl* const& StringImpl::substringBuffer() const
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1170
// Returns a writable reference to the base-string pointer kept in the tail storage of a
// substring. Only valid when the buffer ownership is BufferSubstring.
inline StringImpl*& StringImpl::substringBuffer()
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1177
// Debug check that the stored hash equals the StringHasher value of the characters.
// NOTE(review): only the 8-bit buffer is hashed here; presumably this is called solely
// on 8-bit strings — confirm against callers.
inline void StringImpl::assertHashIsCorrect() const
{
    ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
}
1182
// constexpr constructor for an 8-bit static string built from a char array. The length
// excludes the last array element, the reference count is the static-string flag value,
// and the hash is computed from the literal and stored above the flag bits. The
// s_hashFlagDidReportCost bit is set from the start.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlag8BitBuffer | s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1188
// constexpr constructor for a 16-bit static string built from a char16_t array. The length
// excludes the last array element, the reference count is the static-string flag value,
// and the hash is computed from the literal and stored above the flag bits. The
// s_hashFlagDidReportCost bit is set from the start.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1194
// Conversion that reinterprets this StaticStringImpl object as a StringImpl.
// NOTE(review): relies on the two types sharing the same layout — confirm in the class definitions.
inline StringImpl::StaticStringImpl::operator StringImpl&()
{
    return *reinterpret_cast<StringImpl*>(this);
}
1199
// Compares two strings while ignoring ASCII case; forwards to the shared implementation.
inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1204
// Compares a string with a C string while ignoring ASCII case; forwards to the shared
// implementation.
inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1209
1210inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
1211{
1212 return a && equalIgnoringASCIICase(*a, b);
1213}
1214
// Tests whether the string begins with the given literal, ignoring ASCII case; forwards to
// the shared implementation. The parameter name indicates the literal is expected to be lowercase.
template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1219
1220template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1221{
1222 return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
1223}
1224
// Tests whether the string equals the given literal, ignoring ASCII case; forwards to the
// shared implementation. The parameter name indicates the literal is expected to be lowercase.
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1229
1230template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1231{
1232 return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
1233}
1234
1235} // namespace WTF
1236
1237using WTF::StaticStringImpl;
1238using WTF::StringImpl;
1239using WTF::equal;
1240