1 | /* |
2 | * Copyright (C) 1999 Lars Knoll ([email protected]) |
3 | * Copyright (C) 2005-2019 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2009 Google Inc. All rights reserved. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #pragma once |
24 | |
25 | #include <limits.h> |
26 | #include <unicode/ustring.h> |
27 | #include <wtf/ASCIICType.h> |
28 | #include <wtf/CheckedArithmetic.h> |
29 | #include <wtf/Expected.h> |
30 | #include <wtf/MathExtras.h> |
31 | #include <wtf/StdLibExtras.h> |
32 | #include <wtf/Vector.h> |
33 | #include <wtf/text/ASCIIFastPath.h> |
34 | #include <wtf/text/ConversionMode.h> |
35 | #include <wtf/text/StringCommon.h> |
36 | #include <wtf/text/StringHasher.h> |
37 | #include <wtf/text/UTF8ConversionError.h> |
38 | |
39 | #if USE(CF) |
40 | typedef const struct __CFString * CFStringRef; |
41 | #endif |
42 | |
43 | #ifdef __OBJC__ |
44 | @class NSString; |
45 | #endif |
46 | |
// Forward declarations of the JavaScriptCore types that StringImpl names as friends.
namespace JSC {
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
}
51 | |
52 | namespace WTF { |
53 | |
// Classes that are only referred to by name in this header.
class SymbolImpl;
class SymbolRegistry;

// Translator and hash structs; the translators are befriended by StringImpl below.
struct CStringTranslator;
struct HashAndUTF8CharactersTranslator;
struct LCharBufferTranslator;
struct StringHash;
struct SubstringTranslator;
struct UCharBufferTranslator;

// Class and struct templates that are only referred to by name in this header.
template<typename T> class RetainPtr;
template<typename T> struct BufferFromStaticDataTranslator;
template<typename T> struct HashAndCharactersTranslator;
68 | |
// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
70 | #define STRING_STATS 0 |
71 | |
72 | template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length); |
73 | |
#if STRING_STATS

// StringImpl is defined later in this file; StringStats only needs the name.
class StringImpl;

// Counters describing the strings created and released while STRING_STATS is enabled.
// All counters are atomics and start at zero.
struct StringStats {
    WTF_MAKE_STRUCT_FAST_ALLOCATED;

    // Records one more 8-bit string. The character data total is only increased
    // when the string is not a substring.
    void add8BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number8BitStrings;
        if (!isSubString)
            m_total8BitData += length;
    }

    // Records one more 16-bit string. The character data total is only increased
    // when the string is not a substring.
    void add16BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number16BitStrings;
        if (!isSubString)
            m_total16BitData += length;
    }

    void removeString(StringImpl&);
    void printStats();

    static constexpr unsigned s_printStringStatsFrequency = 5000;
    static std::atomic<unsigned> s_stringRemovesTillPrintStats;

    std::atomic<unsigned> m_refCalls { 0 };
    std::atomic<unsigned> m_derefCalls { 0 };

    std::atomic<unsigned> m_totalNumberStrings { 0 };
    std::atomic<unsigned> m_number8BitStrings { 0 };
    std::atomic<unsigned> m_number16BitStrings { 0 };
    std::atomic<unsigned long long> m_total8BitData { 0 };
    std::atomic<unsigned long long> m_total16BitData { 0 };
};

// Every macro expands to a single void expression without a trailing semicolon, so
// "MACRO(x);" is exactly one statement in both configurations (safe in unbraced if/else).
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
// StringStats has no counter for upconverted strings; defined as a no-op so that both
// configurations provide the same set of macros.
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
#define STRING_STATS_REF_STRING(string) ((void)++StringImpl::stringStats().m_refCalls)
#define STRING_STATS_DEREF_STRING(string) ((void)++StringImpl::stringStats().m_derefCalls)

#else

// Statistics disabled: every macro is a no-op expression that does not evaluate its arguments.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#define STRING_STATS_REF_STRING(string) ((void)0)
#define STRING_STATS_DEREF_STRING(string) ((void)0)

#endif
130 | |
131 | class StringImplShape { |
132 | WTF_MAKE_NONCOPYABLE(StringImplShape); |
133 | public: |
134 | static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max(); |
135 | |
136 | protected: |
137 | StringImplShape(unsigned refCount, unsigned length, const LChar*, unsigned hashAndFlags); |
138 | StringImplShape(unsigned refCount, unsigned length, const UChar*, unsigned hashAndFlags); |
139 | |
140 | enum ConstructWithConstExprTag { ConstructWithConstExpr }; |
141 | template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag); |
142 | template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag); |
143 | |
144 | unsigned m_refCount; |
145 | unsigned m_length; |
146 | union { |
147 | const LChar* m_data8; |
148 | const UChar* m_data16; |
149 | // It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++. |
150 | // These are needed to avoid reinterpret_cast. |
151 | const char* m_data8Char; |
152 | const char16_t* m_data16Char; |
153 | }; |
154 | mutable unsigned m_hashAndFlags; |
155 | }; |
156 | |
157 | // FIXME: Use of StringImpl and const is rather confused. |
158 | // The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&. |
159 | // We could mark every member function const and always use "const StringImpl&" and "const StringImpl*". |
160 | // Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere. |
161 | // Right now we use a mix of both, which makes code more confusing and has no benefit. |
162 | |
163 | class StringImpl : private StringImplShape { |
164 | WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; |
165 | |
166 | friend class AtomStringImpl; |
167 | friend class JSC::LLInt::Data; |
168 | friend class JSC::LLIntOffsetsExtractor; |
169 | friend class PrivateSymbolImpl; |
170 | friend class RegisteredSymbolImpl; |
171 | friend class SymbolImpl; |
172 | friend class ExternalStringImpl; |
173 | |
174 | friend struct WTF::CStringTranslator; |
175 | friend struct WTF::HashAndUTF8CharactersTranslator; |
176 | friend struct WTF::LCharBufferTranslator; |
177 | friend struct WTF::SubstringTranslator; |
178 | friend struct WTF::UCharBufferTranslator; |
179 | |
180 | template<typename> friend struct WTF::BufferFromStaticDataTranslator; |
181 | template<typename> friend struct WTF::HashAndCharactersTranslator; |
182 | |
183 | public: |
184 | enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal }; |
185 | |
186 | static constexpr unsigned MaxLength = StringImplShape::MaxLength; |
187 | |
188 | // The bottom 6 bits in the hash are flags. |
189 | static constexpr const unsigned s_flagCount = 6; |
190 | private: |
191 | static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1; |
192 | static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags" ); |
193 | static constexpr const unsigned s_flagStringKindCount = 4; |
194 | |
195 | static constexpr const unsigned s_hashFlagStringKindIsAtom = 1u << (s_flagStringKindCount); |
196 | static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1); |
197 | static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol; |
198 | static constexpr const unsigned s_hashFlagDidReportCost = 1u << 3; |
199 | static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 2; |
200 | static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1); |
201 | |
202 | enum StringKind { |
203 | StringNormal = 0u, // non-symbol, non-atomic |
204 | StringAtom = s_hashFlagStringKindIsAtom, // non-symbol, atomic |
205 | StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic |
206 | }; |
207 | |
208 | // Create a normal 8-bit string with internal storage (BufferInternal). |
209 | enum Force8Bit { Force8BitConstructor }; |
210 | StringImpl(unsigned length, Force8Bit); |
211 | |
212 | // Create a normal 16-bit string with internal storage (BufferInternal). |
213 | explicit StringImpl(unsigned length); |
214 | |
215 | // Create a StringImpl adopting ownership of the provided buffer (BufferOwned). |
216 | StringImpl(MallocPtr<LChar>, unsigned length); |
217 | StringImpl(MallocPtr<UChar>, unsigned length); |
218 | enum ConstructWithoutCopyingTag { ConstructWithoutCopying }; |
219 | StringImpl(const UChar*, unsigned length, ConstructWithoutCopyingTag); |
220 | StringImpl(const LChar*, unsigned length, ConstructWithoutCopyingTag); |
221 | |
222 | // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring). |
223 | StringImpl(const LChar*, unsigned length, Ref<StringImpl>&&); |
224 | StringImpl(const UChar*, unsigned length, Ref<StringImpl>&&); |
225 | |
226 | public: |
227 | WTF_EXPORT_PRIVATE static void destroy(StringImpl*); |
228 | |
229 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar*, unsigned length); |
230 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*, unsigned length); |
231 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length); |
232 | template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&); |
233 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*); |
234 | |
235 | ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); } |
236 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*); |
237 | ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); } |
238 | |
239 | static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length); |
240 | |
241 | template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]); |
242 | |
243 | // FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead. |
244 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*, unsigned length); |
245 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*); |
246 | |
247 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar*, unsigned length); |
248 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar*, unsigned length); |
249 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&); |
250 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&); |
251 | template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&); |
252 | |
253 | // Reallocate the StringImpl. The originalString must be only owned by the Ref, |
254 | // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(), |
255 | // the originalString can't be used after this function. |
256 | static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); |
257 | static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); |
258 | static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); |
259 | static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); |
260 | |
261 | static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); } |
262 | static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } |
263 | static constexpr unsigned flagIsAtom() { return s_hashFlagStringKindIsAtom; } |
264 | static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; } |
265 | static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; } |
266 | static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); } |
267 | |
268 | template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity> |
269 | static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&); |
270 | |
271 | WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&); |
272 | WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&); |
273 | |
274 | unsigned length() const { return m_length; } |
275 | static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); } |
276 | bool isEmpty() const { return !m_length; } |
277 | |
278 | bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } |
279 | ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; } |
280 | ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; } |
281 | |
282 | template<typename CharacterType> const CharacterType* characters() const; |
283 | |
284 | size_t cost() const; |
285 | size_t costDuringGC(); |
286 | |
287 | WTF_EXPORT_PRIVATE size_t sizeInBytes() const; |
288 | |
289 | bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; } |
290 | bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtom; } |
291 | void setIsAtom(bool); |
292 | |
293 | bool isExternal() const { return bufferOwnership() == BufferExternal; } |
294 | |
295 | bool isSubString() const { return bufferOwnership() == BufferSubstring; } |
296 | |
297 | static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length); |
298 | static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion); |
299 | |
300 | WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const; |
301 | WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const; |
302 | WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const; |
303 | |
304 | private: |
305 | static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode); |
306 | |
307 | // The high bits of 'hash' are always empty, but we prefer to store our flags |
308 | // in the low bits because it makes them slightly more efficient to access. |
309 | // So, we shift left and right when setting and getting our hash code. |
310 | void setHash(unsigned) const; |
311 | |
312 | unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; } |
313 | |
314 | public: |
315 | bool hasHash() const { return !!rawHash(); } |
316 | |
317 | unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); } |
318 | unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); } |
319 | |
320 | WTF_EXPORT_PRIVATE unsigned concurrentHash() const; |
321 | |
322 | unsigned symbolAwareHash() const; |
323 | unsigned existingSymbolAwareHash() const; |
324 | |
325 | bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } |
326 | |
327 | size_t refCount() const { return m_refCount / s_refCountIncrement; } |
328 | bool hasOneRef() const { return m_refCount == s_refCountIncrement; } |
329 | bool hasAtLeastOneRef() const { return m_refCount; } // For assertions. |
330 | |
331 | void ref(); |
332 | void deref(); |
333 | |
334 | class StaticStringImpl : private StringImplShape { |
335 | WTF_MAKE_NONCOPYABLE(StaticStringImpl); |
336 | public: |
337 | // Used to construct static strings, which have an special refCount that can never hit zero. |
338 | // This means that the static string will never be destroyed, which is important because |
339 | // static strings will be shared across threads & ref-counted in a non-threadsafe manner. |
340 | // |
341 | // In order to make StaticStringImpl thread safe, we also need to ensure that the rest of |
342 | // the fields are never mutated by threads. We have this guarantee because: |
343 | // |
344 | // 1. m_length is only set on construction and never mutated thereafter. |
345 | // |
346 | // 2. m_data8 and m_data16 are only set on construction and never mutated thereafter. |
347 | // We also know that a StringImpl never changes from 8 bit to 16 bit because there |
348 | // is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction. |
349 | // |
350 | // 3. m_hashAndFlags will not be mutated by different threads because: |
351 | // |
352 | // a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure |
353 | // that StringImpl::cost() returns early. |
354 | // This means StaticStringImpl costs are not counted. But since there should only |
355 | // be a finite set of StaticStringImpls, their cost can be aggregated into a single |
356 | // system cost if needed. |
357 | // b. setIsAtom() is never called on a StaticStringImpl. |
358 | // setIsAtom() asserts !isStatic(). |
359 | // c. setHash() is never called on a StaticStringImpl. |
360 | // StaticStringImpl's constructor sets the hash on construction. |
361 | // StringImpl::hash() only sets a new hash iff !hasHash(). |
362 | // Additionally, StringImpl::setHash() asserts hasHash() and !isStatic(). |
363 | |
364 | template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal); |
365 | template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal); |
366 | operator StringImpl&(); |
367 | }; |
368 | |
369 | WTF_EXPORT_PRIVATE static StaticStringImpl s_emptyAtomString; |
370 | ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_emptyAtomString); } |
371 | |
372 | // FIXME: Does this really belong in StringImpl? |
373 | template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters); |
374 | static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters); |
375 | |
376 | // Some string features, like reference counting and the atomicity flag, are not |
377 | // thread-safe. We achieve thread safety by isolation, giving each thread |
378 | // its own copy of the string. |
379 | Ref<StringImpl> isolatedCopy() const; |
380 | |
381 | WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength); |
382 | |
383 | UChar at(unsigned) const; |
384 | UChar operator[](unsigned i) const { return at(i); } |
385 | WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned); |
386 | |
387 | int toIntStrict(bool* ok = 0, int base = 10); |
388 | unsigned toUIntStrict(bool* ok = 0, int base = 10); |
389 | int64_t toInt64Strict(bool* ok = 0, int base = 10); |
390 | uint64_t toUInt64Strict(bool* ok = 0, int base = 10); |
391 | intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); |
392 | |
393 | WTF_EXPORT_PRIVATE int toInt(bool* ok = 0); // ignores trailing garbage |
394 | unsigned toUInt(bool* ok = 0); // ignores trailing garbage |
395 | int64_t toInt64(bool* ok = 0); // ignores trailing garbage |
396 | uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage |
397 | intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage |
398 | |
399 | // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. |
400 | // Like the non-strict functions above, these return the value when there is trailing garbage. |
401 | // It would be better if these were more consistent with the above functions instead. |
402 | double toDouble(bool* ok = 0); |
403 | float toFloat(bool* ok = 0); |
404 | |
405 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase(); |
406 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase(); |
407 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale(); |
408 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned); |
409 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale(); |
410 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier); |
411 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier); |
412 | |
413 | Ref<StringImpl> foldCase(); |
414 | |
415 | Ref<StringImpl> stripWhiteSpace(); |
416 | WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace(); |
417 | Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction); |
418 | |
419 | Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction); |
420 | Ref<StringImpl> removeCharacters(CodeUnitMatchFunction); |
421 | |
422 | bool isAllASCII() const; |
423 | bool isAllLatin1() const; |
424 | template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const; |
425 | |
426 | size_t find(LChar character, unsigned start = 0); |
427 | size_t find(char character, unsigned start = 0); |
428 | size_t find(UChar character, unsigned start = 0); |
429 | WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = 0); |
430 | size_t find(const LChar*, unsigned index = 0); |
431 | ALWAYS_INLINE size_t find(const char* string, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(string), index); } |
432 | WTF_EXPORT_PRIVATE size_t find(StringImpl*); |
433 | WTF_EXPORT_PRIVATE size_t find(StringImpl*, unsigned index); |
434 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const; |
435 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const; |
436 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*) const; |
437 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const; |
438 | |
439 | WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength); |
440 | WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl*, unsigned index = MaxLength); |
441 | |
442 | WTF_EXPORT_PRIVATE bool startsWith(const StringImpl*) const; |
443 | WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const; |
444 | WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl*) const; |
445 | WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const; |
446 | WTF_EXPORT_PRIVATE bool startsWith(UChar) const; |
447 | WTF_EXPORT_PRIVATE bool startsWith(const char*, unsigned matchLength) const; |
448 | template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - 1); } |
449 | WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const; |
450 | |
451 | WTF_EXPORT_PRIVATE bool endsWith(StringImpl*); |
452 | WTF_EXPORT_PRIVATE bool endsWith(StringImpl&); |
453 | WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl*) const; |
454 | WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const; |
455 | WTF_EXPORT_PRIVATE bool endsWith(UChar) const; |
456 | WTF_EXPORT_PRIVATE bool endsWith(const char*, unsigned matchLength) const; |
457 | template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - 1); } |
458 | WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const; |
459 | |
460 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar); |
461 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*); |
462 | ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } |
463 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); |
464 | Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); |
465 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl*, StringImpl*); |
466 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*); |
467 | |
468 | WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr); |
469 | |
470 | #if USE(CF) |
471 | RetainPtr<CFStringRef> createCFString(); |
472 | #endif |
473 | |
474 | #ifdef __OBJC__ |
475 | WTF_EXPORT_PRIVATE operator NSString *(); |
476 | #endif |
477 | |
478 | #if STRING_STATS |
479 | ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } |
480 | #endif |
481 | |
482 | BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); } |
483 | |
484 | template<typename T> static size_t () { return tailOffset<T>(); } |
485 | |
486 | protected: |
487 | ~StringImpl(); |
488 | |
489 | // Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring). |
490 | enum CreateSymbolTag { CreateSymbol }; |
491 | StringImpl(CreateSymbolTag, const LChar*, unsigned length); |
492 | StringImpl(CreateSymbolTag, const UChar*, unsigned length); |
493 | |
494 | // Null symbol. |
495 | explicit StringImpl(CreateSymbolTag); |
496 | |
497 | private: |
498 | template<typename> static size_t allocationSize(Checked<size_t> tailElementCount); |
499 | template<typename> static size_t maxInternalLength(); |
500 | template<typename> static size_t tailOffset(); |
501 | |
502 | bool requiresCopy() const; |
503 | template<typename T> const T* tailPointer() const; |
504 | template<typename T> T* tailPointer(); |
505 | StringImpl* const& substringBuffer() const; |
506 | StringImpl*& substringBuffer(); |
507 | |
508 | enum class CaseConvertType { Upper, Lower }; |
509 | template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned); |
510 | |
511 | template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate); |
512 | template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction); |
513 | template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate); |
514 | template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned); |
515 | template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&); |
516 | template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&); |
517 | template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&); |
518 | template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType*, unsigned); |
519 | WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const; |
520 | |
521 | // The bottom bit in the ref count indicates a static (immortal) string. |
522 | static constexpr unsigned s_refCountFlagIsStaticString = 0x1; |
523 | static constexpr unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag. |
524 | |
525 | #if STRING_STATS |
526 | WTF_EXPORT_PRIVATE static StringStats m_stringStats; |
527 | #endif |
528 | |
529 | public: |
530 | void assertHashIsCorrect() const; |
531 | }; |
532 | |
533 | using StaticStringImpl = StringImpl::StaticStringImpl; |
534 | |
535 | static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "" ); |
536 | |
537 | #if !ASSERT_DISABLED |
538 | |
539 | // StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency |
540 | // as they are not allocated by fastMalloc. We don't currently have any way to detect that case |
541 | // so we ignore the consistency check for all StringImpl*. |
542 | template<> struct ValueCheck<StringImpl*> { |
543 | static void checkConsistency(const StringImpl*) { } |
544 | }; |
545 | |
546 | #endif |
547 | |
548 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const StringImpl*); |
549 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*); |
550 | inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); } |
551 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*, unsigned); |
552 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const UChar*, unsigned); |
553 | inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } |
554 | inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } |
555 | inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } |
556 | WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b); |
557 | |
558 | WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl*, StringImpl*); |
559 | WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*); |
560 | |
561 | bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&); |
562 | WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*); |
563 | bool equalIgnoringASCIICase(const StringImpl&, const char*); |
564 | bool equalIgnoringASCIICase(const StringImpl*, const char*); |
565 | |
566 | WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*); |
567 | |
568 | template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]); |
569 | template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]); |
570 | |
// Forward search for the first code unit accepted by the match function,
// starting at index (default 0). Defined inline later in this header.
size_t find(const LChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);
size_t find(const UChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0);

// Backward searches. index defaults to StringImpl::MaxLength; the definitions
// clamp any index >= length to length - 1, so the default starts at the end.
template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType*, unsigned length, unsigned index = StringImpl::MaxLength);
template<typename CharacterType> size_t reverseFind(const CharacterType*, unsigned length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
// Mixed-width overloads: the character to match has a different width than the buffer.
size_t reverseFind(const UChar*, unsigned length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
size_t reverseFind(const LChar*, unsigned length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);
578 | |
// Vector convenience form; forwards the vector's data() and size() to the
// (const UChar*, size_t, StringImpl*) overload.
template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);

// Three-way comparison by code unit value; returns -1, 0 or 1 (see definitions below).
template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1*, unsigned length1, const CharacterType2*, unsigned length2);
int codePointCompare(const StringImpl*, const StringImpl*);

// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
// Most WebKit callers don't want that and would use isASCIISpace or isHTMLSpace instead.
bool isSpaceOrNewline(UChar32);

// Counts code units up to (not including) the first zero terminator.
template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
589 | |
590 | // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> |
591 | template<typename T> struct DefaultHash; |
592 | template<> struct DefaultHash<StringImpl*> { |
593 | typedef StringHash Hash; |
594 | }; |
595 | template<> struct DefaultHash<RefPtr<StringImpl>> { |
596 | typedef StringHash Hash; |
597 | }; |
598 | |
// Expands to an immediately-invoked lambda that owns a function-local static
// StaticStringImpl built from `characters` and yields a pointer to it.
#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
    static StaticStringImpl impl(characters); \
    return &impl; \
}())
603 | |
// Placement-constructs an 8-bit StringImpl of the given length in the storage that
// `string` refers to (via the Force8BitConstructor overload) and adopts it into a Ref.
template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length)
{
    return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor });
}
608 | |
// Placement-constructs a 16-bit StringImpl of the given length in the storage that
// `string` refers to and adopts it into a Ref.
template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length)
{
    return adoptRef(*new (NotNull, &string) StringImpl { length });
}
613 | |
// LChar specialization of the width-generic accessor; returns characters8().
template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
{
    return characters8();
}
618 | |
// UChar specialization of the width-generic accessor; returns characters16().
template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
{
    return characters16();
}
623 | |
624 | inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index) |
625 | { |
626 | while (index < length) { |
627 | if (matchFunction(characters[index])) |
628 | return index; |
629 | ++index; |
630 | } |
631 | return notFound; |
632 | } |
633 | |
634 | inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index) |
635 | { |
636 | while (index < length) { |
637 | if (matchFunction(characters[index])) |
638 | return index; |
639 | ++index; |
640 | } |
641 | return notFound; |
642 | } |
643 | |
644 | template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index) |
645 | { |
646 | if (!length) |
647 | return notFound; |
648 | if (index >= length) |
649 | index = length - 1; |
650 | auto character = characters[index]; |
651 | while (character != '\n' && character != '\r') { |
652 | if (!index--) |
653 | return notFound; |
654 | character = characters[index]; |
655 | } |
656 | return index; |
657 | } |
658 | |
659 | template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index) |
660 | { |
661 | if (!length) |
662 | return notFound; |
663 | if (index >= length) |
664 | index = length - 1; |
665 | while (characters[index] != matchCharacter) { |
666 | if (!index--) |
667 | return notFound; |
668 | } |
669 | return index; |
670 | } |
671 | |
// Mixed-width overload: widens the LChar to UChar and forwards to the same-width search.
ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index)
{
    return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
}
676 | |
677 | inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index) |
678 | { |
679 | if (!isLatin1(matchCharacter)) |
680 | return notFound; |
681 | return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index); |
682 | } |
683 | |
684 | inline size_t StringImpl::find(LChar character, unsigned start) |
685 | { |
686 | if (is8Bit()) |
687 | return WTF::find(characters8(), m_length, character, start); |
688 | return WTF::find(characters16(), m_length, character, start); |
689 | } |
690 | |
// char convenience form: casts to LChar and forwards to find(LChar, unsigned).
ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
{
    return find(static_cast<LChar>(character), start);
}
695 | |
696 | inline size_t StringImpl::find(UChar character, unsigned start) |
697 | { |
698 | if (is8Bit()) |
699 | return WTF::find(characters8(), m_length, character, start); |
700 | return WTF::find(characters16(), m_length, character, start); |
701 | } |
702 | |
// Vector convenience form: forwards the vector's data() and size() to the
// (const UChar*, size_t, StringImpl*) overload.
template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{
    return equalIgnoringNullity(a.data(), a.size(), b);
}
707 | |
// Three-way comparison of two character buffers by code unit value.
// Returns -1 / 1 according to the first differing code unit; if one buffer is a
// prefix of the other, the shorter one orders first; returns 0 when both the
// contents and the lengths are equal.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    const unsigned sharedLength = std::min(length1, length2);

    for (unsigned i = 0; i < sharedLength; ++i) {
        if (characters1[i] != characters2[i])
            return characters1[i] > characters2[i] ? 1 : -1;
    }

    // No difference within the shared prefix: the lengths decide.
    if (length1 == length2)
        return 0;
    return length1 > length2 ? 1 : -1;
}
726 | |
727 | inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) |
728 | { |
729 | // FIXME: Should null strings compare as less than empty strings rather than equal to them? |
730 | if (!string1) |
731 | return (string2 && string2->length()) ? -1 : 0; |
732 | if (!string2) |
733 | return string1->length() ? 1 : 0; |
734 | |
735 | bool string1Is8Bit = string1->is8Bit(); |
736 | bool string2Is8Bit = string2->is8Bit(); |
737 | if (string1Is8Bit) { |
738 | if (string2Is8Bit) |
739 | return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length()); |
740 | return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length()); |
741 | } |
742 | if (string2Is8Bit) |
743 | return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length()); |
744 | return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length()); |
745 | } |
746 | |
747 | inline bool isSpaceOrNewline(UChar32 character) |
748 | { |
749 | // Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS. |
750 | return isLatin1(character) ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL; |
751 | } |
752 | |
753 | template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string) |
754 | { |
755 | ASSERT(string); |
756 | size_t length = 0; |
757 | while (string[length]) |
758 | ++length; |
759 | |
760 | RELEASE_ASSERT(length < StringImpl::MaxLength); |
761 | return static_cast<unsigned>(length); |
762 | } |
763 | |
// Stores the given ref count, length, 8-bit data pointer and hash/flags word verbatim.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8(data8)
    , m_hashAndFlags(hashAndFlags)
{
}
771 | |
// Stores the given ref count, length, 16-bit data pointer and hash/flags word verbatim.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16(data16)
    , m_hashAndFlags(hashAndFlags)
{
}
779 | |
// constexpr form for char array literals: initializes the m_data8Char member
// directly from the array, so no pointer cast is needed.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
787 | |
// constexpr form for char16_t array literals: initializes the m_data16Char member
// directly from the array, so no pointer cast is needed.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
795 | |
796 | inline Ref<StringImpl> StringImpl::isolatedCopy() const |
797 | { |
798 | if (!requiresCopy()) { |
799 | if (is8Bit()) |
800 | return StringImpl::createWithoutCopying(m_data8, m_length); |
801 | return StringImpl::createWithoutCopying(m_data16, m_length); |
802 | } |
803 | |
804 | if (is8Bit()) |
805 | return create(m_data8, m_length); |
806 | return create(m_data16, m_length); |
807 | } |
808 | |
809 | inline bool StringImpl::isAllASCII() const |
810 | { |
811 | if (is8Bit()) |
812 | return charactersAreAllASCII(characters8(), length()); |
813 | return charactersAreAllASCII(characters16(), length()); |
814 | } |
815 | |
816 | inline bool StringImpl::isAllLatin1() const |
817 | { |
818 | if (is8Bit()) |
819 | return true; |
820 | auto* characters = characters16(); |
821 | UChar ored = 0; |
822 | for (size_t i = 0; i < length(); ++i) |
823 | ored |= characters[i]; |
824 | return !(ored & 0xFF00); |
825 | } |
826 | |
827 | template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length) |
828 | { |
829 | for (size_t i = 0; i < length; ++i) { |
830 | if (!isSpecialCharacter(characters[i])) |
831 | return false; |
832 | } |
833 | return true; |
834 | } |
835 | |
836 | template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const |
837 | { |
838 | if (is8Bit()) |
839 | return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length()); |
840 | return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length()); |
841 | } |
842 | |
// Constructs an 8-bit string whose character pointer is this object's own tail
// storage (tailPointer<LChar>()), flagged BufferInternal. The characters themselves
// are not written here. Asserts a non-zero length.
inline StringImpl::StringImpl(unsigned length, Force8Bit)
    : StringImplShape(s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
851 | |
// Constructs a 16-bit string whose character pointer is this object's own tail
// storage (tailPointer<UChar>()), flagged BufferInternal. The characters themselves
// are not written here. Asserts a non-zero length.
inline StringImpl::StringImpl(unsigned length)
    : StringImplShape(s_refCountIncrement, length, tailPointer<UChar>(), StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
860 | |
// Constructs an 8-bit string that takes over the MallocPtr's buffer (released with
// leakPtr()), flagged BufferOwned. Asserts a non-null buffer and non-zero length.
inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer | StringNormal | BufferOwned)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
869 | |
// Constructs a 16-bit string that points directly at the caller's characters
// without copying them; flagged BufferInternal. Asserts a non-null pointer and
// non-zero length.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
878 | |
// Constructs an 8-bit string that points directly at the caller's characters
// without copying them; flagged BufferInternal. Asserts a non-null pointer and
// non-zero length.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
887 | |
// Constructs a 16-bit string that takes over the MallocPtr's buffer (released with
// leakPtr()), flagged BufferOwned. Asserts a non-null buffer and non-zero length.
inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), StringNormal | BufferOwned)
{
    ASSERT(m_data16);
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
896 | |
// Constructs an 8-bit substring (BufferSubstring) that points into characters
// belonging to `base`. The reference to `base` is leaked into the substringBuffer()
// slot so the base stays referenced. `base` must not itself be a substring.
inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // Store the owner pointer in the tail slot; the Ref's reference is transferred, not dropped.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
909 | |
// Constructs a 16-bit substring (BufferSubstring) that points into characters
// belonging to `base`. The reference to `base` is leaked into the substringBuffer()
// slot so the base stays referenced. `base` must not itself be a substring.
inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    ASSERT(m_length);
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // Store the owner pointer in the tail slot; the Ref's reference is transferred, not dropped.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
922 | |
// Vector convenience form: forwards the vector's data() and size() to the
// pointer/length overload of create8BitIfPossible().
template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
{
    return create8BitIfPossible(vector.data(), vector.size());
}
927 | |
928 | ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length) |
929 | { |
930 | ASSERT(length <= rep.length()); |
931 | |
932 | if (!length) |
933 | return *empty(); |
934 | |
935 | // Coyping the thing would save more memory sometimes, largely due to the size of pointer. |
936 | size_t substringSize = allocationSize<StringImpl*>(1); |
937 | if (rep.is8Bit()) { |
938 | if (substringSize >= allocationSize<LChar>(length)) |
939 | return create(rep.m_data8 + offset, length); |
940 | } else { |
941 | if (substringSize >= allocationSize<UChar>(length)) |
942 | return create(rep.m_data16 + offset, length); |
943 | } |
944 | |
945 | auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep); |
946 | |
947 | // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string. |
948 | auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize)); |
949 | if (rep.is8Bit()) |
950 | return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep)); |
951 | return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep)); |
952 | } |
953 | |
// Creates a string over a char array literal without copying it. The length is
// characterCount - 1, i.e. the array size minus one for the trailing terminator.
template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
{
    // Reject empty literals and lengths that could overflow the size computation at compile time.
    COMPILE_ASSERT(characterCount > 1, StringImplFromLiteralNotEmpty);
    COMPILE_ASSERT((characterCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);

    return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - 1);
}
961 | |
962 | template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output) |
963 | { |
964 | if (!length) { |
965 | output = nullptr; |
966 | return empty(); |
967 | } |
968 | |
969 | if (length > maxInternalLength<CharacterType>()) { |
970 | output = nullptr; |
971 | return nullptr; |
972 | } |
973 | StringImpl* result; |
974 | if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) { |
975 | output = nullptr; |
976 | return nullptr; |
977 | } |
978 | output = result->tailPointer<CharacterType>(); |
979 | |
980 | return constructInternal<CharacterType>(*result, length); |
981 | } |
982 | |
983 | template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity> |
984 | inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector) |
985 | { |
986 | if (size_t size = vector.size()) { |
987 | ASSERT(vector.data()); |
988 | if (size > MaxLength) |
989 | CRASH(); |
990 | return adoptRef(*new StringImpl(vector.releaseBuffer(), size)); |
991 | } |
992 | return *empty(); |
993 | } |
994 | |
995 | inline size_t StringImpl::cost() const |
996 | { |
997 | // For substrings, return the cost of the base string. |
998 | if (bufferOwnership() == BufferSubstring) |
999 | return substringBuffer()->cost(); |
1000 | |
1001 | // Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl. |
1002 | // We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of |
1003 | // StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of |
1004 | // 0 here and avoid modifying m_hashAndFlags. |
1005 | if (m_hashAndFlags & s_hashFlagDidReportCost) |
1006 | return 0; |
1007 | |
1008 | m_hashAndFlags |= s_hashFlagDidReportCost; |
1009 | size_t result = m_length; |
1010 | if (!is8Bit()) |
1011 | result <<= 1; |
1012 | return result; |
1013 | } |
1014 | |
1015 | inline size_t StringImpl::costDuringGC() |
1016 | { |
1017 | if (isStatic()) |
1018 | return 0; |
1019 | |
1020 | if (bufferOwnership() == BufferSubstring) |
1021 | return divideRoundedUp(substringBuffer()->costDuringGC(), refCount()); |
1022 | |
1023 | size_t result = m_length; |
1024 | if (!is8Bit()) |
1025 | result <<= 1; |
1026 | return divideRoundedUp(result, refCount()); |
1027 | } |
1028 | |
1029 | inline void StringImpl::setIsAtom(bool isAtom) |
1030 | { |
1031 | ASSERT(!isStatic()); |
1032 | ASSERT(!isSymbol()); |
1033 | if (isAtom) |
1034 | m_hashAndFlags |= s_hashFlagStringKindIsAtom; |
1035 | else |
1036 | m_hashAndFlags &= ~s_hashFlagStringKindIsAtom; |
1037 | } |
1038 | |
// Stores `hash` in the bits of m_hashAndFlags above the flag bits. May only be
// called once per string (asserts that no hash is set yet) and never on static
// strings. `hash` must equal the StringHasher value for the current characters.
inline void StringImpl::setHash(unsigned hash) const
{
    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.

    ASSERT(!hasHash());
    ASSERT(!isStatic());
    // Multiple clients assume that StringHasher is the canonical string hash function.
    ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
    ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.

    hash <<= s_flagCount;
    ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
    ASSERT(hash); // Verify that 0 is a valid sentinel hash value.

    m_hashAndFlags |= hash; // Store hash with flags in low bits.
}
1057 | |
// Takes one reference by adding s_refCountIncrement to m_refCount.
inline void StringImpl::ref()
{
    STRING_STATS_REF_STRING(*this);

    m_refCount += s_refCountIncrement;
}
1064 | |
1065 | inline void StringImpl::deref() |
1066 | { |
1067 | STRING_STATS_DEREF_STRING(*this); |
1068 | |
1069 | unsigned tempRefCount = m_refCount - s_refCountIncrement; |
1070 | if (!tempRefCount) { |
1071 | StringImpl::destroy(this); |
1072 | return; |
1073 | } |
1074 | m_refCount = tempRefCount; |
1075 | } |
1076 | |
1077 | template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters) |
1078 | { |
1079 | if (numCharacters == 1) { |
1080 | *destination = *source; |
1081 | return; |
1082 | } |
1083 | memcpy(destination, source, numCharacters * sizeof(CharacterType)); |
1084 | } |
1085 | |
1086 | ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters) |
1087 | { |
1088 | for (unsigned i = 0; i < numCharacters; ++i) |
1089 | destination[i] = source[i]; |
1090 | } |
1091 | |
1092 | inline UChar StringImpl::at(unsigned i) const |
1093 | { |
1094 | ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); |
1095 | return is8Bit() ? m_data8[i] : m_data16[i]; |
1096 | } |
1097 | |
// Constructs an 8-bit symbol string (StringSymbol | BufferSubstring) over the given
// characters. Unlike the Ref<StringImpl>&& substring constructors, this does not
// assign substringBuffer() itself.
inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1105 | |
// Constructs a 16-bit symbol string (StringSymbol | BufferSubstring) over the given
// characters. Unlike the Ref<StringImpl>&& substring constructors, this does not
// assign substringBuffer() itself.
inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, StringSymbol | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
1113 | |
// Constructs a zero-length 8-bit symbol string whose character pointer is the
// shared empty string's characters8().
inline StringImpl::StringImpl(CreateSymbolTag)
    : StringImplShape(s_refCountIncrement, 0, empty()->characters8(), s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1121 | |
// Returns the number of bytes needed for a StringImpl followed by tailElementCount
// elements of type T: tailOffset<T>() plus the element bytes, computed with
// Checked<size_t> arithmetic.
template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
{
    return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
}
1126 | |
1127 | template<typename CharacterType> |
1128 | inline size_t StringImpl::maxInternalLength() |
1129 | { |
1130 | // In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2. |
1131 | return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType)); |
1132 | } |
1133 | |
// Returns the byte offset from the start of the object to the tail storage for
// elements of type T.
template<typename T> inline size_t StringImpl::tailOffset()
{
#if COMPILER(MSVC)
    // MSVC doesn't support alignof yet.
    // Fall back to rounding sizeof(StringImpl) up to a multiple of sizeof(T).
    return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
#else
    // The tail begins right after m_hashAndFlags, rounded up to T's alignment.
    return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
#endif
}
1143 | |
1144 | inline bool StringImpl::requiresCopy() const |
1145 | { |
1146 | if (bufferOwnership() != BufferInternal) |
1147 | return true; |
1148 | |
1149 | if (is8Bit()) |
1150 | return m_data8 == tailPointer<LChar>(); |
1151 | return m_data16 == tailPointer<UChar>(); |
1152 | } |
1153 | |
// Returns the address tailOffset<T>() bytes past the start of this object, typed as const T*.
template<typename T> inline const T* StringImpl::tailPointer() const
{
    return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
}
1158 | |
// Returns the address tailOffset<T>() bytes past the start of this object, typed as T*.
template<typename T> inline T* StringImpl::tailPointer()
{
    return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
}
1163 | |
// Read-only access to the StringImpl* slot stored at the tail of a substring.
// Only valid when bufferOwnership() is BufferSubstring.
inline StringImpl* const& StringImpl::substringBuffer() const
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1170 | |
// Writable access to the StringImpl* slot stored at the tail of a substring.
// Only valid when bufferOwnership() is BufferSubstring.
inline StringImpl*& StringImpl::substringBuffer()
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1177 | |
1178 | inline void StringImpl::assertHashIsCorrect() const |
1179 | { |
1180 | ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length())); |
1181 | } |
1182 | |
// constexpr constructor for a static 8-bit string over a char array literal.
// The length is characterCount - 1. The hash is computed from the literal and stored
// above the flag bits, and s_hashFlagDidReportCost is pre-set so that cost() never
// has to modify a static instance.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlag8BitBuffer | s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1188 | |
// constexpr constructor for a static 16-bit string over a char16_t array literal.
// The length is characterCount - 1. The hash is computed from the literal and stored
// above the flag bits, and s_hashFlagDidReportCost is pre-set so that cost() never
// has to modify a static instance.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1194 | |
// Views this StaticStringImpl as a StringImpl by reinterpreting the object's address.
inline StringImpl::StaticStringImpl::operator StringImpl&()
{
    return *reinterpret_cast<StringImpl*>(this);
}
1199 | |
// StringImpl/StringImpl form; forwards to equalIgnoringASCIICaseCommon().
inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1204 | |
// StringImpl/C-string form; forwards to equalIgnoringASCIICaseCommon().
inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1209 | |
// Nullable-pointer form: a null string never matches; otherwise defers to the reference overload.
inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
{
    return a && equalIgnoringASCIICase(*a, b);
}
1214 | |
// Reference form; forwards the string and the character array literal to
// startsWithLettersIgnoringASCIICaseCommon().
template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1219 | |
// Nullable-pointer form: a null string never matches; otherwise defers to the reference overload.
template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
{
    return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters);
}
1224 | |
// Reference form; forwards the string and the character array literal to
// equalLettersIgnoringASCIICaseCommon().
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1229 | |
// Nullable-pointer form: a null string never matches; otherwise defers to the reference overload.
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
{
    return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
}
1234 | |
1235 | } // namespace WTF |
1236 | |
1237 | using WTF::StaticStringImpl; |
1238 | using WTF::StringImpl; |
1239 | using WTF::equal; |
1240 | |