1 | /* |
2 | * Copyright (C) 1999 Lars Knoll ([email protected]) |
3 | * Copyright (C) 2005-2018 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2009 Google Inc. All rights reserved. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #pragma once |
24 | |
25 | #include <limits.h> |
26 | #include <unicode/ustring.h> |
27 | #include <wtf/ASCIICType.h> |
28 | #include <wtf/CheckedArithmetic.h> |
29 | #include <wtf/Expected.h> |
30 | #include <wtf/MathExtras.h> |
31 | #include <wtf/StdLibExtras.h> |
32 | #include <wtf/Vector.h> |
33 | #include <wtf/text/ASCIIFastPath.h> |
34 | #include <wtf/text/ConversionMode.h> |
35 | #include <wtf/text/StringCommon.h> |
36 | #include <wtf/text/StringHasher.h> |
37 | #include <wtf/text/UTF8ConversionError.h> |
38 | |
39 | #if USE(CF) |
40 | typedef const struct __CFString * CFStringRef; |
41 | #endif |
42 | |
43 | #ifdef __OBJC__ |
44 | @class NSString; |
45 | #endif |
46 | |
// Forward declarations of the JavaScriptCore classes that StringImpl names as friends.
namespace JSC {
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
}
51 | |
52 | namespace WTF { |
53 | |
54 | class SymbolImpl; |
55 | class SymbolRegistry; |
56 | |
57 | struct CStringTranslator; |
58 | struct HashAndUTF8CharactersTranslator; |
59 | struct LCharBufferTranslator; |
60 | struct StringHash; |
61 | struct SubstringTranslator; |
62 | struct UCharBufferTranslator; |
63 | |
64 | template<typename> class RetainPtr; |
65 | |
66 | template<typename> struct BufferFromStaticDataTranslator; |
67 | template<typename> struct HashAndCharactersTranslator; |
68 | |
// Define STRING_STATS to 1 to turn on runtime statistics of string sizes and memory usage.
70 | #define STRING_STATS 0 |
71 | |
72 | template<bool isSpecialCharacter(UChar), typename CharacterType> bool isAllSpecialCharacters(const CharacterType*, size_t length); |
73 | |
74 | #if STRING_STATS |
75 | |
76 | struct StringStats { |
77 | void add8BitString(unsigned length, bool isSubString = false) |
78 | { |
79 | ++m_totalNumberStrings; |
80 | ++m_number8BitStrings; |
81 | if (!isSubString) |
82 | m_total8BitData += length; |
83 | } |
84 | |
85 | void add16BitString(unsigned length, bool isSubString = false) |
86 | { |
87 | ++m_totalNumberStrings; |
88 | ++m_number16BitStrings; |
89 | if (!isSubString) |
90 | m_total16BitData += length; |
91 | } |
92 | |
93 | void removeString(StringImpl&); |
94 | void printStats(); |
95 | |
96 | static const unsigned s_printStringStatsFrequency = 5000; |
97 | static std::atomic<unsigned> s_stringRemovesTillPrintStats; |
98 | |
99 | std::atomic<unsigned> m_refCalls; |
100 | std::atomic<unsigned> m_derefCalls; |
101 | |
102 | std::atomic<unsigned> m_totalNumberStrings; |
103 | std::atomic<unsigned> m_number8BitStrings; |
104 | std::atomic<unsigned> m_number16BitStrings; |
105 | std::atomic<unsigned long long> m_total8BitData; |
106 | std::atomic<unsigned long long> m_total16BitData; |
107 | }; |
108 | |
// Statistics hooks used when STRING_STATS is enabled. Every macro expands to a
// single expression with no trailing semicolon, so it can be used like a
// function call (including as the sole statement of an if/else branch), exactly
// like the no-op versions in the #else branch.
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
// StringStats has no counter for up-converted strings; defined as a no-op so both
// branches of the STRING_STATS conditional provide the same set of macros.
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
#define STRING_STATS_REF_STRING(string) ((void)++StringImpl::stringStats().m_refCalls)
#define STRING_STATS_DEREF_STRING(string) ((void)++StringImpl::stringStats().m_derefCalls)
116 | |
117 | #else |
118 | |
// With STRING_STATS disabled every statistics hook expands to a no-op expression,
// so call sites need no conditional compilation of their own.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#define STRING_STATS_REF_STRING(string) ((void)0)
#define STRING_STATS_DEREF_STRING(string) ((void)0)
127 | |
128 | #endif |
129 | |
// Returns true when the code unit, converted to the unsigned counterpart of its
// own type, is no greater than 0xFF. Negative values of wider signed types
// therefore convert to large unsigned values and are rejected.
template<typename CharacterType> inline bool isLatin1(CharacterType character)
{
    using Unsigned = std::make_unsigned_t<CharacterType>;
    const auto codeUnit = static_cast<Unsigned>(character);
    const auto latin1Max = static_cast<Unsigned>(0xFF);
    return !(codeUnit > latin1Max);
}
135 | |
// Holds the data members shared by StringImpl and StringImpl::StaticStringImpl:
// a reference count, a length, a pointer to the characters and a combined
// hash-and-flags word. Both of those classes derive privately from this one.
class StringImplShape {
    WTF_MAKE_NONCOPYABLE(StringImplShape);
public:
    // Largest length a string may have: the maximum value of int32_t.
    static constexpr unsigned MaxLength = std::numeric_limits<int32_t>::max();

protected:
    // Constructors for 8-bit (LChar) and 16-bit (UChar) character data.
    StringImplShape(unsigned refCount, unsigned length, const LChar*, unsigned hashAndFlags);
    StringImplShape(unsigned refCount, unsigned length, const UChar*, unsigned hashAndFlags);

    // Tag selecting the constexpr constructors, which take character array literals.
    enum ConstructWithConstExprTag { ConstructWithConstExpr };
    template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);
    template<unsigned characterCount> constexpr StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag);

    unsigned m_refCount;
    unsigned m_length;
    // A single character pointer, viewable through differently typed members.
    union {
        const LChar* m_data8;
        const UChar* m_data16;
        // It seems that reinterpret_cast prevents constexpr's compile time initialization in VC++.
        // These are needed to avoid reinterpret_cast.
        const char* m_data8Char;
        const char16_t* m_data16Char;
    };
    // Mutable so that it can be updated through const member functions.
    mutable unsigned m_hashAndFlags;
};
161 | |
162 | // FIXME: Use of StringImpl and const is rather confused. |
163 | // The actual string inside a StringImpl is immutable, so you can't modify a string using a StringImpl&. |
164 | // We could mark every member function const and always use "const StringImpl&" and "const StringImpl*". |
165 | // Or we could say that "const" doesn't make sense at all and use "StringImpl&" and "StringImpl*" everywhere. |
166 | // Right now we use a mix of both, which makes code more confusing and has no benefit. |
167 | |
168 | class StringImpl : private StringImplShape { |
169 | WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED; |
170 | |
171 | friend class AtomStringImpl; |
172 | friend class JSC::LLInt::Data; |
173 | friend class JSC::LLIntOffsetsExtractor; |
174 | friend class PrivateSymbolImpl; |
175 | friend class RegisteredSymbolImpl; |
176 | friend class SymbolImpl; |
177 | friend class ExternalStringImpl; |
178 | |
179 | friend struct WTF::CStringTranslator; |
180 | friend struct WTF::HashAndUTF8CharactersTranslator; |
181 | friend struct WTF::LCharBufferTranslator; |
182 | friend struct WTF::SubstringTranslator; |
183 | friend struct WTF::UCharBufferTranslator; |
184 | |
185 | template<typename> friend struct WTF::BufferFromStaticDataTranslator; |
186 | template<typename> friend struct WTF::HashAndCharactersTranslator; |
187 | |
188 | public: |
189 | enum BufferOwnership { BufferInternal, BufferOwned, BufferSubstring, BufferExternal }; |
190 | |
191 | static constexpr unsigned MaxLength = StringImplShape::MaxLength; |
192 | |
193 | // The bottom 6 bits in the hash are flags. |
194 | static constexpr const unsigned s_flagCount = 6; |
195 | private: |
196 | static constexpr const unsigned s_flagMask = (1u << s_flagCount) - 1; |
197 | static_assert(s_flagCount <= StringHasher::flagCount, "StringHasher reserves enough bits for StringImpl flags" ); |
198 | static constexpr const unsigned s_flagStringKindCount = 4; |
199 | |
200 | static constexpr const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount); |
201 | static constexpr const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1); |
202 | static constexpr const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol; |
203 | static constexpr const unsigned s_hashFlagDidReportCost = 1u << 3; |
204 | static constexpr const unsigned s_hashFlag8BitBuffer = 1u << 2; |
205 | static constexpr const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1); |
206 | |
207 | enum StringKind { |
208 | StringNormal = 0u, // non-symbol, non-atomic |
209 | StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic |
210 | StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic |
211 | }; |
212 | |
213 | // Create a normal 8-bit string with internal storage (BufferInternal). |
214 | enum Force8Bit { Force8BitConstructor }; |
215 | StringImpl(unsigned length, Force8Bit); |
216 | |
217 | // Create a normal 16-bit string with internal storage (BufferInternal). |
218 | explicit StringImpl(unsigned length); |
219 | |
220 | // Create a StringImpl adopting ownership of the provided buffer (BufferOwned). |
221 | StringImpl(MallocPtr<LChar>, unsigned length); |
222 | StringImpl(MallocPtr<UChar>, unsigned length); |
223 | enum ConstructWithoutCopyingTag { ConstructWithoutCopying }; |
224 | StringImpl(const UChar*, unsigned length, ConstructWithoutCopyingTag); |
225 | StringImpl(const LChar*, unsigned length, ConstructWithoutCopyingTag); |
226 | |
227 | // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring). |
228 | StringImpl(const LChar*, unsigned length, Ref<StringImpl>&&); |
229 | StringImpl(const UChar*, unsigned length, Ref<StringImpl>&&); |
230 | |
231 | public: |
232 | WTF_EXPORT_PRIVATE static void destroy(StringImpl*); |
233 | |
234 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const UChar*, unsigned length); |
235 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*, unsigned length); |
236 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length); |
237 | template<size_t inlineCapacity> static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>&); |
238 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create8BitIfPossible(const UChar*); |
239 | |
240 | ALWAYS_INLINE static Ref<StringImpl> create(const char* characters, unsigned length) { return create(reinterpret_cast<const LChar*>(characters), length); } |
241 | WTF_EXPORT_PRIVATE static Ref<StringImpl> create(const LChar*); |
242 | ALWAYS_INLINE static Ref<StringImpl> create(const char* string) { return create(reinterpret_cast<const LChar*>(string)); } |
243 | |
244 | static Ref<StringImpl> createSubstringSharingImpl(StringImpl&, unsigned offset, unsigned length); |
245 | |
246 | template<unsigned characterCount> static Ref<StringImpl> createFromLiteral(const char (&)[characterCount]); |
247 | |
248 | // FIXME: Replace calls to these overloads of createFromLiteral to createWithoutCopying instead. |
249 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*, unsigned length); |
250 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createFromLiteral(const char*); |
251 | |
252 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const UChar*, unsigned length); |
253 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createWithoutCopying(const LChar*, unsigned length); |
254 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, LChar*&); |
255 | WTF_EXPORT_PRIVATE static Ref<StringImpl> createUninitialized(unsigned length, UChar*&); |
256 | template<typename CharacterType> static RefPtr<StringImpl> tryCreateUninitialized(unsigned length, CharacterType*&); |
257 | |
258 | // Reallocate the StringImpl. The originalString must be only owned by the Ref, |
259 | // and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(), |
260 | // the originalString can't be used after this function. |
261 | static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); |
262 | static Ref<StringImpl> reallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); |
263 | static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, LChar*& data); |
264 | static Expected<Ref<StringImpl>, UTF8ConversionError> tryReallocate(Ref<StringImpl>&& originalString, unsigned length, UChar*& data); |
265 | |
266 | static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); } |
267 | static constexpr unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; } |
268 | static constexpr unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; } |
269 | static constexpr unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; } |
270 | static constexpr unsigned maskStringKind() { return s_hashMaskStringKind; } |
271 | static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); } |
272 | |
273 | template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity> |
274 | static Ref<StringImpl> adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&&); |
275 | |
276 | WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<UChar>&&); |
277 | WTF_EXPORT_PRIVATE static Ref<StringImpl> adopt(StringBuffer<LChar>&&); |
278 | |
279 | unsigned length() const { return m_length; } |
280 | static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); } |
281 | bool isEmpty() const { return !m_length; } |
282 | |
283 | bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; } |
284 | ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; } |
285 | ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; } |
286 | |
287 | template<typename CharacterType> const CharacterType* characters() const; |
288 | |
289 | size_t cost() const; |
290 | size_t costDuringGC(); |
291 | |
292 | WTF_EXPORT_PRIVATE size_t sizeInBytes() const; |
293 | |
294 | bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; } |
295 | bool isAtom() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; } |
296 | void setIsAtomic(bool); |
297 | |
298 | bool isExternal() const { return bufferOwnership() == BufferExternal; } |
299 | |
300 | bool isSubString() const { return bufferOwnership() == BufferSubstring; } |
301 | |
302 | static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const LChar* characters, unsigned length); |
303 | static WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion); |
304 | |
305 | WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const; |
306 | WTF_EXPORT_PRIVATE Expected<CString, UTF8ConversionError> tryGetUtf8(ConversionMode = LenientConversion) const; |
307 | WTF_EXPORT_PRIVATE CString utf8(ConversionMode = LenientConversion) const; |
308 | |
309 | private: |
310 | static WTF_EXPORT_PRIVATE UTF8ConversionError utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode); |
311 | |
312 | // The high bits of 'hash' are always empty, but we prefer to store our flags |
313 | // in the low bits because it makes them slightly more efficient to access. |
314 | // So, we shift left and right when setting and getting our hash code. |
315 | void setHash(unsigned) const; |
316 | |
317 | unsigned rawHash() const { return m_hashAndFlags >> s_flagCount; } |
318 | |
319 | public: |
320 | bool hasHash() const { return !!rawHash(); } |
321 | |
322 | unsigned existingHash() const { ASSERT(hasHash()); return rawHash(); } |
323 | unsigned hash() const { return hasHash() ? rawHash() : hashSlowCase(); } |
324 | |
325 | WTF_EXPORT_PRIVATE unsigned concurrentHash() const; |
326 | |
327 | unsigned symbolAwareHash() const; |
328 | unsigned existingSymbolAwareHash() const; |
329 | |
330 | bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; } |
331 | |
332 | size_t refCount() const { return m_refCount / s_refCountIncrement; } |
333 | bool hasOneRef() const { return m_refCount == s_refCountIncrement; } |
334 | bool hasAtLeastOneRef() const { return m_refCount; } // For assertions. |
335 | |
336 | void ref(); |
337 | void deref(); |
338 | |
339 | class StaticStringImpl : private StringImplShape { |
340 | WTF_MAKE_NONCOPYABLE(StaticStringImpl); |
341 | public: |
342 | // Used to construct static strings, which have an special refCount that can never hit zero. |
343 | // This means that the static string will never be destroyed, which is important because |
344 | // static strings will be shared across threads & ref-counted in a non-threadsafe manner. |
345 | // |
346 | // In order to make StaticStringImpl thread safe, we also need to ensure that the rest of |
347 | // the fields are never mutated by threads. We have this guarantee because: |
348 | // |
349 | // 1. m_length is only set on construction and never mutated thereafter. |
350 | // |
351 | // 2. m_data8 and m_data16 are only set on construction and never mutated thereafter. |
352 | // We also know that a StringImpl never changes from 8 bit to 16 bit because there |
353 | // is no way to set/clear the s_hashFlag8BitBuffer flag other than at construction. |
354 | // |
355 | // 3. m_hashAndFlags will not be mutated by different threads because: |
356 | // |
357 | // a. StaticStringImpl's constructor sets the s_hashFlagDidReportCost flag to ensure |
358 | // that StringImpl::cost() returns early. |
359 | // This means StaticStringImpl costs are not counted. But since there should only |
360 | // be a finite set of StaticStringImpls, their cost can be aggregated into a single |
361 | // system cost if needed. |
362 | // b. setIsAtomic() is never called on a StaticStringImpl. |
363 | // setIsAtomic() asserts !isStatic(). |
364 | // c. setHash() is never called on a StaticStringImpl. |
365 | // StaticStringImpl's constructor sets the hash on construction. |
366 | // StringImpl::hash() only sets a new hash iff !hasHash(). |
367 | // Additionally, StringImpl::setHash() asserts hasHash() and !isStatic(). |
368 | |
369 | template<unsigned characterCount> constexpr StaticStringImpl(const char (&characters)[characterCount], StringKind = StringNormal); |
370 | template<unsigned characterCount> constexpr StaticStringImpl(const char16_t (&characters)[characterCount], StringKind = StringNormal); |
371 | operator StringImpl&(); |
372 | }; |
373 | |
374 | WTF_EXPORT_PRIVATE static StaticStringImpl s_atomicEmptyString; |
375 | ALWAYS_INLINE static StringImpl* empty() { return reinterpret_cast<StringImpl*>(&s_atomicEmptyString); } |
376 | |
377 | // FIXME: Does this really belong in StringImpl? |
378 | template<typename CharacterType> static void copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters); |
379 | static void copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters); |
380 | |
381 | // Some string features, like reference counting and the atomicity flag, are not |
382 | // thread-safe. We achieve thread safety by isolation, giving each thread |
383 | // its own copy of the string. |
384 | Ref<StringImpl> isolatedCopy() const; |
385 | |
386 | WTF_EXPORT_PRIVATE Ref<StringImpl> substring(unsigned position, unsigned length = MaxLength); |
387 | |
388 | UChar at(unsigned) const; |
389 | UChar operator[](unsigned i) const { return at(i); } |
390 | WTF_EXPORT_PRIVATE UChar32 characterStartingAt(unsigned); |
391 | |
392 | int toIntStrict(bool* ok = 0, int base = 10); |
393 | unsigned toUIntStrict(bool* ok = 0, int base = 10); |
394 | int64_t toInt64Strict(bool* ok = 0, int base = 10); |
395 | uint64_t toUInt64Strict(bool* ok = 0, int base = 10); |
396 | intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); |
397 | |
398 | WTF_EXPORT_PRIVATE int toInt(bool* ok = 0); // ignores trailing garbage |
399 | unsigned toUInt(bool* ok = 0); // ignores trailing garbage |
400 | int64_t toInt64(bool* ok = 0); // ignores trailing garbage |
401 | uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage |
402 | intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage |
403 | |
404 | // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. |
405 | // Like the non-strict functions above, these return the value when there is trailing garbage. |
406 | // It would be better if these were more consistent with the above functions instead. |
407 | double toDouble(bool* ok = 0); |
408 | float toFloat(bool* ok = 0); |
409 | |
410 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIILowercase(); |
411 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToASCIIUppercase(); |
412 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocale(); |
413 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned); |
414 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithoutLocale(); |
415 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToLowercaseWithLocale(const AtomString& localeIdentifier); |
416 | WTF_EXPORT_PRIVATE Ref<StringImpl> convertToUppercaseWithLocale(const AtomString& localeIdentifier); |
417 | |
418 | Ref<StringImpl> foldCase(); |
419 | |
420 | Ref<StringImpl> stripWhiteSpace(); |
421 | WTF_EXPORT_PRIVATE Ref<StringImpl> simplifyWhiteSpace(); |
422 | Ref<StringImpl> simplifyWhiteSpace(CodeUnitMatchFunction); |
423 | |
424 | Ref<StringImpl> stripLeadingAndTrailingCharacters(CodeUnitMatchFunction); |
425 | Ref<StringImpl> removeCharacters(CodeUnitMatchFunction); |
426 | |
427 | bool isAllASCII() const; |
428 | bool isAllLatin1() const; |
429 | template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const; |
430 | |
431 | size_t find(LChar character, unsigned start = 0); |
432 | size_t find(char character, unsigned start = 0); |
433 | size_t find(UChar character, unsigned start = 0); |
434 | WTF_EXPORT_PRIVATE size_t find(CodeUnitMatchFunction, unsigned index = 0); |
435 | size_t find(const LChar*, unsigned index = 0); |
436 | ALWAYS_INLINE size_t find(const char* string, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(string), index); } |
437 | WTF_EXPORT_PRIVATE size_t find(StringImpl*); |
438 | WTF_EXPORT_PRIVATE size_t find(StringImpl*, unsigned index); |
439 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&) const; |
440 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const; |
441 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*) const; |
442 | WTF_EXPORT_PRIVATE size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const; |
443 | |
444 | WTF_EXPORT_PRIVATE size_t reverseFind(UChar, unsigned index = MaxLength); |
445 | WTF_EXPORT_PRIVATE size_t reverseFind(StringImpl*, unsigned index = MaxLength); |
446 | |
447 | WTF_EXPORT_PRIVATE bool startsWith(const StringImpl*) const; |
448 | WTF_EXPORT_PRIVATE bool startsWith(const StringImpl&) const; |
449 | WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl*) const; |
450 | WTF_EXPORT_PRIVATE bool startsWithIgnoringASCIICase(const StringImpl&) const; |
451 | WTF_EXPORT_PRIVATE bool startsWith(UChar) const; |
452 | WTF_EXPORT_PRIVATE bool startsWith(const char*, unsigned matchLength) const; |
453 | template<unsigned matchLength> bool startsWith(const char (&prefix)[matchLength]) const { return startsWith(prefix, matchLength - 1); } |
454 | WTF_EXPORT_PRIVATE bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const; |
455 | |
456 | WTF_EXPORT_PRIVATE bool endsWith(StringImpl*); |
457 | WTF_EXPORT_PRIVATE bool endsWith(StringImpl&); |
458 | WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl*) const; |
459 | WTF_EXPORT_PRIVATE bool endsWithIgnoringASCIICase(const StringImpl&) const; |
460 | WTF_EXPORT_PRIVATE bool endsWith(UChar) const; |
461 | WTF_EXPORT_PRIVATE bool endsWith(const char*, unsigned matchLength) const; |
462 | template<unsigned matchLength> bool endsWith(const char (&prefix)[matchLength]) const { return endsWith(prefix, matchLength - 1); } |
463 | WTF_EXPORT_PRIVATE bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const; |
464 | |
465 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, UChar); |
466 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, StringImpl*); |
467 | ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } |
468 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); |
469 | Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); |
470 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(StringImpl*, StringImpl*); |
471 | WTF_EXPORT_PRIVATE Ref<StringImpl> replace(unsigned index, unsigned length, StringImpl*); |
472 | |
473 | WTF_EXPORT_PRIVATE UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr); |
474 | |
475 | #if USE(CF) |
476 | RetainPtr<CFStringRef> createCFString(); |
477 | #endif |
478 | |
479 | #ifdef __OBJC__ |
480 | WTF_EXPORT_PRIVATE operator NSString *(); |
481 | #endif |
482 | |
483 | #if STRING_STATS |
484 | ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } |
485 | #endif |
486 | |
487 | BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); } |
488 | |
489 | template<typename T> static size_t () { return tailOffset<T>(); } |
490 | |
491 | protected: |
492 | ~StringImpl(); |
493 | |
494 | // Used to create new symbol string that holds an existing [[Description]] string as a substring buffer (BufferSubstring). |
495 | enum CreateSymbolTag { CreateSymbol }; |
496 | StringImpl(CreateSymbolTag, const LChar*, unsigned length); |
497 | StringImpl(CreateSymbolTag, const UChar*, unsigned length); |
498 | |
499 | // Null symbol. |
500 | explicit StringImpl(CreateSymbolTag); |
501 | |
502 | private: |
503 | template<typename> static size_t allocationSize(Checked<size_t> tailElementCount); |
504 | template<typename> static size_t maxInternalLength(); |
505 | template<typename> static size_t tailOffset(); |
506 | |
507 | bool requiresCopy() const; |
508 | template<typename T> const T* tailPointer() const; |
509 | template<typename T> T* tailPointer(); |
510 | StringImpl* const& substringBuffer() const; |
511 | StringImpl*& substringBuffer(); |
512 | |
513 | enum class CaseConvertType { Upper, Lower }; |
514 | template<CaseConvertType, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned); |
515 | |
516 | template<class CodeUnitPredicate> Ref<StringImpl> stripMatchedCharacters(CodeUnitPredicate); |
517 | template<typename CharacterType> ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharacterType* characters, CodeUnitMatchFunction); |
518 | template<typename CharacterType, class CodeUnitPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(CodeUnitPredicate); |
519 | template<typename CharacterType> static Ref<StringImpl> constructInternal(StringImpl&, unsigned); |
520 | template<typename CharacterType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharacterType*&); |
521 | template<typename CharacterType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharacterType*&); |
522 | template<typename CharacterType> static Expected<Ref<StringImpl>, UTF8ConversionError> reallocateInternal(Ref<StringImpl>&&, unsigned, CharacterType*&); |
523 | template<typename CharacterType> static Ref<StringImpl> createInternal(const CharacterType*, unsigned); |
524 | WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const; |
525 | |
526 | // The bottom bit in the ref count indicates a static (immortal) string. |
527 | static const unsigned s_refCountFlagIsStaticString = 0x1; |
528 | static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag. |
529 | |
530 | #if STRING_STATS |
531 | WTF_EXPORT_PRIVATE static StringStats m_stringStats; |
532 | #endif |
533 | |
534 | public: |
535 | void assertHashIsCorrect() const; |
536 | }; |
537 | |
538 | using StaticStringImpl = StringImpl::StaticStringImpl; |
539 | |
540 | static_assert(sizeof(StringImpl) == sizeof(StaticStringImpl), "" ); |
541 | |
542 | #if !ASSERT_DISABLED |
543 | |
// StringImpls created from StaticStringImpl will ASSERT in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc. We don't currently have any way to detect that case
// so we ignore the consistency check for all StringImpl*.
template<> struct ValueCheck<StringImpl*> {
    // Intentionally empty: no consistency check is performed for StringImpl pointers.
    static void checkConsistency(const StringImpl*) { }
};
550 | |
551 | #endif |
552 | |
553 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const StringImpl*); |
554 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*); |
555 | inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); } |
556 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const LChar*, unsigned); |
557 | WTF_EXPORT_PRIVATE bool equal(const StringImpl*, const UChar*, unsigned); |
558 | inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } |
559 | inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } |
560 | inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } |
561 | WTF_EXPORT_PRIVATE bool equal(const StringImpl& a, const StringImpl& b); |
562 | |
563 | WTF_EXPORT_PRIVATE bool equalIgnoringNullity(StringImpl*, StringImpl*); |
564 | WTF_EXPORT_PRIVATE bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*); |
565 | |
566 | bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&); |
567 | WTF_EXPORT_PRIVATE bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*); |
568 | bool equalIgnoringASCIICase(const StringImpl&, const char*); |
569 | bool equalIgnoringASCIICase(const StringImpl*, const char*); |
570 | |
571 | WTF_EXPORT_PRIVATE bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*); |
572 | |
573 | template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]); |
574 | template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]); |
575 | |
576 | size_t find(const LChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0); |
577 | size_t find(const UChar*, unsigned length, CodeUnitMatchFunction, unsigned index = 0); |
578 | |
// Backward searches starting at `index` (clamped to length - 1 by the inline definitions) and
// walking toward index 0. Each returns the index of the match, or notFound.
// The default index of StringImpl::MaxLength therefore means "start from the last character".
template<typename CharacterType> size_t reverseFindLineTerminator(const CharacterType*, unsigned length, unsigned index = StringImpl::MaxLength);
template<typename CharacterType> size_t reverseFind(const CharacterType*, unsigned length, CharacterType matchCharacter, unsigned index = StringImpl::MaxLength);
// Mixed-width overloads: search 16-bit data for an 8-bit character and vice versa.
size_t reverseFind(const UChar*, unsigned length, LChar matchCharacter, unsigned index = StringImpl::MaxLength);
size_t reverseFind(const LChar*, unsigned length, UChar matchCharacter, unsigned index = StringImpl::MaxLength);
583 | |
// Vector convenience overload; the inline definition forwards the vector's data() and size()
// to the (const UChar*, size_t, StringImpl*) overload.
template<size_t inlineCapacity> bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>&, StringImpl*);
585 | |
// Three-way ordering of two character sequences by code unit value.
// Returns -1, 0 or 1; when one sequence is a prefix of the other, the shorter one orders first.
template<typename CharacterType1, typename CharacterType2> int codePointCompare(const CharacterType1*, unsigned length1, const CharacterType2*, unsigned length2);
// StringImpl overload; a null pointer compares equal to an empty string.
int codePointCompare(const StringImpl*, const StringImpl*);
588 | |
// FIXME: Should rename this to make clear it uses the Unicode definition of whitespace.
// Most WebKit callers don't want that and should use isASCIISpace or isHTMLSpace instead.
bool isSpaceOrNewline(UChar32);
592 | |
// Counts the code units before the first zero code unit of a null-terminated string.
// The inline definition asserts the pointer is non-null and release-asserts the count is below StringImpl::MaxLength.
template<typename CharacterType> unsigned lengthOfNullTerminatedString(const CharacterType*);
594 | |
595 | // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> |
596 | template<typename T> struct DefaultHash; |
597 | template<> struct DefaultHash<StringImpl*> { |
598 | typedef StringHash Hash; |
599 | }; |
600 | template<> struct DefaultHash<RefPtr<StringImpl>> { |
601 | typedef StringHash Hash; |
602 | }; |
603 | |
// Expands to an immediately-invoked lambda that returns a pointer to a function-local static
// StaticStringImpl constructed from `characters`. Because every lambda expression is a distinct
// closure type, each expansion site owns its own static instance.
#define MAKE_STATIC_STRING_IMPL(characters) ([] { \
    static StaticStringImpl impl(characters); \
    return &impl; \
}())
608 | |
609 | template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl& string, unsigned length) |
610 | { |
611 | return adoptRef(*new (NotNull, &string) StringImpl { length, Force8BitConstructor }); |
612 | } |
613 | |
614 | template<> ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl& string, unsigned length) |
615 | { |
616 | return adoptRef(*new (NotNull, &string) StringImpl { length }); |
617 | } |
618 | |
// Width-generic accessor, LChar specialization: returns the 8-bit character pointer via characters8().
template<> ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const
{
    return characters8();
}
623 | |
// Width-generic accessor, UChar specialization: returns the 16-bit character pointer via characters16().
template<> ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const
{
    return characters16();
}
628 | |
629 | inline size_t find(const LChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index) |
630 | { |
631 | while (index < length) { |
632 | if (matchFunction(characters[index])) |
633 | return index; |
634 | ++index; |
635 | } |
636 | return notFound; |
637 | } |
638 | |
639 | inline size_t find(const UChar* characters, unsigned length, CodeUnitMatchFunction matchFunction, unsigned index) |
640 | { |
641 | while (index < length) { |
642 | if (matchFunction(characters[index])) |
643 | return index; |
644 | ++index; |
645 | } |
646 | return notFound; |
647 | } |
648 | |
649 | template<typename CharacterType> inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index) |
650 | { |
651 | if (!length) |
652 | return notFound; |
653 | if (index >= length) |
654 | index = length - 1; |
655 | auto character = characters[index]; |
656 | while (character != '\n' && character != '\r') { |
657 | if (!index--) |
658 | return notFound; |
659 | character = characters[index]; |
660 | } |
661 | return index; |
662 | } |
663 | |
664 | template<typename CharacterType> inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index) |
665 | { |
666 | if (!length) |
667 | return notFound; |
668 | if (index >= length) |
669 | index = length - 1; |
670 | while (characters[index] != matchCharacter) { |
671 | if (!index--) |
672 | return notFound; |
673 | } |
674 | return index; |
675 | } |
676 | |
677 | ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index) |
678 | { |
679 | return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index); |
680 | } |
681 | |
682 | inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index) |
683 | { |
684 | if (matchCharacter & ~0xFF) |
685 | return notFound; |
686 | return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index); |
687 | } |
688 | |
689 | inline size_t StringImpl::find(LChar character, unsigned start) |
690 | { |
691 | if (is8Bit()) |
692 | return WTF::find(characters8(), m_length, character, start); |
693 | return WTF::find(characters16(), m_length, character, start); |
694 | } |
695 | |
// char convenience overload: converts the character to LChar and forwards to find(LChar, unsigned).
ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
{
    return find(static_cast<LChar>(character), start);
}
700 | |
701 | inline size_t StringImpl::find(UChar character, unsigned start) |
702 | { |
703 | if (is8Bit()) |
704 | return WTF::find(characters8(), m_length, character, start); |
705 | return WTF::find(characters16(), m_length, character, start); |
706 | } |
707 | |
// Vector convenience overload: forwards the vector's buffer and element count to the
// (const UChar*, size_t, StringImpl*) overload.
template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
{
    return equalIgnoringNullity(a.data(), a.size(), b);
}
712 | |
// Three-way comparison of two character buffers by code unit value.
// Returns 1 / -1 at the first differing code unit within the shared prefix; if the shared
// prefix is identical, the longer buffer orders after the shorter one, and equal lengths give 0.
template<typename CharacterType1, typename CharacterType2> inline int codePointCompare(const CharacterType1* characters1, unsigned length1, const CharacterType2* characters2, unsigned length2)
{
    const unsigned sharedLength = std::min(length1, length2);

    for (unsigned i = 0; i < sharedLength; ++i) {
        if (characters1[i] == characters2[i])
            continue;
        return (characters1[i] > characters2[i]) ? 1 : -1;
    }

    // One buffer is a prefix of the other (or they are identical).
    if (length1 == length2)
        return 0;
    return (length1 > length2) ? 1 : -1;
}
731 | |
732 | inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) |
733 | { |
734 | // FIXME: Should null strings compare as less than empty strings rather than equal to them? |
735 | if (!string1) |
736 | return (string2 && string2->length()) ? -1 : 0; |
737 | if (!string2) |
738 | return string1->length() ? 1 : 0; |
739 | |
740 | bool string1Is8Bit = string1->is8Bit(); |
741 | bool string2Is8Bit = string2->is8Bit(); |
742 | if (string1Is8Bit) { |
743 | if (string2Is8Bit) |
744 | return codePointCompare(string1->characters8(), string1->length(), string2->characters8(), string2->length()); |
745 | return codePointCompare(string1->characters8(), string1->length(), string2->characters16(), string2->length()); |
746 | } |
747 | if (string2Is8Bit) |
748 | return codePointCompare(string1->characters16(), string1->length(), string2->characters8(), string2->length()); |
749 | return codePointCompare(string1->characters16(), string1->length(), string2->characters16(), string2->length()); |
750 | } |
751 | |
752 | inline bool isSpaceOrNewline(UChar32 character) |
753 | { |
754 | // Use isASCIISpace() for all Latin-1 characters. This will include newlines, which aren't included in Unicode DirWS. |
755 | return character <= 0xFF ? isASCIISpace(character) : u_charDirection(character) == U_WHITE_SPACE_NEUTRAL; |
756 | } |
757 | |
758 | template<typename CharacterType> inline unsigned lengthOfNullTerminatedString(const CharacterType* string) |
759 | { |
760 | ASSERT(string); |
761 | size_t length = 0; |
762 | while (string[length]) |
763 | ++length; |
764 | |
765 | RELEASE_ASSERT(length < StringImpl::MaxLength); |
766 | return static_cast<unsigned>(length); |
767 | } |
768 | |
// Initializes the shape for 8-bit data: stores the reference count, length, LChar data pointer
// and the combined hash/flags word exactly as given.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const LChar* data8, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8(data8)
    , m_hashAndFlags(hashAndFlags)
{
}
776 | |
// Initializes the shape for 16-bit data: stores the reference count, length, UChar data pointer
// and the combined hash/flags word exactly as given.
inline StringImplShape::StringImplShape(unsigned refCount, unsigned length, const UChar* data16, unsigned hashAndFlags)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16(data16)
    , m_hashAndFlags(hashAndFlags)
{
}
784 | |
// constexpr constructor for char array literals; selected by the ConstructWithConstExprTag argument.
// Stores the array through the m_data8Char member rather than m_data8.
// NOTE(review): presumably m_data8Char exists so no pointer cast is needed in a constant expression -- confirm at the member declaration.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data8Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
792 | |
// constexpr constructor for char16_t array literals; selected by the ConstructWithConstExprTag argument.
// Stores the array through the m_data16Char member rather than m_data16.
// NOTE(review): presumably m_data16Char exists so no pointer cast is needed in a constant expression -- confirm at the member declaration.
template<unsigned characterCount> constexpr StringImplShape::StringImplShape(unsigned refCount, unsigned length, const char16_t (&characters)[characterCount], unsigned hashAndFlags, ConstructWithConstExprTag)
    : m_refCount(refCount)
    , m_length(length)
    , m_data16Char(characters)
    , m_hashAndFlags(hashAndFlags)
{
}
800 | |
801 | inline Ref<StringImpl> StringImpl::isolatedCopy() const |
802 | { |
803 | if (!requiresCopy()) { |
804 | if (is8Bit()) |
805 | return StringImpl::createWithoutCopying(m_data8, m_length); |
806 | return StringImpl::createWithoutCopying(m_data16, m_length); |
807 | } |
808 | |
809 | if (is8Bit()) |
810 | return create(m_data8, m_length); |
811 | return create(m_data16, m_length); |
812 | } |
813 | |
814 | inline bool StringImpl::isAllASCII() const |
815 | { |
816 | if (is8Bit()) |
817 | return charactersAreAllASCII(characters8(), length()); |
818 | return charactersAreAllASCII(characters16(), length()); |
819 | } |
820 | |
821 | inline bool StringImpl::isAllLatin1() const |
822 | { |
823 | if (is8Bit()) |
824 | return true; |
825 | auto* characters = characters16(); |
826 | UChar ored = 0; |
827 | for (size_t i = 0; i < length(); ++i) |
828 | ored |= characters[i]; |
829 | return !(ored & 0xFF00); |
830 | } |
831 | |
832 | template<bool isSpecialCharacter(UChar), typename CharacterType> inline bool isAllSpecialCharacters(const CharacterType* characters, size_t length) |
833 | { |
834 | for (size_t i = 0; i < length; ++i) { |
835 | if (!isSpecialCharacter(characters[i])) |
836 | return false; |
837 | } |
838 | return true; |
839 | } |
840 | |
841 | template<bool isSpecialCharacter(UChar)> inline bool StringImpl::isAllSpecialCharacters() const |
842 | { |
843 | if (is8Bit()) |
844 | return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters8(), length()); |
845 | return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters16(), length()); |
846 | } |
847 | |
// Constructs an 8-bit string whose characters live at tailPointer<LChar>(), i.e. directly after
// the object. Flags: s_hashFlag8BitBuffer | StringNormal | BufferInternal; the reference count
// starts at s_refCountIncrement. The unnamed Force8Bit parameter only selects this overload.
inline StringImpl::StringImpl(unsigned length, Force8Bit)
    : StringImplShape(s_refCountIncrement, length, tailPointer<LChar>(), s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
856 | |
// Constructs a 16-bit string whose characters live at tailPointer<UChar>(), i.e. directly after
// the object. Flags: StringNormal | BufferInternal; the reference count starts at s_refCountIncrement.
inline StringImpl::StringImpl(unsigned length)
    : StringImplShape(s_refCountIncrement, length, tailPointer<UChar>(), StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
865 | |
// Constructs an 8-bit string that takes over a malloc'ed character buffer: the MallocPtr is
// released via leakPtr() and the string is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<LChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), s_hashFlag8BitBuffer | StringNormal | BufferOwned)
{
    ASSERT(m_data8);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
874 | |
// Constructs a 16-bit string that points at caller-provided characters without copying them.
// It is flagged BufferInternal even though the data is not in this object's tail; requiresCopy()
// distinguishes the two cases by comparing the data pointer against tailPointer().
inline StringImpl::StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferInternal)
{
    ASSERT(m_data16);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
883 | |
// Constructs an 8-bit string that points at caller-provided characters without copying them.
// It is flagged BufferInternal even though the data is not in this object's tail; requiresCopy()
// distinguishes the two cases by comparing the data pointer against tailPointer().
inline StringImpl::StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferInternal)
{
    ASSERT(m_data8);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_8BIT_STRING(m_length);
}
892 | |
// Constructs a 16-bit string that takes over a malloc'ed character buffer: the MallocPtr is
// released via leakPtr() and the string is flagged BufferOwned.
inline StringImpl::StringImpl(MallocPtr<UChar> characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters.leakPtr(), StringNormal | BufferOwned)
{
    ASSERT(m_data16);
    // A zero length is not expected here.
    ASSERT(m_length);

    STRING_STATS_ADD_16BIT_STRING(m_length);
}
901 | |
// Constructs an 8-bit substring whose characters point into `base`'s data.
// The string is flagged BufferSubstring and the base pointer is stored in the tail slot
// returned by substringBuffer().
inline StringImpl::StringImpl(const LChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringNormal | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    ASSERT(m_length);
    // The base must itself own its data; substrings of substrings are not chained.
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // leakRef() hands the base's reference over to the raw pointer stored in the tail slot.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
914 | |
// Constructs a 16-bit substring whose characters point into `base`'s data.
// The string is flagged BufferSubstring and the base pointer is stored in the tail slot
// returned by substringBuffer().
inline StringImpl::StringImpl(const UChar* characters, unsigned length, Ref<StringImpl>&& base)
    : StringImplShape(s_refCountIncrement, length, characters, StringNormal | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    ASSERT(m_length);
    // The base must itself own its data; substrings of substrings are not chained.
    ASSERT(base->bufferOwnership() != BufferSubstring);

    // leakRef() hands the base's reference over to the raw pointer stored in the tail slot.
    substringBuffer() = &base.leakRef();

    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
927 | |
// Vector convenience overload: forwards the vector's buffer and element count to the
// (pointer, length) overload of create8BitIfPossible.
template<size_t inlineCapacity> inline Ref<StringImpl> StringImpl::create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
{
    return create8BitIfPossible(vector.data(), vector.size());
}
932 | |
933 | ALWAYS_INLINE Ref<StringImpl> StringImpl::createSubstringSharingImpl(StringImpl& rep, unsigned offset, unsigned length) |
934 | { |
935 | ASSERT(length <= rep.length()); |
936 | |
937 | if (!length) |
938 | return *empty(); |
939 | |
940 | // Coyping the thing would save more memory sometimes, largely due to the size of pointer. |
941 | size_t substringSize = allocationSize<StringImpl*>(1); |
942 | if (rep.is8Bit()) { |
943 | if (substringSize >= allocationSize<LChar>(length)) |
944 | return create(rep.m_data8 + offset, length); |
945 | } else { |
946 | if (substringSize >= allocationSize<UChar>(length)) |
947 | return create(rep.m_data16 + offset, length); |
948 | } |
949 | |
950 | auto* ownerRep = ((rep.bufferOwnership() == BufferSubstring) ? rep.substringBuffer() : &rep); |
951 | |
952 | // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string. |
953 | auto* stringImpl = static_cast<StringImpl*>(fastMalloc(substringSize)); |
954 | if (rep.is8Bit()) |
955 | return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data8 + offset, length, *ownerRep)); |
956 | return adoptRef(*new (NotNull, stringImpl) StringImpl(rep.m_data16 + offset, length, *ownerRep)); |
957 | } |
958 | |
// Creates a StringImpl that refers to a character array literal without copying it.
// characterCount includes the trailing null, so the string length is characterCount - 1.
template<unsigned characterCount> ALWAYS_INLINE Ref<StringImpl> StringImpl::createFromLiteral(const char (&characters)[characterCount])
{
    // Reject the empty literal "" at compile time.
    COMPILE_ASSERT(characterCount > 1, StringImplFromLiteralNotEmpty);
    // Reject literals so long that header size plus character bytes would not fit in an unsigned.
    COMPILE_ASSERT((characterCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);

    return createWithoutCopying(reinterpret_cast<const LChar*>(characters), characterCount - 1);
}
966 | |
967 | template<typename CharacterType> ALWAYS_INLINE RefPtr<StringImpl> StringImpl::tryCreateUninitialized(unsigned length, CharacterType*& output) |
968 | { |
969 | if (!length) { |
970 | output = nullptr; |
971 | return empty(); |
972 | } |
973 | |
974 | if (length > maxInternalLength<CharacterType>()) { |
975 | output = nullptr; |
976 | return nullptr; |
977 | } |
978 | StringImpl* result; |
979 | if (!tryFastMalloc(allocationSize<CharacterType>(length)).getValue(result)) { |
980 | output = nullptr; |
981 | return nullptr; |
982 | } |
983 | output = result->tailPointer<CharacterType>(); |
984 | |
985 | return constructInternal<CharacterType>(*result, length); |
986 | } |
987 | |
988 | template<typename CharacterType, size_t inlineCapacity, typename OverflowHandler, size_t minCapacity> |
989 | inline Ref<StringImpl> StringImpl::adopt(Vector<CharacterType, inlineCapacity, OverflowHandler, minCapacity>&& vector) |
990 | { |
991 | if (size_t size = vector.size()) { |
992 | ASSERT(vector.data()); |
993 | if (size > MaxLength) |
994 | CRASH(); |
995 | return adoptRef(*new StringImpl(vector.releaseBuffer(), size)); |
996 | } |
997 | return *empty(); |
998 | } |
999 | |
1000 | inline size_t StringImpl::cost() const |
1001 | { |
1002 | // For substrings, return the cost of the base string. |
1003 | if (bufferOwnership() == BufferSubstring) |
1004 | return substringBuffer()->cost(); |
1005 | |
1006 | // Note: we must not alter the m_hashAndFlags field in instances of StaticStringImpl. |
1007 | // We ensure this by pre-setting the s_hashFlagDidReportCost bit in all instances of |
1008 | // StaticStringImpl. As a result, StaticStringImpl instances will always return a cost of |
1009 | // 0 here and avoid modifying m_hashAndFlags. |
1010 | if (m_hashAndFlags & s_hashFlagDidReportCost) |
1011 | return 0; |
1012 | |
1013 | m_hashAndFlags |= s_hashFlagDidReportCost; |
1014 | size_t result = m_length; |
1015 | if (!is8Bit()) |
1016 | result <<= 1; |
1017 | return result; |
1018 | } |
1019 | |
1020 | inline size_t StringImpl::costDuringGC() |
1021 | { |
1022 | if (isStatic()) |
1023 | return 0; |
1024 | |
1025 | if (bufferOwnership() == BufferSubstring) |
1026 | return divideRoundedUp(substringBuffer()->costDuringGC(), refCount()); |
1027 | |
1028 | size_t result = m_length; |
1029 | if (!is8Bit()) |
1030 | result <<= 1; |
1031 | return divideRoundedUp(result, refCount()); |
1032 | } |
1033 | |
1034 | inline void StringImpl::setIsAtomic(bool isAtom) |
1035 | { |
1036 | ASSERT(!isStatic()); |
1037 | ASSERT(!isSymbol()); |
1038 | if (isAtom) |
1039 | m_hashAndFlags |= s_hashFlagStringKindIsAtomic; |
1040 | else |
1041 | m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic; |
1042 | } |
1043 | |
// Stores a freshly computed hash in the bits of m_hashAndFlags above the flag bits.
// May only be called once per string (asserted via !hasHash()) and never on static strings.
inline void StringImpl::setHash(unsigned hash) const
{
    // The high bits of 'hash' are always empty, but we prefer to store our flags
    // in the low bits because it makes them slightly more efficient to access.
    // So, we shift left and right when setting and getting our hash code.

    ASSERT(!hasHash());
    ASSERT(!isStatic());
    // Multiple clients assume that StringHasher is the canonical string hash function.
    ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
    ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.

    // Move the hash above the flag bits.
    hash <<= s_flagCount;
    ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
    ASSERT(hash); // Verify that 0 is a valid sentinel hash value.

    m_hashAndFlags |= hash; // Store hash with flags in low bits.
}
1062 | |
// Takes a reference: records the event for string statistics and adds s_refCountIncrement to
// m_refCount with a plain (non-atomic) addition.
inline void StringImpl::ref()
{
    STRING_STATS_REF_STRING(*this);

    m_refCount += s_refCountIncrement;
}
1069 | |
1070 | inline void StringImpl::deref() |
1071 | { |
1072 | STRING_STATS_DEREF_STRING(*this); |
1073 | |
1074 | unsigned tempRefCount = m_refCount - s_refCountIncrement; |
1075 | if (!tempRefCount) { |
1076 | StringImpl::destroy(this); |
1077 | return; |
1078 | } |
1079 | m_refCount = tempRefCount; |
1080 | } |
1081 | |
1082 | template<typename CharacterType> inline void StringImpl::copyCharacters(CharacterType* destination, const CharacterType* source, unsigned numCharacters) |
1083 | { |
1084 | if (numCharacters == 1) { |
1085 | *destination = *source; |
1086 | return; |
1087 | } |
1088 | memcpy(destination, source, numCharacters * sizeof(CharacterType)); |
1089 | } |
1090 | |
1091 | ALWAYS_INLINE void StringImpl::copyCharacters(UChar* destination, const LChar* source, unsigned numCharacters) |
1092 | { |
1093 | for (unsigned i = 0; i < numCharacters; ++i) |
1094 | destination[i] = source[i]; |
1095 | } |
1096 | |
1097 | inline UChar StringImpl::at(unsigned i) const |
1098 | { |
1099 | ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); |
1100 | return is8Bit() ? m_data8[i] : m_data16[i]; |
1101 | } |
1102 | |
// Symbol constructor for 8-bit characters: flags are s_hashFlag8BitBuffer | StringSymbol | BufferSubstring.
// Unlike the regular constructors, the length is not asserted to be non-zero.
inline StringImpl::StringImpl(CreateSymbolTag, const LChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1110 | |
// Symbol constructor for 16-bit characters: flags are StringSymbol | BufferSubstring.
// Unlike the regular constructors, the length is not asserted to be non-zero.
inline StringImpl::StringImpl(CreateSymbolTag, const UChar* characters, unsigned length)
    : StringImplShape(s_refCountIncrement, length, characters, StringSymbol | BufferSubstring)
{
    ASSERT(!is8Bit());
    ASSERT(m_data16);
    STRING_STATS_ADD_16BIT_STRING2(m_length, true);
}
1118 | |
// Symbol constructor with no characters: length 0, pointing at the shared empty string's
// 8-bit character data. Flags are s_hashFlag8BitBuffer | StringSymbol | BufferSubstring.
inline StringImpl::StringImpl(CreateSymbolTag)
    : StringImplShape(s_refCountIncrement, 0, empty()->characters8(), s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
{
    ASSERT(is8Bit());
    ASSERT(m_data8);
    STRING_STATS_ADD_8BIT_STRING2(m_length, true);
}
1126 | |
// Returns the number of bytes needed for a StringImpl followed by `tailElementCount` elements
// of type T: tailOffset<T>() plus the element bytes. The arithmetic is done on Checked<size_t>.
// NOTE(review): overflow handling depends on Checked's overflow policy and unsafeGet() -- confirm in CheckedArithmetic.h.
template<typename T> inline size_t StringImpl::allocationSize(Checked<size_t> tailElementCount)
{
    return (tailOffset<T>() + tailElementCount * sizeof(T)).unsafeGet();
}
1131 | |
1132 | template<typename CharacterType> |
1133 | inline size_t StringImpl::maxInternalLength() |
1134 | { |
1135 | // In order to not overflow the unsigned length, the check for (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) is needed when sizeof(CharacterType) == 2. |
1136 | return std::min(static_cast<size_t>(MaxLength), (std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharacterType)); |
1137 | } |
1138 | |
// Returns the byte offset from the start of a StringImpl at which tail storage of type T begins.
// Non-MSVC builds round the end of the m_hashAndFlags member up to alignof(T);
// MSVC builds round sizeof(StringImpl) up to a multiple of sizeof(T) instead.
template<typename T> inline size_t StringImpl::tailOffset()
{
#if COMPILER(MSVC)
    // MSVC doesn't support alignof yet.
    return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
#else
    return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
#endif
}
1148 | |
1149 | inline bool StringImpl::requiresCopy() const |
1150 | { |
1151 | if (bufferOwnership() != BufferInternal) |
1152 | return true; |
1153 | |
1154 | if (is8Bit()) |
1155 | return m_data8 == tailPointer<LChar>(); |
1156 | return m_data16 == tailPointer<UChar>(); |
1157 | } |
1158 | |
// Returns a const pointer to the tail storage: the address of this object plus tailOffset<T>() bytes.
template<typename T> inline const T* StringImpl::tailPointer() const
{
    return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
}
1163 | |
// Returns a mutable pointer to the tail storage: the address of this object plus tailOffset<T>() bytes.
template<typename T> inline T* StringImpl::tailPointer()
{
    return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
}
1168 | |
// Returns a const reference to the base-string pointer stored in the tail slot.
// Only valid for strings flagged BufferSubstring (asserted).
inline StringImpl* const& StringImpl::substringBuffer() const
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1175 | |
// Returns a mutable reference to the base-string pointer stored in the tail slot, allowing the
// substring constructors to assign it. Only valid for strings flagged BufferSubstring (asserted).
inline StringImpl*& StringImpl::substringBuffer()
{
    ASSERT(bufferOwnership() == BufferSubstring);

    return *tailPointer<StringImpl*>();
}
1182 | |
// Debug check that the stored hash equals the StringHasher hash of the characters.
// The characters are read through characters8() only.
// NOTE(review): presumably callers use this only on 8-bit strings -- confirm at the call sites.
inline void StringImpl::assertHashIsCorrect() const
{
    ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
}
1187 | |
// constexpr constructor for an 8-bit static string built from a char array literal.
// The length is characterCount - 1 (the trailing null is excluded). The hash is computed from
// the literal by computeLiteralHashAndMaskTop8Bits and stored above the flag bits, and
// s_hashFlagDidReportCost is pre-set so cost() returns 0 without writing to m_hashAndFlags.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlag8BitBuffer | s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1193 | |
// constexpr constructor for a 16-bit static string built from a char16_t array literal.
// The length is characterCount - 1 (the trailing null is excluded). The hash is computed from
// the literal by computeLiteralHashAndMaskTop8Bits and stored above the flag bits, and
// s_hashFlagDidReportCost is pre-set so cost() returns 0 without writing to m_hashAndFlags.
template<unsigned characterCount> constexpr StringImpl::StaticStringImpl::StaticStringImpl(const char16_t (&characters)[characterCount], StringKind stringKind)
    : StringImplShape(s_refCountFlagIsStaticString, characterCount - 1, characters,
        s_hashFlagDidReportCost | stringKind | BufferInternal | (StringHasher::computeLiteralHashAndMaskTop8Bits(characters) << s_flagCount), ConstructWithConstExpr)
{
}
1199 | |
// Views this StaticStringImpl as a StringImpl by reinterpreting its address.
// NOTE(review): relies on both types sharing the StringImplShape layout -- confirm at the class definitions.
inline StringImpl::StaticStringImpl::operator StringImpl&()
{
    return *reinterpret_cast<StringImpl*>(this);
}
1204 | |
// ASCII-case-insensitive equality of two StringImpls; forwards to equalIgnoringASCIICaseCommon.
inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1209 | |
// ASCII-case-insensitive equality of a StringImpl and a C string; forwards to equalIgnoringASCIICaseCommon.
inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1214 | |
1215 | inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b) |
1216 | { |
1217 | return a && equalIgnoringASCIICase(*a, b); |
1218 | } |
1219 | |
// Prefix test against a character-array literal; forwards to startsWithLettersIgnoringASCIICaseCommon.
// The parameter name indicates the literal is expected to consist of lowercase letters.
template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return startsWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1224 | |
1225 | template<unsigned length> inline bool startsWithLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) |
1226 | { |
1227 | return string && startsWithLettersIgnoringASCIICase(*string, lowercaseLetters); |
1228 | } |
1229 | |
// Equality test against a character-array literal; forwards to equalLettersIgnoringASCIICaseCommon.
// The parameter name indicates the literal is expected to consist of lowercase letters.
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1234 | |
1235 | template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length]) |
1236 | { |
1237 | return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters); |
1238 | } |
1239 | |
1240 | } // namespace WTF |
1241 | |
// Make these WTF names usable without the WTF:: qualifier in code that includes this header.
using WTF::StaticStringImpl;
using WTF::StringImpl;
using WTF::equal;
using WTF::isLatin1;
1246 | |