1/*
2 * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <[email protected]>
4 * Copyright (C) 2012 Google Inc. All rights reserved.
5 * Copyright (C) 2015 Yusuke Suzuki<[email protected]>. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include <wtf/text/AtomStringImpl.h>
26
27#include <wtf/CommaPrinter.h>
28#include <wtf/DataLog.h>
29#include <wtf/HashSet.h>
30#include <wtf/StringPrintStream.h>
31#include <wtf/Threading.h>
32#include <wtf/text/AtomStringTable.h>
33#include <wtf/text/IntegerToStringConversion.h>
34#include <wtf/text/StringHash.h>
35#include <wtf/unicode/UTF8Conversion.h>
36
37#if USE(WEB_THREAD)
38#include <wtf/Lock.h>
39#endif
40
41namespace WTF {
42
43using namespace Unicode;
44
45#if USE(WEB_THREAD)
46
47class AtomStringTableLocker : public LockHolder {
48 WTF_MAKE_NONCOPYABLE(AtomStringTableLocker);
49
50 static Lock s_stringTableLock;
51public:
52 AtomStringTableLocker()
53 : LockHolder(&s_stringTableLock)
54 {
55 }
56};
57
58Lock AtomStringTableLocker::s_stringTableLock;
59
60#else
61
62class AtomStringTableLocker {
63 WTF_MAKE_NONCOPYABLE(AtomStringTableLocker);
64public:
65 AtomStringTableLocker() { }
66};
67
68#endif // USE(WEB_THREAD)
69
70using StringTableImpl = HashSet<StringImpl*>;
71
72static ALWAYS_INLINE StringTableImpl& stringTable()
73{
74 return Thread::current().atomStringTable()->table();
75}
76
77template<typename T, typename HashTranslator>
78static inline Ref<AtomStringImpl> addToStringTable(AtomStringTableLocker&, StringTableImpl& atomStringTable, const T& value)
79{
80 auto addResult = atomStringTable.add<HashTranslator>(value);
81
82 // If the string is newly-translated, then we need to adopt it.
83 // The boolean in the pair tells us if that is so.
84 if (addResult.isNewEntry)
85 return adoptRef(static_cast<AtomStringImpl&>(**addResult.iterator));
86 return *static_cast<AtomStringImpl*>(*addResult.iterator);
87}
88
89template<typename T, typename HashTranslator>
90static inline Ref<AtomStringImpl> addToStringTable(const T& value)
91{
92 AtomStringTableLocker locker;
93 return addToStringTable<T, HashTranslator>(locker, stringTable(), value);
94}
95
96struct CStringTranslator {
97 static unsigned hash(const LChar* characters)
98 {
99 return StringHasher::computeHashAndMaskTop8Bits(characters);
100 }
101
102 static inline bool equal(StringImpl* str, const LChar* characters)
103 {
104 return WTF::equal(str, characters);
105 }
106
107 static void translate(StringImpl*& location, const LChar* const& characters, unsigned hash)
108 {
109 location = &StringImpl::create(characters).leakRef();
110 location->setHash(hash);
111 location->setIsAtomic(true);
112 }
113};
114
115RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters)
116{
117 if (!characters)
118 return nullptr;
119 if (!*characters)
120 return static_cast<AtomStringImpl*>(StringImpl::empty());
121
122 return addToStringTable<const LChar*, CStringTranslator>(characters);
123}
124
125template<typename CharacterType>
126struct HashTranslatorCharBuffer {
127 const CharacterType* characters;
128 unsigned length;
129 unsigned hash;
130
131 HashTranslatorCharBuffer(const CharacterType* characters, unsigned length)
132 : characters(characters)
133 , length(length)
134 , hash(StringHasher::computeHashAndMaskTop8Bits(characters, length))
135 {
136 }
137
138 HashTranslatorCharBuffer(const CharacterType* characters, unsigned length, unsigned hash)
139 : characters(characters)
140 , length(length)
141 , hash(hash)
142 {
143 }
144};
145
146using UCharBuffer = HashTranslatorCharBuffer<UChar>;
147struct UCharBufferTranslator {
148 static unsigned hash(const UCharBuffer& buf)
149 {
150 return buf.hash;
151 }
152
153 static bool equal(StringImpl* const& str, const UCharBuffer& buf)
154 {
155 return WTF::equal(str, buf.characters, buf.length);
156 }
157
158 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
159 {
160 location = &StringImpl::create8BitIfPossible(buf.characters, buf.length).leakRef();
161 location->setHash(hash);
162 location->setIsAtomic(true);
163 }
164};
165
// Lookup key for UTF-8 input; filled in by AtomStringImpl::addUTF8.
struct HashAndUTF8Characters {
    // Value returned by calculateStringHashAndLengthFromUTF8MaskingTop8Bits.
    unsigned hash;
    // Start of the UTF-8 data.
    const char* characters;
    // Extent of the UTF-8 data: characters + length is used as its end pointer.
    unsigned length;
    // Length of the resulting string in UTF-16 code units; compared against
    // StringImpl::length() and used to size the UChar buffer for a new entry.
    unsigned utf16Length;
};
172
173struct HashAndUTF8CharactersTranslator {
174 static unsigned hash(const HashAndUTF8Characters& buffer)
175 {
176 return buffer.hash;
177 }
178
179 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
180 {
181 if (buffer.utf16Length != string->length())
182 return false;
183
184 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
185 if (buffer.utf16Length != buffer.length) {
186 if (string->is8Bit())
187 return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length);
188
189 return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length);
190 }
191
192 if (string->is8Bit()) {
193 const LChar* stringCharacters = string->characters8();
194
195 for (unsigned i = 0; i < buffer.length; ++i) {
196 ASSERT(isASCII(buffer.characters[i]));
197 if (stringCharacters[i] != buffer.characters[i])
198 return false;
199 }
200
201 return true;
202 }
203
204 const UChar* stringCharacters = string->characters16();
205
206 for (unsigned i = 0; i < buffer.length; ++i) {
207 ASSERT(isASCII(buffer.characters[i]));
208 if (stringCharacters[i] != buffer.characters[i])
209 return false;
210 }
211
212 return true;
213 }
214
215 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
216 {
217 UChar* target;
218 auto newString = StringImpl::createUninitialized(buffer.utf16Length, target);
219
220 bool isAllASCII;
221 const char* source = buffer.characters;
222 if (!convertUTF8ToUTF16(source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII))
223 ASSERT_NOT_REACHED();
224
225 if (isAllASCII)
226 newString = StringImpl::create(buffer.characters, buffer.length);
227
228 location = &newString.leakRef();
229 location->setHash(hash);
230 location->setIsAtomic(true);
231 }
232};
233
234RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters, unsigned length)
235{
236 if (!characters)
237 return nullptr;
238
239 if (!length)
240 return static_cast<AtomStringImpl*>(StringImpl::empty());
241
242 UCharBuffer buffer { characters, length };
243 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
244}
245
246RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters)
247{
248 if (!characters)
249 return nullptr;
250
251 unsigned length = 0;
252 while (characters[length] != UChar(0))
253 ++length;
254
255 if (!length)
256 return static_cast<AtomStringImpl*>(StringImpl::empty());
257
258 UCharBuffer buffer { characters, length };
259 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
260}
261
262struct SubstringLocation {
263 StringImpl* baseString;
264 unsigned start;
265 unsigned length;
266};
267
268struct SubstringTranslator {
269 static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
270 {
271 location = &StringImpl::createSubstringSharingImpl(*buffer.baseString, buffer.start, buffer.length).leakRef();
272 location->setHash(hash);
273 location->setIsAtomic(true);
274 }
275};
276
277struct SubstringTranslator8 : SubstringTranslator {
278 static unsigned hash(const SubstringLocation& buffer)
279 {
280 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
281 }
282
283 static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
284 {
285 return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
286 }
287};
288
289struct SubstringTranslator16 : SubstringTranslator {
290 static unsigned hash(const SubstringLocation& buffer)
291 {
292 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
293 }
294
295 static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
296 {
297 return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
298 }
299};
300
301RefPtr<AtomStringImpl> AtomStringImpl::add(StringImpl* baseString, unsigned start, unsigned length)
302{
303 if (!baseString)
304 return nullptr;
305
306 if (!length || start >= baseString->length())
307 return static_cast<AtomStringImpl*>(StringImpl::empty());
308
309 unsigned maxLength = baseString->length() - start;
310 if (length >= maxLength) {
311 if (!start)
312 return add(baseString);
313 length = maxLength;
314 }
315
316 SubstringLocation buffer = { baseString, start, length };
317 if (baseString->is8Bit())
318 return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer);
319 return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer);
320}
321
322using LCharBuffer = HashTranslatorCharBuffer<LChar>;
323struct LCharBufferTranslator {
324 static unsigned hash(const LCharBuffer& buf)
325 {
326 return buf.hash;
327 }
328
329 static bool equal(StringImpl* const& str, const LCharBuffer& buf)
330 {
331 return WTF::equal(str, buf.characters, buf.length);
332 }
333
334 static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
335 {
336 location = &StringImpl::create(buf.characters, buf.length).leakRef();
337 location->setHash(hash);
338 location->setIsAtomic(true);
339 }
340};
341
342template<typename CharType>
343struct BufferFromStaticDataTranslator {
344 using Buffer = HashTranslatorCharBuffer<CharType>;
345 static unsigned hash(const Buffer& buf)
346 {
347 return buf.hash;
348 }
349
350 static bool equal(StringImpl* const& str, const Buffer& buf)
351 {
352 return WTF::equal(str, buf.characters, buf.length);
353 }
354
355 static void translate(StringImpl*& location, const Buffer& buf, unsigned hash)
356 {
357 location = &StringImpl::createWithoutCopying(buf.characters, buf.length).leakRef();
358 location->setHash(hash);
359 location->setIsAtomic(true);
360 }
361};
362
363RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters, unsigned length)
364{
365 if (!characters)
366 return nullptr;
367
368 if (!length)
369 return static_cast<AtomStringImpl*>(StringImpl::empty());
370
371 LCharBuffer buffer { characters, length };
372 return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
373}
374
375Ref<AtomStringImpl> AtomStringImpl::addLiteral(const char* characters, unsigned length)
376{
377 ASSERT(characters);
378 ASSERT(length);
379
380 LCharBuffer buffer { reinterpret_cast<const LChar*>(characters), length };
381 return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(buffer);
382}
383
384static Ref<AtomStringImpl> addSymbol(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, StringImpl& base)
385{
386 ASSERT(base.length());
387 ASSERT(base.isSymbol());
388
389 SubstringLocation buffer = { &base, 0, base.length() };
390 if (base.is8Bit())
391 return addToStringTable<SubstringLocation, SubstringTranslator8>(locker, atomStringTable, buffer);
392 return addToStringTable<SubstringLocation, SubstringTranslator16>(locker, atomStringTable, buffer);
393}
394
395static inline Ref<AtomStringImpl> addSymbol(StringImpl& base)
396{
397 AtomStringTableLocker locker;
398 return addSymbol(locker, stringTable(), base);
399}
400
401static Ref<AtomStringImpl> addStatic(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, const StringImpl& base)
402{
403 ASSERT(base.length());
404 ASSERT(base.isStatic());
405
406 if (base.is8Bit()) {
407 LCharBuffer buffer { base.characters8(), base.length(), base.hash() };
408 return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(locker, atomStringTable, buffer);
409 }
410 UCharBuffer buffer { base.characters16(), base.length(), base.hash() };
411 return addToStringTable<UCharBuffer, BufferFromStaticDataTranslator<UChar>>(locker, atomStringTable, buffer);
412}
413
414static inline Ref<AtomStringImpl> addStatic(const StringImpl& base)
415{
416 AtomStringTableLocker locker;
417 return addStatic(locker, stringTable(), base);
418}
419
420RefPtr<AtomStringImpl> AtomStringImpl::add(const StaticStringImpl* string)
421{
422 auto s = reinterpret_cast<const StringImpl*>(string);
423 ASSERT(s->isStatic());
424 return addStatic(*s);
425}
426
427Ref<AtomStringImpl> AtomStringImpl::addSlowCase(StringImpl& string)
428{
429 // This check is necessary for null symbols.
430 // Their length is zero, but they are not AtomStringImpl.
431 if (!string.length())
432 return *static_cast<AtomStringImpl*>(StringImpl::empty());
433
434 if (string.isStatic())
435 return addStatic(string);
436
437 if (string.isSymbol())
438 return addSymbol(string);
439
440 ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already atomic.");
441
442 AtomStringTableLocker locker;
443 auto addResult = stringTable().add(&string);
444
445 if (addResult.isNewEntry) {
446 ASSERT(*addResult.iterator == &string);
447 string.setIsAtomic(true);
448 }
449
450 return *static_cast<AtomStringImpl*>(*addResult.iterator);
451}
452
453Ref<AtomStringImpl> AtomStringImpl::addSlowCase(AtomStringTable& stringTable, StringImpl& string)
454{
455 // This check is necessary for null symbols.
456 // Their length is zero, but they are not AtomStringImpl.
457 if (!string.length())
458 return *static_cast<AtomStringImpl*>(StringImpl::empty());
459
460 if (string.isStatic()) {
461 AtomStringTableLocker locker;
462 return addStatic(locker, stringTable.table(), string);
463 }
464
465 if (string.isSymbol()) {
466 AtomStringTableLocker locker;
467 return addSymbol(locker, stringTable.table(), string);
468 }
469
470 ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already atomic.");
471
472 AtomStringTableLocker locker;
473 auto addResult = stringTable.table().add(&string);
474
475 if (addResult.isNewEntry) {
476 ASSERT(*addResult.iterator == &string);
477 string.setIsAtomic(true);
478 }
479
480 return *static_cast<AtomStringImpl*>(*addResult.iterator);
481}
482
483void AtomStringImpl::remove(AtomStringImpl* string)
484{
485 ASSERT(string->isAtom());
486 AtomStringTableLocker locker;
487 auto& atomStringTable = stringTable();
488 auto iterator = atomStringTable.find(string);
489 ASSERT_WITH_MESSAGE(iterator != atomStringTable.end(), "The string being removed is atomic in the string table of an other thread!");
490 ASSERT(string == *iterator);
491 atomStringTable.remove(iterator);
492}
493
494RefPtr<AtomStringImpl> AtomStringImpl::lookUpSlowCase(StringImpl& string)
495{
496 ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomicStringImpls should return from the fast case.");
497
498 if (!string.length())
499 return static_cast<AtomStringImpl*>(StringImpl::empty());
500
501 AtomStringTableLocker locker;
502 auto& atomStringTable = stringTable();
503 auto iterator = atomStringTable.find(&string);
504 if (iterator != atomStringTable.end())
505 return static_cast<AtomStringImpl*>(*iterator);
506 return nullptr;
507}
508
509RefPtr<AtomStringImpl> AtomStringImpl::addUTF8(const char* charactersStart, const char* charactersEnd)
510{
511 HashAndUTF8Characters buffer;
512 buffer.characters = charactersStart;
513 buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
514
515 if (!buffer.hash)
516 return nullptr;
517
518 return addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
519}
520
521RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const LChar* characters, unsigned length)
522{
523 AtomStringTableLocker locker;
524 auto& table = stringTable();
525
526 LCharBuffer buffer = { characters, length };
527 auto iterator = table.find<LCharBufferTranslator>(buffer);
528 if (iterator != table.end())
529 return static_cast<AtomStringImpl*>(*iterator);
530 return nullptr;
531}
532
533RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const UChar* characters, unsigned length)
534{
535 AtomStringTableLocker locker;
536 auto& table = stringTable();
537
538 UCharBuffer buffer { characters, length };
539 auto iterator = table.find<UCharBufferTranslator>(buffer);
540 if (iterator != table.end())
541 return static_cast<AtomStringImpl*>(*iterator);
542 return nullptr;
543}
544
545#if !ASSERT_DISABLED
546bool AtomStringImpl::isInAtomStringTable(StringImpl* string)
547{
548 AtomStringTableLocker locker;
549 return stringTable().contains(string);
550}
551#endif
552
553} // namespace WTF
554