1 | /* |
2 | * Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2010 Patrick Gansterer <[email protected]> |
4 | * Copyright (C) 2012 Google Inc. All rights reserved. |
5 | * Copyright (C) 2015 Yusuke Suzuki<[email protected]>. All rights reserved. |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Library General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Library General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Library General Public License |
18 | * along with this library; see the file COPYING.LIB. If not, write to |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
20 | * Boston, MA 02110-1301, USA. |
21 | * |
22 | */ |
23 | |
24 | #include "config.h" |
25 | #include <wtf/text/AtomStringImpl.h> |
26 | |
27 | #include <wtf/CommaPrinter.h> |
28 | #include <wtf/DataLog.h> |
29 | #include <wtf/HashSet.h> |
30 | #include <wtf/StringPrintStream.h> |
31 | #include <wtf/Threading.h> |
32 | #include <wtf/text/AtomStringTable.h> |
33 | #include <wtf/text/IntegerToStringConversion.h> |
34 | #include <wtf/text/StringHash.h> |
35 | #include <wtf/unicode/UTF8Conversion.h> |
36 | |
37 | #if USE(WEB_THREAD) |
38 | #include <wtf/Lock.h> |
39 | #endif |
40 | |
41 | namespace WTF { |
42 | |
43 | using namespace Unicode; |
44 | |
45 | #if USE(WEB_THREAD) |
46 | |
47 | class AtomStringTableLocker : public LockHolder { |
48 | WTF_MAKE_NONCOPYABLE(AtomStringTableLocker); |
49 | |
50 | static Lock s_stringTableLock; |
51 | public: |
52 | AtomStringTableLocker() |
53 | : LockHolder(&s_stringTableLock) |
54 | { |
55 | } |
56 | }; |
57 | |
58 | Lock AtomStringTableLocker::s_stringTableLock; |
59 | |
60 | #else |
61 | |
62 | class AtomStringTableLocker { |
63 | WTF_MAKE_NONCOPYABLE(AtomStringTableLocker); |
64 | public: |
65 | AtomStringTableLocker() { } |
66 | }; |
67 | |
68 | #endif // USE(WEB_THREAD) |
69 | |
70 | using StringTableImpl = HashSet<StringImpl*>; |
71 | |
72 | static ALWAYS_INLINE StringTableImpl& stringTable() |
73 | { |
74 | return Thread::current().atomStringTable()->table(); |
75 | } |
76 | |
77 | template<typename T, typename HashTranslator> |
78 | static inline Ref<AtomStringImpl> addToStringTable(AtomStringTableLocker&, StringTableImpl& atomStringTable, const T& value) |
79 | { |
80 | auto addResult = atomStringTable.add<HashTranslator>(value); |
81 | |
82 | // If the string is newly-translated, then we need to adopt it. |
83 | // The boolean in the pair tells us if that is so. |
84 | if (addResult.isNewEntry) |
85 | return adoptRef(static_cast<AtomStringImpl&>(**addResult.iterator)); |
86 | return *static_cast<AtomStringImpl*>(*addResult.iterator); |
87 | } |
88 | |
89 | template<typename T, typename HashTranslator> |
90 | static inline Ref<AtomStringImpl> addToStringTable(const T& value) |
91 | { |
92 | AtomStringTableLocker locker; |
93 | return addToStringTable<T, HashTranslator>(locker, stringTable(), value); |
94 | } |
95 | |
96 | struct CStringTranslator { |
97 | static unsigned hash(const LChar* characters) |
98 | { |
99 | return StringHasher::computeHashAndMaskTop8Bits(characters); |
100 | } |
101 | |
102 | static inline bool equal(StringImpl* str, const LChar* characters) |
103 | { |
104 | return WTF::equal(str, characters); |
105 | } |
106 | |
107 | static void translate(StringImpl*& location, const LChar* const& characters, unsigned hash) |
108 | { |
109 | location = &StringImpl::create(characters).leakRef(); |
110 | location->setHash(hash); |
111 | location->setIsAtom(true); |
112 | } |
113 | }; |
114 | |
115 | RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters) |
116 | { |
117 | if (!characters) |
118 | return nullptr; |
119 | if (!*characters) |
120 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
121 | |
122 | return addToStringTable<const LChar*, CStringTranslator>(characters); |
123 | } |
124 | |
125 | template<typename CharacterType> |
126 | struct HashTranslatorCharBuffer { |
127 | const CharacterType* characters; |
128 | unsigned length; |
129 | unsigned hash; |
130 | |
131 | HashTranslatorCharBuffer(const CharacterType* characters, unsigned length) |
132 | : characters(characters) |
133 | , length(length) |
134 | , hash(StringHasher::computeHashAndMaskTop8Bits(characters, length)) |
135 | { |
136 | } |
137 | |
138 | HashTranslatorCharBuffer(const CharacterType* characters, unsigned length, unsigned hash) |
139 | : characters(characters) |
140 | , length(length) |
141 | , hash(hash) |
142 | { |
143 | } |
144 | }; |
145 | |
146 | using UCharBuffer = HashTranslatorCharBuffer<UChar>; |
147 | struct UCharBufferTranslator { |
148 | static unsigned hash(const UCharBuffer& buf) |
149 | { |
150 | return buf.hash; |
151 | } |
152 | |
153 | static bool equal(StringImpl* const& str, const UCharBuffer& buf) |
154 | { |
155 | return WTF::equal(str, buf.characters, buf.length); |
156 | } |
157 | |
158 | static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) |
159 | { |
160 | location = &StringImpl::create8BitIfPossible(buf.characters, buf.length).leakRef(); |
161 | location->setHash(hash); |
162 | location->setIsAtom(true); |
163 | } |
164 | }; |
165 | |
// Lookup key for a UTF-8 encoded candidate string.
struct HashAndUTF8Characters {
    unsigned hash; // Hash used for the table lookup.
    const char* characters; // Start of the UTF-8 data.
    unsigned length; // Length of the UTF-8 data; characters + length is its end.
    unsigned utf16Length; // Length of the same text in UTF-16 code units.
};
172 | |
173 | struct HashAndUTF8CharactersTranslator { |
174 | static unsigned hash(const HashAndUTF8Characters& buffer) |
175 | { |
176 | return buffer.hash; |
177 | } |
178 | |
179 | static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) |
180 | { |
181 | if (buffer.utf16Length != string->length()) |
182 | return false; |
183 | |
184 | // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. |
185 | if (buffer.utf16Length != buffer.length) { |
186 | if (string->is8Bit()) |
187 | return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length); |
188 | |
189 | return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length); |
190 | } |
191 | |
192 | if (string->is8Bit()) { |
193 | const LChar* stringCharacters = string->characters8(); |
194 | |
195 | for (unsigned i = 0; i < buffer.length; ++i) { |
196 | ASSERT(isASCII(buffer.characters[i])); |
197 | if (stringCharacters[i] != buffer.characters[i]) |
198 | return false; |
199 | } |
200 | |
201 | return true; |
202 | } |
203 | |
204 | const UChar* stringCharacters = string->characters16(); |
205 | |
206 | for (unsigned i = 0; i < buffer.length; ++i) { |
207 | ASSERT(isASCII(buffer.characters[i])); |
208 | if (stringCharacters[i] != buffer.characters[i]) |
209 | return false; |
210 | } |
211 | |
212 | return true; |
213 | } |
214 | |
215 | static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) |
216 | { |
217 | UChar* target; |
218 | auto newString = StringImpl::createUninitialized(buffer.utf16Length, target); |
219 | |
220 | bool isAllASCII; |
221 | const char* source = buffer.characters; |
222 | if (!convertUTF8ToUTF16(source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII)) |
223 | ASSERT_NOT_REACHED(); |
224 | |
225 | if (isAllASCII) |
226 | newString = StringImpl::create(buffer.characters, buffer.length); |
227 | |
228 | location = &newString.leakRef(); |
229 | location->setHash(hash); |
230 | location->setIsAtom(true); |
231 | } |
232 | }; |
233 | |
234 | RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters, unsigned length) |
235 | { |
236 | if (!characters) |
237 | return nullptr; |
238 | |
239 | if (!length) |
240 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
241 | |
242 | UCharBuffer buffer { characters, length }; |
243 | return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); |
244 | } |
245 | |
246 | RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters) |
247 | { |
248 | if (!characters) |
249 | return nullptr; |
250 | |
251 | unsigned length = 0; |
252 | while (characters[length] != UChar(0)) |
253 | ++length; |
254 | |
255 | if (!length) |
256 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
257 | |
258 | UCharBuffer buffer { characters, length }; |
259 | return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); |
260 | } |
261 | |
262 | struct SubstringLocation { |
263 | StringImpl* baseString; |
264 | unsigned start; |
265 | unsigned length; |
266 | }; |
267 | |
268 | struct SubstringTranslator { |
269 | static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) |
270 | { |
271 | location = &StringImpl::createSubstringSharingImpl(*buffer.baseString, buffer.start, buffer.length).leakRef(); |
272 | location->setHash(hash); |
273 | location->setIsAtom(true); |
274 | } |
275 | }; |
276 | |
277 | struct SubstringTranslator8 : SubstringTranslator { |
278 | static unsigned hash(const SubstringLocation& buffer) |
279 | { |
280 | return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); |
281 | } |
282 | |
283 | static bool equal(StringImpl* const& string, const SubstringLocation& buffer) |
284 | { |
285 | return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); |
286 | } |
287 | }; |
288 | |
289 | struct SubstringTranslator16 : SubstringTranslator { |
290 | static unsigned hash(const SubstringLocation& buffer) |
291 | { |
292 | return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); |
293 | } |
294 | |
295 | static bool equal(StringImpl* const& string, const SubstringLocation& buffer) |
296 | { |
297 | return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); |
298 | } |
299 | }; |
300 | |
301 | RefPtr<AtomStringImpl> AtomStringImpl::add(StringImpl* baseString, unsigned start, unsigned length) |
302 | { |
303 | if (!baseString) |
304 | return nullptr; |
305 | |
306 | if (!length || start >= baseString->length()) |
307 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
308 | |
309 | unsigned maxLength = baseString->length() - start; |
310 | if (length >= maxLength) { |
311 | if (!start) |
312 | return add(baseString); |
313 | length = maxLength; |
314 | } |
315 | |
316 | SubstringLocation buffer = { baseString, start, length }; |
317 | if (baseString->is8Bit()) |
318 | return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer); |
319 | return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer); |
320 | } |
321 | |
322 | using LCharBuffer = HashTranslatorCharBuffer<LChar>; |
323 | struct LCharBufferTranslator { |
324 | static unsigned hash(const LCharBuffer& buf) |
325 | { |
326 | return buf.hash; |
327 | } |
328 | |
329 | static bool equal(StringImpl* const& str, const LCharBuffer& buf) |
330 | { |
331 | return WTF::equal(str, buf.characters, buf.length); |
332 | } |
333 | |
334 | static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) |
335 | { |
336 | location = &StringImpl::create(buf.characters, buf.length).leakRef(); |
337 | location->setHash(hash); |
338 | location->setIsAtom(true); |
339 | } |
340 | }; |
341 | |
342 | template<typename CharType> |
343 | struct BufferFromStaticDataTranslator { |
344 | using Buffer = HashTranslatorCharBuffer<CharType>; |
345 | static unsigned hash(const Buffer& buf) |
346 | { |
347 | return buf.hash; |
348 | } |
349 | |
350 | static bool equal(StringImpl* const& str, const Buffer& buf) |
351 | { |
352 | return WTF::equal(str, buf.characters, buf.length); |
353 | } |
354 | |
355 | static void translate(StringImpl*& location, const Buffer& buf, unsigned hash) |
356 | { |
357 | location = &StringImpl::createWithoutCopying(buf.characters, buf.length).leakRef(); |
358 | location->setHash(hash); |
359 | location->setIsAtom(true); |
360 | } |
361 | }; |
362 | |
363 | RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters, unsigned length) |
364 | { |
365 | if (!characters) |
366 | return nullptr; |
367 | |
368 | if (!length) |
369 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
370 | |
371 | LCharBuffer buffer { characters, length }; |
372 | return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); |
373 | } |
374 | |
375 | Ref<AtomStringImpl> AtomStringImpl::addLiteral(const char* characters, unsigned length) |
376 | { |
377 | ASSERT(characters); |
378 | ASSERT(length); |
379 | |
380 | LCharBuffer buffer { reinterpret_cast<const LChar*>(characters), length }; |
381 | return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(buffer); |
382 | } |
383 | |
384 | static Ref<AtomStringImpl> addSymbol(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, StringImpl& base) |
385 | { |
386 | ASSERT(base.length()); |
387 | ASSERT(base.isSymbol()); |
388 | |
389 | SubstringLocation buffer = { &base, 0, base.length() }; |
390 | if (base.is8Bit()) |
391 | return addToStringTable<SubstringLocation, SubstringTranslator8>(locker, atomStringTable, buffer); |
392 | return addToStringTable<SubstringLocation, SubstringTranslator16>(locker, atomStringTable, buffer); |
393 | } |
394 | |
395 | static inline Ref<AtomStringImpl> addSymbol(StringImpl& base) |
396 | { |
397 | AtomStringTableLocker locker; |
398 | return addSymbol(locker, stringTable(), base); |
399 | } |
400 | |
401 | static Ref<AtomStringImpl> addStatic(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, const StringImpl& base) |
402 | { |
403 | ASSERT(base.length()); |
404 | ASSERT(base.isStatic()); |
405 | |
406 | if (base.is8Bit()) { |
407 | LCharBuffer buffer { base.characters8(), base.length(), base.hash() }; |
408 | return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(locker, atomStringTable, buffer); |
409 | } |
410 | UCharBuffer buffer { base.characters16(), base.length(), base.hash() }; |
411 | return addToStringTable<UCharBuffer, BufferFromStaticDataTranslator<UChar>>(locker, atomStringTable, buffer); |
412 | } |
413 | |
414 | static inline Ref<AtomStringImpl> addStatic(const StringImpl& base) |
415 | { |
416 | AtomStringTableLocker locker; |
417 | return addStatic(locker, stringTable(), base); |
418 | } |
419 | |
420 | RefPtr<AtomStringImpl> AtomStringImpl::add(const StaticStringImpl* string) |
421 | { |
422 | auto s = reinterpret_cast<const StringImpl*>(string); |
423 | ASSERT(s->isStatic()); |
424 | return addStatic(*s); |
425 | } |
426 | |
427 | Ref<AtomStringImpl> AtomStringImpl::addSlowCase(StringImpl& string) |
428 | { |
429 | // This check is necessary for null symbols. |
430 | // Their length is zero, but they are not AtomStringImpl. |
431 | if (!string.length()) |
432 | return *static_cast<AtomStringImpl*>(StringImpl::empty()); |
433 | |
434 | if (string.isStatic()) |
435 | return addStatic(string); |
436 | |
437 | if (string.isSymbol()) |
438 | return addSymbol(string); |
439 | |
440 | ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already an atom." ); |
441 | |
442 | AtomStringTableLocker locker; |
443 | auto addResult = stringTable().add(&string); |
444 | |
445 | if (addResult.isNewEntry) { |
446 | ASSERT(*addResult.iterator == &string); |
447 | string.setIsAtom(true); |
448 | } |
449 | |
450 | return *static_cast<AtomStringImpl*>(*addResult.iterator); |
451 | } |
452 | |
453 | Ref<AtomStringImpl> AtomStringImpl::addSlowCase(AtomStringTable& stringTable, StringImpl& string) |
454 | { |
455 | // This check is necessary for null symbols. |
456 | // Their length is zero, but they are not AtomStringImpl. |
457 | if (!string.length()) |
458 | return *static_cast<AtomStringImpl*>(StringImpl::empty()); |
459 | |
460 | if (string.isStatic()) { |
461 | AtomStringTableLocker locker; |
462 | return addStatic(locker, stringTable.table(), string); |
463 | } |
464 | |
465 | if (string.isSymbol()) { |
466 | AtomStringTableLocker locker; |
467 | return addSymbol(locker, stringTable.table(), string); |
468 | } |
469 | |
470 | ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already an atom." ); |
471 | |
472 | AtomStringTableLocker locker; |
473 | auto addResult = stringTable.table().add(&string); |
474 | |
475 | if (addResult.isNewEntry) { |
476 | ASSERT(*addResult.iterator == &string); |
477 | string.setIsAtom(true); |
478 | } |
479 | |
480 | return *static_cast<AtomStringImpl*>(*addResult.iterator); |
481 | } |
482 | |
483 | void AtomStringImpl::remove(AtomStringImpl* string) |
484 | { |
485 | ASSERT(string->isAtom()); |
486 | AtomStringTableLocker locker; |
487 | auto& atomStringTable = stringTable(); |
488 | auto iterator = atomStringTable.find(string); |
489 | ASSERT_WITH_MESSAGE(iterator != atomStringTable.end(), "The string being removed is an atom in the string table of an other thread!" ); |
490 | ASSERT(string == *iterator); |
491 | atomStringTable.remove(iterator); |
492 | } |
493 | |
494 | RefPtr<AtomStringImpl> AtomStringImpl::lookUpSlowCase(StringImpl& string) |
495 | { |
496 | ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl objects should return from the fast case." ); |
497 | |
498 | if (!string.length()) |
499 | return static_cast<AtomStringImpl*>(StringImpl::empty()); |
500 | |
501 | AtomStringTableLocker locker; |
502 | auto& atomStringTable = stringTable(); |
503 | auto iterator = atomStringTable.find(&string); |
504 | if (iterator != atomStringTable.end()) |
505 | return static_cast<AtomStringImpl*>(*iterator); |
506 | return nullptr; |
507 | } |
508 | |
509 | RefPtr<AtomStringImpl> AtomStringImpl::addUTF8(const char* , const char* charactersEnd) |
510 | { |
511 | HashAndUTF8Characters buffer; |
512 | buffer.characters = charactersStart; |
513 | buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); |
514 | |
515 | if (!buffer.hash) |
516 | return nullptr; |
517 | |
518 | return addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); |
519 | } |
520 | |
521 | RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const LChar* characters, unsigned length) |
522 | { |
523 | AtomStringTableLocker locker; |
524 | auto& table = stringTable(); |
525 | |
526 | LCharBuffer buffer = { characters, length }; |
527 | auto iterator = table.find<LCharBufferTranslator>(buffer); |
528 | if (iterator != table.end()) |
529 | return static_cast<AtomStringImpl*>(*iterator); |
530 | return nullptr; |
531 | } |
532 | |
533 | RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const UChar* characters, unsigned length) |
534 | { |
535 | AtomStringTableLocker locker; |
536 | auto& table = stringTable(); |
537 | |
538 | UCharBuffer buffer { characters, length }; |
539 | auto iterator = table.find<UCharBufferTranslator>(buffer); |
540 | if (iterator != table.end()) |
541 | return static_cast<AtomStringImpl*>(*iterator); |
542 | return nullptr; |
543 | } |
544 | |
545 | #if !ASSERT_DISABLED |
546 | bool AtomStringImpl::isInAtomStringTable(StringImpl* string) |
547 | { |
548 | AtomStringTableLocker locker; |
549 | return stringTable().contains(string); |
550 | } |
551 | #endif |
552 | |
553 | } // namespace WTF |
554 | |