AtomStringImpl.cpp source code [webcore/Source/WTF/wtf/text/AtomStringImpl.cpp]

1	/*
2	* Copyright (C) 2004-2008, 2013-2014 Apple Inc. All rights reserved.
3	* Copyright (C) 2010 Patrick Gansterer <[email protected]>
4	* Copyright (C) 2012 Google Inc. All rights reserved.
5	* Copyright (C) 2015 Yusuke Suzuki<[email protected]>. All rights reserved.
6	*
7	* This library is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Library General Public
9	* License as published by the Free Software Foundation; either
10	* version 2 of the License, or (at your option) any later version.
11	*
12	* This library is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Library General Public License for more details.
16	*
17	* You should have received a copy of the GNU Library General Public License
18	* along with this library; see the file COPYING.LIB. If not, write to
19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	* Boston, MA 02110-1301, USA.
21	*
22	*/
23
24	#include "config.h"
25	#include <wtf/text/AtomStringImpl.h>
26
27	#include <wtf/CommaPrinter.h>
28	#include <wtf/DataLog.h>
29	#include <wtf/HashSet.h>
30	#include <wtf/StringPrintStream.h>
31	#include <wtf/Threading.h>
32	#include <wtf/text/AtomStringTable.h>
33	#include <wtf/text/IntegerToStringConversion.h>
34	#include <wtf/text/StringHash.h>
35	#include <wtf/unicode/UTF8Conversion.h>
36
37	#if USE(WEB_THREAD)
38	#include <wtf/Lock.h>
39	#endif
40
41	namespace WTF {
42
43	using namespace Unicode;
44
45	#if USE(WEB_THREAD)
46
47	class AtomStringTableLocker : public LockHolder {
48	WTF_MAKE_NONCOPYABLE(AtomStringTableLocker);
49
50	static Lock s_stringTableLock;
51	public:
52	AtomStringTableLocker()
53	: LockHolder(&s_stringTableLock)
54	{
55	}
56	};
57
58	Lock AtomStringTableLocker::s_stringTableLock;
59
60	#else
61
62	class AtomStringTableLocker {
63	WTF_MAKE_NONCOPYABLE(AtomStringTableLocker);
64	public:
65	AtomStringTableLocker() { }
66	};
67
68	#endif // USE(WEB_THREAD)
69
70	using StringTableImpl = HashSet<StringImpl*>;
71
72	static ALWAYS_INLINE StringTableImpl& stringTable()
73	{
74	return Thread::current().atomStringTable()->table();
75	}
76
77	template<typename T, typename HashTranslator>
78	static inline Ref<AtomStringImpl> addToStringTable(AtomStringTableLocker&, StringTableImpl& atomStringTable, const T& value)
79	{
80	auto addResult = atomStringTable.add<HashTranslator>(value);
81
82	// If the string is newly-translated, then we need to adopt it.
83	// The boolean in the pair tells us if that is so.
84	if (addResult.isNewEntry)
85	return adoptRef(static_cast<AtomStringImpl&>(**addResult.iterator));
86	return *static_cast<AtomStringImpl>(addResult.iterator);
87	}
88
89	template<typename T, typename HashTranslator>
90	static inline Ref<AtomStringImpl> addToStringTable(const T& value)
91	{
92	AtomStringTableLocker locker;
93	return addToStringTable<T, HashTranslator>(locker, stringTable(), value);
94	}
95
96	struct CStringTranslator {
97	static unsigned hash(const LChar* characters)
98	{
99	return StringHasher::computeHashAndMaskTop8Bits(characters);
100	}
101
102	static inline bool equal(StringImpl* str, const LChar* characters)
103	{
104	return WTF::equal(str, characters);
105	}
106
107	static void translate(StringImpl& location, const* LChar* const& characters, unsigned hash)
108	{
109	location = &StringImpl::create(characters).leakRef();
110	location->setHash(hash);
111	location->setIsAtomic(true);
112	}
113	};
114
115	RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters)
116	{
117	if (!characters)
118	return nullptr;
119	if (!*characters)
120	return static_cast<AtomStringImpl*>(StringImpl::empty());
121
122	return addToStringTable<const LChar*, CStringTranslator>(characters);
123	}
124
125	template<typename CharacterType>
126	struct HashTranslatorCharBuffer {
127	const CharacterType* characters;
128	unsigned length;
129	unsigned hash;
130
131	HashTranslatorCharBuffer(const CharacterType* characters, unsigned length)
132	: characters(characters)
133	, length(length)
134	, hash(StringHasher::computeHashAndMaskTop8Bits(characters, length))
135	{
136	}
137
138	HashTranslatorCharBuffer(const CharacterType* characters, unsigned length, unsigned hash)
139	: characters(characters)
140	, length(length)
141	, hash(hash)
142	{
143	}
144	};
145
146	using UCharBuffer = HashTranslatorCharBuffer<UChar>;
147	struct UCharBufferTranslator {
148	static unsigned hash(const UCharBuffer& buf)
149	{
150	return buf.hash;
151	}
152
153	static bool equal(StringImpl* const& str, const UCharBuffer& buf)
154	{
155	return WTF::equal(str, buf.characters, buf.length);
156	}
157
158	static void translate(StringImpl& location, const* UCharBuffer& buf, unsigned hash)
159	{
160	location = &StringImpl::create8BitIfPossible(buf.characters, buf.length).leakRef();
161	location->setHash(hash);
162	location->setIsAtomic(true);
163	}
164	};
165
// Lookup key for a UTF-8 byte range: the precomputed hash plus both lengths.
struct HashAndUTF8Characters {
    unsigned hash; // Precomputed hash of the string.
    const char* characters; // Start of the UTF-8 bytes (not owned).
    unsigned length; // Length of the UTF-8 input.
    unsigned utf16Length; // Length of the same text in UTF-16.
};
172
173	struct HashAndUTF8CharactersTranslator {
174	static unsigned hash(const HashAndUTF8Characters& buffer)
175	{
176	return buffer.hash;
177	}
178
179	static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
180	{
181	if (buffer.utf16Length != string->length())
182	return false;
183
184	// If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
185	if (buffer.utf16Length != buffer.length) {
186	if (string->is8Bit())
187	return equalLatin1WithUTF8(string->characters8(), buffer.characters, buffer.characters + buffer.length);
188
189	return equalUTF16WithUTF8(string->characters16(), buffer.characters, buffer.characters + buffer.length);
190	}
191
192	if (string->is8Bit()) {
193	const LChar* stringCharacters = string->characters8();
194
195	for (unsigned i = `0`; i < buffer.length; ++i) {
196	ASSERT(isASCII(buffer.characters[i]));
197	if (stringCharacters[i] != buffer.characters[i])
198	return false;
199	}
200
201	return true;
202	}
203
204	const UChar* stringCharacters = string->characters16();
205
206	for (unsigned i = `0`; i < buffer.length; ++i) {
207	ASSERT(isASCII(buffer.characters[i]));
208	if (stringCharacters[i] != buffer.characters[i])
209	return false;
210	}
211
212	return true;
213	}
214
215	static void translate(StringImpl& location, const* HashAndUTF8Characters& buffer, unsigned hash)
216	{
217	UChar* target;
218	auto newString = StringImpl::createUninitialized(buffer.utf16Length, target);
219
220	bool isAllASCII;
221	const char* source = buffer.characters;
222	if (!convertUTF8ToUTF16(source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII))
223	ASSERT_NOT_REACHED();
224
225	if (isAllASCII)
226	newString = StringImpl::create(buffer.characters, buffer.length);
227
228	location = &newString.leakRef();
229	location->setHash(hash);
230	location->setIsAtomic(true);
231	}
232	};
233
234	RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters, unsigned length)
235	{
236	if (!characters)
237	return nullptr;
238
239	if (!length)
240	return static_cast<AtomStringImpl*>(StringImpl::empty());
241
242	UCharBuffer buffer { characters, length };
243	return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
244	}
245
246	RefPtr<AtomStringImpl> AtomStringImpl::add(const UChar* characters)
247	{
248	if (!characters)
249	return nullptr;
250
251	unsigned length = `0`;
252	while (characters[length] != UChar(`0`))
253	++length;
254
255	if (!length)
256	return static_cast<AtomStringImpl*>(StringImpl::empty());
257
258	UCharBuffer buffer { characters, length };
259	return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
260	}
261
262	struct SubstringLocation {
263	StringImpl* baseString;
264	unsigned start;
265	unsigned length;
266	};
267
268	struct SubstringTranslator {
269	static void translate(StringImpl& location, const* SubstringLocation& buffer, unsigned hash)
270	{
271	location = &StringImpl::createSubstringSharingImpl(*buffer.baseString, buffer.start, buffer.length).leakRef();
272	location->setHash(hash);
273	location->setIsAtomic(true);
274	}
275	};
276
277	struct SubstringTranslator8 : SubstringTranslator {
278	static unsigned hash(const SubstringLocation& buffer)
279	{
280	return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
281	}
282
283	static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
284	{
285	return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
286	}
287	};
288
289	struct SubstringTranslator16 : SubstringTranslator {
290	static unsigned hash(const SubstringLocation& buffer)
291	{
292	return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
293	}
294
295	static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
296	{
297	return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
298	}
299	};
300
301	RefPtr<AtomStringImpl> AtomStringImpl::add(StringImpl* baseString, unsigned start, unsigned length)
302	{
303	if (!baseString)
304	return nullptr;
305
306	if (!length \|\| start >= baseString->length())
307	return static_cast<AtomStringImpl*>(StringImpl::empty());
308
309	unsigned maxLength = baseString->length() - start;
310	if (length >= maxLength) {
311	if (!start)
312	return add(baseString);
313	length = maxLength;
314	}
315
316	SubstringLocation buffer = { baseString, start, length };
317	if (baseString->is8Bit())
318	return addToStringTable<SubstringLocation, SubstringTranslator8>(buffer);
319	return addToStringTable<SubstringLocation, SubstringTranslator16>(buffer);
320	}
321
322	using LCharBuffer = HashTranslatorCharBuffer<LChar>;
323	struct LCharBufferTranslator {
324	static unsigned hash(const LCharBuffer& buf)
325	{
326	return buf.hash;
327	}
328
329	static bool equal(StringImpl* const& str, const LCharBuffer& buf)
330	{
331	return WTF::equal(str, buf.characters, buf.length);
332	}
333
334	static void translate(StringImpl& location, const* LCharBuffer& buf, unsigned hash)
335	{
336	location = &StringImpl::create(buf.characters, buf.length).leakRef();
337	location->setHash(hash);
338	location->setIsAtomic(true);
339	}
340	};
341
342	template<typename CharType>
343	struct BufferFromStaticDataTranslator {
344	using Buffer = HashTranslatorCharBuffer<CharType>;
345	static unsigned hash(const Buffer& buf)
346	{
347	return buf.hash;
348	}
349
350	static bool equal(StringImpl* const& str, const Buffer& buf)
351	{
352	return WTF::equal(str, buf.characters, buf.length);
353	}
354
355	static void translate(StringImpl& location, const* Buffer& buf, unsigned hash)
356	{
357	location = &StringImpl::createWithoutCopying(buf.characters, buf.length).leakRef();
358	location->setHash(hash);
359	location->setIsAtomic(true);
360	}
361	};
362
363	RefPtr<AtomStringImpl> AtomStringImpl::add(const LChar* characters, unsigned length)
364	{
365	if (!characters)
366	return nullptr;
367
368	if (!length)
369	return static_cast<AtomStringImpl*>(StringImpl::empty());
370
371	LCharBuffer buffer { characters, length };
372	return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
373	}
374
375	Ref<AtomStringImpl> AtomStringImpl::addLiteral(const char* characters, unsigned length)
376	{
377	ASSERT(characters);
378	ASSERT(length);
379
380	LCharBuffer buffer { reinterpret_cast<const LChar*>(characters), length };
381	return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(buffer);
382	}
383
384	static Ref<AtomStringImpl> addSymbol(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, StringImpl& base)
385	{
386	ASSERT(base.length());
387	ASSERT(base.isSymbol());
388
389	SubstringLocation buffer = { &base, `0`, base.length() };
390	if (base.is8Bit())
391	return addToStringTable<SubstringLocation, SubstringTranslator8>(locker, atomStringTable, buffer);
392	return addToStringTable<SubstringLocation, SubstringTranslator16>(locker, atomStringTable, buffer);
393	}
394
395	static inline Ref<AtomStringImpl> addSymbol(StringImpl& base)
396	{
397	AtomStringTableLocker locker;
398	return addSymbol(locker, stringTable(), base);
399	}
400
401	static Ref<AtomStringImpl> addStatic(AtomStringTableLocker& locker, StringTableImpl& atomStringTable, const StringImpl& base)
402	{
403	ASSERT(base.length());
404	ASSERT(base.isStatic());
405
406	if (base.is8Bit()) {
407	LCharBuffer buffer { base.characters8(), base.length(), base.hash() };
408	return addToStringTable<LCharBuffer, BufferFromStaticDataTranslator<LChar>>(locker, atomStringTable, buffer);
409	}
410	UCharBuffer buffer { base.characters16(), base.length(), base.hash() };
411	return addToStringTable<UCharBuffer, BufferFromStaticDataTranslator<UChar>>(locker, atomStringTable, buffer);
412	}
413
414	static inline Ref<AtomStringImpl> addStatic(const StringImpl& base)
415	{
416	AtomStringTableLocker locker;
417	return addStatic(locker, stringTable(), base);
418	}
419
420	RefPtr<AtomStringImpl> AtomStringImpl::add(const StaticStringImpl* string)
421	{
422	auto s = reinterpret_cast<const StringImpl*>(string);
423	ASSERT(s->isStatic());
424	return addStatic(*s);
425	}
426
427	Ref<AtomStringImpl> AtomStringImpl::addSlowCase(StringImpl& string)
428	{
429	// This check is necessary for null symbols.
430	// Their length is zero, but they are not AtomStringImpl.
431	if (!string.length())
432	return *static_cast<AtomStringImpl*>(StringImpl::empty());
433
434	if (string.isStatic())
435	return addStatic(string);
436
437	if (string.isSymbol())
438	return addSymbol(string);
439
440	ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already atomic.");
441
442	AtomStringTableLocker locker;
443	auto addResult = stringTable().add(&string);
444
445	if (addResult.isNewEntry) {
446	ASSERT(*addResult.iterator == &string);
447	string.setIsAtomic(true);
448	}
449
450	return *static_cast<AtomStringImpl>(addResult.iterator);
451	}
452
453	Ref<AtomStringImpl> AtomStringImpl::addSlowCase(AtomStringTable& stringTable, StringImpl& string)
454	{
455	// This check is necessary for null symbols.
456	// Their length is zero, but they are not AtomStringImpl.
457	if (!string.length())
458	return *static_cast<AtomStringImpl*>(StringImpl::empty());
459
460	if (string.isStatic()) {
461	AtomStringTableLocker locker;
462	return addStatic(locker, stringTable.table(), string);
463	}
464
465	if (string.isSymbol()) {
466	AtomStringTableLocker locker;
467	return addSymbol(locker, stringTable.table(), string);
468	}
469
470	ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomStringImpl should not hit the slow case if the string is already atomic.");
471
472	AtomStringTableLocker locker;
473	auto addResult = stringTable.table().add(&string);
474
475	if (addResult.isNewEntry) {
476	ASSERT(*addResult.iterator == &string);
477	string.setIsAtomic(true);
478	}
479
480	return *static_cast<AtomStringImpl>(addResult.iterator);
481	}
482
483	void AtomStringImpl::remove(AtomStringImpl* string)
484	{
485	ASSERT(string->isAtom());
486	AtomStringTableLocker locker;
487	auto& atomStringTable = stringTable();
488	auto iterator = atomStringTable.find(string);
489	ASSERT_WITH_MESSAGE(iterator != atomStringTable.end(), "The string being removed is atomic in the string table of an other thread!");
490	ASSERT(string == *iterator);
491	atomStringTable.remove(iterator);
492	}
493
494	RefPtr<AtomStringImpl> AtomStringImpl::lookUpSlowCase(StringImpl& string)
495	{
496	ASSERT_WITH_MESSAGE(!string.isAtom(), "AtomicStringImpls should return from the fast case.");
497
498	if (!string.length())
499	return static_cast<AtomStringImpl*>(StringImpl::empty());
500
501	AtomStringTableLocker locker;
502	auto& atomStringTable = stringTable();
503	auto iterator = atomStringTable.find(&string);
504	if (iterator != atomStringTable.end())
505	return static_cast<AtomStringImpl>(iterator);
506	return nullptr;
507	}
508
509	RefPtr<AtomStringImpl> AtomStringImpl::addUTF8(const char* charactersStart, const char* charactersEnd)
510	{
511	HashAndUTF8Characters buffer;
512	buffer.characters = charactersStart;
513	buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
514
515	if (!buffer.hash)
516	return nullptr;
517
518	return addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
519	}
520
521	RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const LChar* characters, unsigned length)
522	{
523	AtomStringTableLocker locker;
524	auto& table = stringTable();
525
526	LCharBuffer buffer = { characters, length };
527	auto iterator = table.find<LCharBufferTranslator>(buffer);
528	if (iterator != table.end())
529	return static_cast<AtomStringImpl>(iterator);
530	return nullptr;
531	}
532
533	RefPtr<AtomStringImpl> AtomStringImpl::lookUp(const UChar* characters, unsigned length)
534	{
535	AtomStringTableLocker locker;
536	auto& table = stringTable();
537
538	UCharBuffer buffer { characters, length };
539	auto iterator = table.find<UCharBufferTranslator>(buffer);
540	if (iterator != table.end())
541	return static_cast<AtomStringImpl>(iterator);
542	return nullptr;
543	}
544
545	#if !ASSERT_DISABLED
546	bool AtomStringImpl::isInAtomStringTable(StringImpl* string)
547	{
548	AtomStringTableLocker locker;
549	return stringTable().contains(string);
550	}
551	#endif
552
553	} // namespace WTF
554

Browse the source code of webcore/Source/WTF/wtf/text/AtomStringImpl.cpp