1/*
2 * Copyright (C) 2005-2017 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <[email protected]>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#pragma once
23
24#include <unicode/utypes.h>
25#include <wtf/text/LChar.h>
26
27namespace WTF {
28
29// Paul Hsieh's SuperFastHash
30// http://www.azillionmonkeys.com/qed/hash.html
31
32// LChar data is interpreted as Latin-1-encoded (zero-extended to 16 bits).
33
34// NOTE: The hash computation here must stay in sync with the create_hash_table script in
35// JavaScriptCore and the CodeGeneratorJS.pm script in WebCore.
36
37// Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero.
38static constexpr const unsigned stringHashingStartValue = 0x9E3779B9U;
39
40class StringHasher {
41 WTF_MAKE_FAST_ALLOCATED;
42public:
43 static constexpr const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.
44 static constexpr const unsigned maskHash = (1U << (sizeof(unsigned) * 8 - flagCount)) - 1;
45
46 struct DefaultConverter {
47 template<typename CharType>
48 static constexpr UChar convert(CharType character)
49 {
50 return static_cast<std::make_unsigned_t<CharType>>((character));
51 }
52 };
53
54 StringHasher() = default;
55
56 // The hasher hashes two characters at a time, and thus an "aligned" hasher is one
57 // where an even number of characters have been added. Callers that always add
58 // characters two at a time can use the "assuming aligned" functions.
59 void addCharactersAssumingAligned(UChar a, UChar b)
60 {
61 ASSERT(!m_hasPendingCharacter);
62 m_hash = calculateWithTwoCharacters(m_hash, a, b);
63 }
64
65 void addCharacter(UChar character)
66 {
67 if (m_hasPendingCharacter) {
68 m_hasPendingCharacter = false;
69 addCharactersAssumingAligned(m_pendingCharacter, character);
70 return;
71 }
72
73 m_pendingCharacter = character;
74 m_hasPendingCharacter = true;
75 }
76
77 void addCharacters(UChar a, UChar b)
78 {
79 if (m_hasPendingCharacter) {
80#if !ASSERT_DISABLED
81 m_hasPendingCharacter = false;
82#endif
83 addCharactersAssumingAligned(m_pendingCharacter, a);
84 m_pendingCharacter = b;
85#if !ASSERT_DISABLED
86 m_hasPendingCharacter = true;
87#endif
88 return;
89 }
90
91 addCharactersAssumingAligned(a, b);
92 }
93
94 template<typename T, typename Converter> void addCharactersAssumingAligned(const T* data, unsigned length)
95 {
96 ASSERT(!m_hasPendingCharacter);
97
98 bool remainder = length & 1;
99 length >>= 1;
100
101 while (length--) {
102 addCharactersAssumingAligned(Converter::convert(data[0]), Converter::convert(data[1]));
103 data += 2;
104 }
105
106 if (remainder)
107 addCharacter(Converter::convert(*data));
108 }
109
110 template<typename T> void addCharactersAssumingAligned(const T* data, unsigned length)
111 {
112 addCharactersAssumingAligned<T, DefaultConverter>(data, length);
113 }
114
115 template<typename T, typename Converter> void addCharactersAssumingAligned(const T* data)
116 {
117 ASSERT(!m_hasPendingCharacter);
118
119 while (T a = *data++) {
120 T b = *data++;
121 if (!b) {
122 addCharacter(Converter::convert(a));
123 break;
124 }
125 addCharactersAssumingAligned(Converter::convert(a), Converter::convert(b));
126 }
127 }
128
129 template<typename T> void addCharactersAssumingAligned(const T* data)
130 {
131 addCharactersAssumingAligned<T, DefaultConverter>(data);
132 }
133
134 template<typename T, typename Converter> void addCharacters(const T* data, unsigned length)
135 {
136 if (!length)
137 return;
138 if (m_hasPendingCharacter) {
139 m_hasPendingCharacter = false;
140 addCharactersAssumingAligned(m_pendingCharacter, Converter::convert(*data++));
141 --length;
142 }
143 addCharactersAssumingAligned<T, Converter>(data, length);
144 }
145
146 template<typename T> void addCharacters(const T* data, unsigned length)
147 {
148 addCharacters<T, DefaultConverter>(data, length);
149 }
150
151 template<typename T, typename Converter> void addCharacters(const T* data)
152 {
153 if (m_hasPendingCharacter && *data) {
154 m_hasPendingCharacter = false;
155 addCharactersAssumingAligned(m_pendingCharacter, Converter::convert(*data++));
156 }
157 addCharactersAssumingAligned<T, Converter>(data);
158 }
159
160 template<typename T> void addCharacters(const T* data)
161 {
162 addCharacters<T, DefaultConverter>(data);
163 }
164
165 unsigned hashWithTop8BitsMasked() const
166 {
167 return finalizeAndMaskTop8Bits(processPendingCharacter());
168 }
169
170 unsigned hash() const
171 {
172 return finalize(processPendingCharacter());
173 }
174
175 template<typename T, typename Converter> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
176 {
177 return finalizeAndMaskTop8Bits(computeHashImpl<T, Converter>(data, length));
178 }
179
180 template<typename T, typename Converter> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data)
181 {
182 return finalizeAndMaskTop8Bits(computeHashImpl<T, Converter>(data));
183 }
184
185 template<typename T> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length)
186 {
187 return computeHashAndMaskTop8Bits<T, DefaultConverter>(data, length);
188 }
189
190 template<typename T> static constexpr unsigned computeHashAndMaskTop8Bits(const T* data)
191 {
192 return computeHashAndMaskTop8Bits<T, DefaultConverter>(data);
193 }
194
195 template<typename T, typename Converter> static constexpr unsigned computeHash(const T* data, unsigned length)
196 {
197 return finalize(computeHashImpl<T, Converter>(data, length));
198 }
199
200 template<typename T, typename Converter> static constexpr unsigned computeHash(const T* data)
201 {
202 return finalize(computeHashImpl<T, Converter>(data));
203 }
204
205 template<typename T> static constexpr unsigned computeHash(const T* data, unsigned length)
206 {
207 return computeHash<T, DefaultConverter>(data, length);
208 }
209
210 template<typename T> static constexpr unsigned computeHash(const T* data)
211 {
212 return computeHash<T, DefaultConverter>(data);
213 }
214
215
216 template<typename T, unsigned charactersCount>
217 static constexpr unsigned computeLiteralHash(const T (&characters)[charactersCount])
218 {
219 return computeHash<T, DefaultConverter>(characters, charactersCount - 1);
220 }
221
222 template<typename T, unsigned charactersCount>
223 static constexpr unsigned computeLiteralHashAndMaskTop8Bits(const T (&characters)[charactersCount])
224 {
225 return computeHashAndMaskTop8Bits<T, DefaultConverter>(characters, charactersCount - 1);
226 }
227
228 static unsigned hashMemory(const void* data, unsigned length)
229 {
230 size_t lengthInUChar = length / sizeof(UChar);
231 StringHasher hasher;
232 hasher.addCharactersAssumingAligned(static_cast<const UChar*>(data), lengthInUChar);
233
234 for (size_t i = 0; i < length % sizeof(UChar); ++i)
235 hasher.addCharacter(static_cast<const char*>(data)[lengthInUChar * sizeof(UChar) + i]);
236
237 return hasher.hash();
238 }
239
240 template<size_t length> static unsigned hashMemory(const void* data)
241 {
242 return hashMemory(data, length);
243 }
244
245private:
246 ALWAYS_INLINE static constexpr unsigned avalancheBits(unsigned hash)
247 {
248 unsigned result = hash;
249
250 result ^= result << 3;
251 result += result >> 5;
252 result ^= result << 2;
253 result += result >> 15;
254 result ^= result << 10;
255
256 return result;
257 }
258
259 static constexpr unsigned finalize(unsigned hash)
260 {
261 return avoidZero(avalancheBits(hash));
262 }
263
264 static constexpr unsigned finalizeAndMaskTop8Bits(unsigned hash)
265 {
266 // Reserving space from the high bits for flags preserves most of the hash's
267 // value, since hash lookup typically masks out the high bits anyway.
268 return avoidZero(avalancheBits(hash) & StringHasher::maskHash);
269 }
270
271 // This avoids ever returning a hash code of 0, since that is used to
272 // signal "hash not computed yet". Setting the high bit maintains
273 // reasonable fidelity to a hash code of 0 because it is likely to yield
274 // exactly 0 when hash lookup masks out the high bits.
275 ALWAYS_INLINE static constexpr unsigned avoidZero(unsigned hash)
276 {
277 if (hash)
278 return hash;
279 return 0x80000000 >> flagCount;
280 }
281
282 static constexpr unsigned calculateWithRemainingLastCharacter(unsigned hash, unsigned character)
283 {
284 unsigned result = hash;
285
286 result += character;
287 result ^= result << 11;
288 result += result >> 17;
289
290 return result;
291 }
292
293 ALWAYS_INLINE static constexpr unsigned calculateWithTwoCharacters(unsigned hash, unsigned firstCharacter, unsigned secondCharacter)
294 {
295 unsigned result = hash;
296
297 result += firstCharacter;
298 result = (result << 16) ^ ((secondCharacter << 11) ^ result);
299 result += result >> 11;
300
301 return result;
302 }
303
304 template<typename T, typename Converter>
305 static constexpr unsigned computeHashImpl(const T* characters, unsigned length)
306 {
307 unsigned result = stringHashingStartValue;
308 bool remainder = length & 1;
309 length >>= 1;
310
311 while (length--) {
312 result = calculateWithTwoCharacters(result, Converter::convert(characters[0]), Converter::convert(characters[1]));
313 characters += 2;
314 }
315
316 if (remainder)
317 return calculateWithRemainingLastCharacter(result, Converter::convert(characters[0]));
318 return result;
319 }
320
321 template<typename T, typename Converter>
322 static constexpr unsigned computeHashImpl(const T* characters)
323 {
324 unsigned result = stringHashingStartValue;
325 while (T a = *characters++) {
326 T b = *characters++;
327 if (!b)
328 return calculateWithRemainingLastCharacter(result, Converter::convert(a));
329 result = calculateWithTwoCharacters(result, Converter::convert(a), Converter::convert(b));
330 }
331 return result;
332 }
333
334 unsigned processPendingCharacter() const
335 {
336 unsigned result = m_hash;
337
338 // Handle end case.
339 if (m_hasPendingCharacter)
340 return calculateWithRemainingLastCharacter(result, m_pendingCharacter);
341 return result;
342 }
343
344 unsigned m_hash { stringHashingStartValue };
345 UChar m_pendingCharacter { 0 };
346 bool m_hasPendingCharacter { false };
347};
348
349} // namespace WTF
350
351using WTF::StringHasher;
352