1 | /* |
2 | * Copyright (C) 2015-2019 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <algorithm> |
29 | #include <unicode/uchar.h> |
30 | #include <wtf/ASCIICType.h> |
31 | #include <wtf/NotFound.h> |
32 | #include <wtf/UnalignedAccess.h> |
33 | |
34 | namespace WTF { |
35 | |
36 | template<typename CharacterType> inline bool isLatin1(CharacterType character) |
37 | { |
38 | using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type; |
39 | return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF); |
40 | } |
41 | |
42 | using CodeUnitMatchFunction = bool (*)(UChar); |
43 | |
44 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length); |
45 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB); |
46 | |
47 | template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&); |
48 | |
49 | template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length); |
50 | template<typename CharacterType, unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]); |
51 | |
52 | template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]); |
53 | |
54 | bool equalIgnoringASCIICase(const char*, const char*); |
55 | template<unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const char*, const char (&lowercaseLetters)[lowercaseLettersLength]); |
56 | |
57 | // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. |
58 | #if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED |
59 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
60 | { |
61 | unsigned dwordLength = length >> 3; |
62 | |
63 | const char* a = reinterpret_cast<const char*>(aLChar); |
64 | const char* b = reinterpret_cast<const char*>(bLChar); |
65 | |
66 | if (dwordLength) { |
67 | for (unsigned i = 0; i != dwordLength; ++i) { |
68 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
69 | return false; |
70 | |
71 | a += sizeof(uint64_t); |
72 | b += sizeof(uint64_t); |
73 | } |
74 | } |
75 | |
76 | if (length & 4) { |
77 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
78 | return false; |
79 | |
80 | a += sizeof(uint32_t); |
81 | b += sizeof(uint32_t); |
82 | } |
83 | |
84 | if (length & 2) { |
85 | if (unalignedLoad<uint16_t>(a) != unalignedLoad<uint16_t>(b)) |
86 | return false; |
87 | |
88 | a += sizeof(uint16_t); |
89 | b += sizeof(uint16_t); |
90 | } |
91 | |
92 | if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b))) |
93 | return false; |
94 | |
95 | return true; |
96 | } |
97 | |
98 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
99 | { |
100 | unsigned dwordLength = length >> 2; |
101 | |
102 | const char* a = reinterpret_cast<const char*>(aUChar); |
103 | const char* b = reinterpret_cast<const char*>(bUChar); |
104 | |
105 | if (dwordLength) { |
106 | for (unsigned i = 0; i != dwordLength; ++i) { |
107 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
108 | return false; |
109 | |
110 | a += sizeof(uint64_t); |
111 | b += sizeof(uint64_t); |
112 | } |
113 | } |
114 | |
115 | if (length & 2) { |
116 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
117 | return false; |
118 | |
119 | a += sizeof(uint32_t); |
120 | b += sizeof(uint32_t); |
121 | } |
122 | |
123 | if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))) |
124 | return false; |
125 | |
126 | return true; |
127 | } |
128 | #elif CPU(X86) && !ASAN_ENABLED |
129 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
130 | { |
131 | const char* a = reinterpret_cast<const char*>(aLChar); |
132 | const char* b = reinterpret_cast<const char*>(bLChar); |
133 | |
134 | unsigned wordLength = length >> 2; |
135 | for (unsigned i = 0; i != wordLength; ++i) { |
136 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
137 | return false; |
138 | a += sizeof(uint32_t); |
139 | b += sizeof(uint32_t); |
140 | } |
141 | |
142 | length &= 3; |
143 | |
144 | if (length) { |
145 | const LChar* aRemainder = reinterpret_cast<const LChar*>(a); |
146 | const LChar* bRemainder = reinterpret_cast<const LChar*>(b); |
147 | |
148 | for (unsigned i = 0; i < length; ++i) { |
149 | if (aRemainder[i] != bRemainder[i]) |
150 | return false; |
151 | } |
152 | } |
153 | |
154 | return true; |
155 | } |
156 | |
157 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
158 | { |
159 | const char* a = reinterpret_cast<const char*>(aUChar); |
160 | const char* b = reinterpret_cast<const char*>(bUChar); |
161 | |
162 | unsigned wordLength = length >> 1; |
163 | for (unsigned i = 0; i != wordLength; ++i) { |
164 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
165 | return false; |
166 | a += sizeof(uint32_t); |
167 | b += sizeof(uint32_t); |
168 | } |
169 | |
170 | if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)) |
171 | return false; |
172 | |
173 | return true; |
174 | } |
175 | #elif PLATFORM(IOS_FAMILY) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED |
176 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
177 | { |
178 | bool isEqual = false; |
179 | uint32_t aValue; |
180 | uint32_t bValue; |
181 | asm("subs %[length], #4\n" |
182 | "blo 2f\n" |
183 | |
184 | "0:\n" // Label 0 = Start of loop over 32 bits. |
185 | "ldr %[aValue], [%[a]], #4\n" |
186 | "ldr %[bValue], [%[b]], #4\n" |
187 | "cmp %[aValue], %[bValue]\n" |
188 | "bne 66f\n" |
189 | "subs %[length], #4\n" |
190 | "bhs 0b\n" |
191 | |
192 | // At this point, length can be: |
193 | // -0: 00000000000000000000000000000000 (0 bytes left) |
194 | // -1: 11111111111111111111111111111111 (3 bytes left) |
195 | // -2: 11111111111111111111111111111110 (2 bytes left) |
196 | // -3: 11111111111111111111111111111101 (1 byte left) |
197 | // -4: 11111111111111111111111111111100 (length was 0) |
198 | // The pointers are at the correct position. |
199 | "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. |
200 | "tst %[length], #2\n" |
201 | "beq 1f\n" |
202 | "ldrh %[aValue], [%[a]], #2\n" |
203 | "ldrh %[bValue], [%[b]], #2\n" |
204 | "cmp %[aValue], %[bValue]\n" |
205 | "bne 66f\n" |
206 | |
207 | "1:\n" // Label 1 = Check for a single character left. |
208 | "tst %[length], #1\n" |
209 | "beq 42f\n" |
210 | "ldrb %[aValue], [%[a]]\n" |
211 | "ldrb %[bValue], [%[b]]\n" |
212 | "cmp %[aValue], %[bValue]\n" |
213 | "bne 66f\n" |
214 | |
215 | "42:\n" // Label 42 = Success. |
216 | "mov %[isEqual], #1\n" |
217 | "66:\n" // Label 66 = End without changing isEqual to 1. |
218 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
219 | : |
220 | : |
221 | ); |
222 | return isEqual; |
223 | } |
224 | |
225 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
226 | { |
227 | bool isEqual = false; |
228 | uint32_t aValue; |
229 | uint32_t bValue; |
230 | asm("subs %[length], #2\n" |
231 | "blo 1f\n" |
232 | |
233 | "0:\n" // Label 0 = Start of loop over 32 bits. |
234 | "ldr %[aValue], [%[a]], #4\n" |
235 | "ldr %[bValue], [%[b]], #4\n" |
236 | "cmp %[aValue], %[bValue]\n" |
237 | "bne 66f\n" |
238 | "subs %[length], #2\n" |
239 | "bhs 0b\n" |
240 | |
241 | // At this point, length can be: |
242 | // -0: 00000000000000000000000000000000 (0 bytes left) |
243 | // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) |
244 | // -2: 11111111111111111111111111111110 (length was zero) |
245 | // The pointers are at the correct position. |
246 | "1:\n" // Label 1 = Check for a single character left. |
247 | "tst %[length], #1\n" |
248 | "beq 42f\n" |
249 | "ldrh %[aValue], [%[a]]\n" |
250 | "ldrh %[bValue], [%[b]]\n" |
251 | "cmp %[aValue], %[bValue]\n" |
252 | "bne 66f\n" |
253 | |
254 | "42:\n" // Label 42 = Success. |
255 | "mov %[isEqual], #1\n" |
256 | "66:\n" // Label 66 = End without changing isEqual to 1. |
257 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
258 | : |
259 | : |
260 | ); |
261 | return isEqual; |
262 | } |
263 | #elif !ASAN_ENABLED |
264 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } |
265 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } |
266 | #else |
267 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
268 | { |
269 | for (unsigned i = 0; i < length; ++i) { |
270 | if (a[i] != b[i]) |
271 | return false; |
272 | } |
273 | return true; |
274 | } |
275 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
276 | { |
277 | for (unsigned i = 0; i < length; ++i) { |
278 | if (a[i] != b[i]) |
279 | return false; |
280 | } |
281 | return true; |
282 | } |
283 | #endif |
284 | |
285 | ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) |
286 | { |
287 | for (unsigned i = 0; i < length; ++i) { |
288 | if (a[i] != b[i]) |
289 | return false; |
290 | } |
291 | return true; |
292 | } |
293 | |
294 | ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } |
295 | |
296 | template<typename StringClassA, typename StringClassB> |
297 | ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b) |
298 | { |
299 | unsigned length = a.length(); |
300 | if (length != b.length()) |
301 | return false; |
302 | |
303 | if (a.is8Bit()) { |
304 | if (b.is8Bit()) |
305 | return equal(a.characters8(), b.characters8(), length); |
306 | |
307 | return equal(a.characters8(), b.characters16(), length); |
308 | } |
309 | |
310 | if (b.is8Bit()) |
311 | return equal(a.characters16(), b.characters8(), length); |
312 | |
313 | return equal(a.characters16(), b.characters16(), length); |
314 | } |
315 | |
316 | template<typename StringClassA, typename StringClassB> |
317 | ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b) |
318 | { |
319 | if (a == b) |
320 | return true; |
321 | if (!a || !b) |
322 | return false; |
323 | return equal(*a, *b); |
324 | } |
325 | |
326 | template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length]) |
327 | { |
328 | if (a.length() != length) |
329 | return false; |
330 | |
331 | if (a.is8Bit()) |
332 | return equal(a.characters8(), codeUnits, length); |
333 | |
334 | return equal(a.characters16(), codeUnits, length); |
335 | } |
336 | |
337 | template<typename CharacterTypeA, typename CharacterTypeB> |
338 | inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length) |
339 | { |
340 | for (unsigned i = 0; i < length; ++i) { |
341 | if (toASCIILower(a[i]) != toASCIILower(b[i])) |
342 | return false; |
343 | } |
344 | return true; |
345 | } |
346 | |
347 | template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB) |
348 | { |
349 | return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA); |
350 | } |
351 | |
352 | template<typename StringClassA, typename StringClassB> |
353 | bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b) |
354 | { |
355 | unsigned length = a.length(); |
356 | if (length != b.length()) |
357 | return false; |
358 | |
359 | if (a.is8Bit()) { |
360 | if (b.is8Bit()) |
361 | return equalIgnoringASCIICase(a.characters8(), b.characters8(), length); |
362 | |
363 | return equalIgnoringASCIICase(a.characters8(), b.characters16(), length); |
364 | } |
365 | |
366 | if (b.is8Bit()) |
367 | return equalIgnoringASCIICase(a.characters16(), b.characters8(), length); |
368 | |
369 | return equalIgnoringASCIICase(a.characters16(), b.characters16(), length); |
370 | } |
371 | |
372 | template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b) |
373 | { |
374 | unsigned length = a.length(); |
375 | if (length != strlen(b)) |
376 | return false; |
377 | |
378 | if (a.is8Bit()) |
379 | return equalIgnoringASCIICase(a.characters8(), b, length); |
380 | |
381 | return equalIgnoringASCIICase(a.characters16(), b, length); |
382 | } |
383 | |
384 | template<typename StringClassA, typename StringClassB> |
385 | bool startsWith(const StringClassA& reference, const StringClassB& prefix) |
386 | { |
387 | unsigned prefixLength = prefix.length(); |
388 | if (prefixLength > reference.length()) |
389 | return false; |
390 | |
391 | if (reference.is8Bit()) { |
392 | if (prefix.is8Bit()) |
393 | return equal(reference.characters8(), prefix.characters8(), prefixLength); |
394 | return equal(reference.characters8(), prefix.characters16(), prefixLength); |
395 | } |
396 | if (prefix.is8Bit()) |
397 | return equal(reference.characters16(), prefix.characters8(), prefixLength); |
398 | return equal(reference.characters16(), prefix.characters16(), prefixLength); |
399 | } |
400 | |
401 | template<typename StringClassA, typename StringClassB> |
402 | bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix) |
403 | { |
404 | unsigned prefixLength = prefix.length(); |
405 | if (prefixLength > reference.length()) |
406 | return false; |
407 | |
408 | if (reference.is8Bit()) { |
409 | if (prefix.is8Bit()) |
410 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength); |
411 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength); |
412 | } |
413 | if (prefix.is8Bit()) |
414 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength); |
415 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength); |
416 | } |
417 | |
418 | template<typename StringClassA, typename StringClassB> |
419 | bool endsWith(const StringClassA& reference, const StringClassB& suffix) |
420 | { |
421 | unsigned suffixLength = suffix.length(); |
422 | unsigned referenceLength = reference.length(); |
423 | if (suffixLength > referenceLength) |
424 | return false; |
425 | |
426 | unsigned startOffset = referenceLength - suffixLength; |
427 | |
428 | if (reference.is8Bit()) { |
429 | if (suffix.is8Bit()) |
430 | return equal(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
431 | return equal(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
432 | } |
433 | if (suffix.is8Bit()) |
434 | return equal(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
435 | return equal(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
436 | } |
437 | |
438 | template<typename StringClassA, typename StringClassB> |
439 | bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix) |
440 | { |
441 | unsigned suffixLength = suffix.length(); |
442 | unsigned referenceLength = reference.length(); |
443 | if (suffixLength > referenceLength) |
444 | return false; |
445 | |
446 | unsigned startOffset = referenceLength - suffixLength; |
447 | |
448 | if (reference.is8Bit()) { |
449 | if (suffix.is8Bit()) |
450 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
451 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
452 | } |
453 | if (suffix.is8Bit()) |
454 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
455 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
456 | } |
457 | |
458 | template <typename SearchCharacterType, typename MatchCharacterType> |
459 | size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength) |
460 | { |
461 | ASSERT(searchLength >= matchLength); |
462 | |
463 | const SearchCharacterType* startSearchedCharacters = source + startOffset; |
464 | |
465 | // delta is the number of additional times to test; delta == 0 means test only once. |
466 | unsigned delta = searchLength - matchLength; |
467 | |
468 | for (unsigned i = 0; i <= delta; ++i) { |
469 | if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength)) |
470 | return startOffset + i; |
471 | } |
472 | return notFound; |
473 | } |
474 | |
475 | inline size_t findIgnoringASCIICaseWithoutLength(const char* source, const char* matchCharacters) |
476 | { |
477 | unsigned searchLength = strlen(source); |
478 | unsigned matchLength = strlen(matchCharacters); |
479 | |
480 | return matchLength < searchLength ? findIgnoringASCIICase(source, matchCharacters, 0, searchLength, matchLength) : notFound; |
481 | } |
482 | |
483 | template<typename StringClassA, typename StringClassB> |
484 | size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset) |
485 | { |
486 | unsigned sourceStringLength = source.length(); |
487 | unsigned matchLength = stringToFind.length(); |
488 | if (!matchLength) |
489 | return std::min(startOffset, sourceStringLength); |
490 | |
491 | // Check startOffset & matchLength are in range. |
492 | if (startOffset > sourceStringLength) |
493 | return notFound; |
494 | unsigned searchLength = sourceStringLength - startOffset; |
495 | if (matchLength > searchLength) |
496 | return notFound; |
497 | |
498 | if (source.is8Bit()) { |
499 | if (stringToFind.is8Bit()) |
500 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
501 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
502 | } |
503 | |
504 | if (stringToFind.is8Bit()) |
505 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
506 | |
507 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
508 | } |
509 | |
510 | template <typename SearchCharacterType, typename MatchCharacterType> |
511 | ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) |
512 | { |
513 | // Optimization: keep a running hash of the strings, |
514 | // only call equal() if the hashes match. |
515 | |
516 | // delta is the number of additional times to test; delta == 0 means test only once. |
517 | unsigned delta = searchLength - matchLength; |
518 | |
519 | unsigned searchHash = 0; |
520 | unsigned matchHash = 0; |
521 | |
522 | for (unsigned i = 0; i < matchLength; ++i) { |
523 | searchHash += searchCharacters[i]; |
524 | matchHash += matchCharacters[i]; |
525 | } |
526 | |
527 | unsigned i = 0; |
528 | // keep looping until we match |
529 | while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { |
530 | if (i == delta) |
531 | return notFound; |
532 | searchHash += searchCharacters[i + matchLength]; |
533 | searchHash -= searchCharacters[i]; |
534 | ++i; |
535 | } |
536 | return index + i; |
537 | } |
538 | |
539 | template<typename CharacterType> |
540 | inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) |
541 | { |
542 | while (index < length) { |
543 | if (characters[index] == matchCharacter) |
544 | return index; |
545 | ++index; |
546 | } |
547 | return notFound; |
548 | } |
549 | |
550 | ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) |
551 | { |
552 | return find(characters, length, static_cast<UChar>(matchCharacter), index); |
553 | } |
554 | |
555 | inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) |
556 | { |
557 | if (!isLatin1(matchCharacter)) |
558 | return notFound; |
559 | return find(characters, length, static_cast<LChar>(matchCharacter), index); |
560 | } |
561 | |
562 | template<typename StringClass> |
563 | size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start) |
564 | { |
565 | unsigned needleLength = needle.length(); |
566 | |
567 | if (needleLength == 1) { |
568 | if (haystack.is8Bit()) |
569 | return WTF::find(haystack.characters8(), haystack.length(), needle[0], start); |
570 | return WTF::find(haystack.characters16(), haystack.length(), needle[0], start); |
571 | } |
572 | |
573 | if (start > haystack.length()) |
574 | return notFound; |
575 | |
576 | if (!needleLength) |
577 | return start; |
578 | |
579 | unsigned searchLength = haystack.length() - start; |
580 | if (needleLength > searchLength) |
581 | return notFound; |
582 | |
583 | if (haystack.is8Bit()) { |
584 | if (needle.is8Bit()) |
585 | return findInner(haystack.characters8() + start, needle.characters8(), start, searchLength, needleLength); |
586 | return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength); |
587 | } |
588 | |
589 | if (needle.is8Bit()) |
590 | return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength); |
591 | |
592 | return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength); |
593 | } |
594 | |
595 | // This is marked inline since it's mostly used in non-inline functions for each string type. |
596 | // When used directly in code it's probably OK to be inline; maybe the loop will be unrolled. |
597 | template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length) |
598 | { |
599 | for (unsigned i = 0; i < length; ++i) { |
600 | if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i])) |
601 | return false; |
602 | } |
603 | return true; |
604 | } |
605 | |
606 | template<typename CharacterType, unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]) |
607 | { |
608 | ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1); |
609 | unsigned = lowercaseLettersLength - 1; |
610 | return charactersLength == lowercaseLettersStringLength && equalLettersIgnoringASCIICase(characters, lowercaseLetters, lowercaseLettersStringLength); |
611 | } |
612 | |
613 | template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length) |
614 | { |
615 | #if !ASSERT_DISABLED |
616 | ASSERT(*lowercaseLetters); |
617 | for (const char* letter = lowercaseLetters; *letter; ++letter) |
618 | ASSERT(toASCIILowerUnchecked(*letter) == *letter); |
619 | #endif |
620 | ASSERT(string.length() >= length); |
621 | |
622 | if (string.is8Bit()) |
623 | return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length); |
624 | return equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length); |
625 | } |
626 | |
627 | // This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere. |
628 | template<typename StringClass> bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
629 | { |
630 | unsigned length = string.length(); |
631 | if (length != strlen(lowercaseLetters)) |
632 | return false; |
633 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, length); |
634 | } |
635 | |
636 | template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
637 | { |
638 | size_t prefixLength = strlen(lowercaseLetters); |
639 | if (!prefixLength) |
640 | return true; |
641 | if (string.length() < prefixLength) |
642 | return false; |
643 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, prefixLength); |
644 | } |
645 | |
646 | template<typename StringClass, unsigned length> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
647 | { |
648 | // Don't actually use the length; we are choosing code size over speed. |
649 | ASSERT(strlen(lowercaseLetters) == length - 1); |
650 | const char* pointer = lowercaseLetters; |
651 | return equalLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
652 | } |
653 | |
654 | template<typename StringClass, unsigned length> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
655 | { |
656 | const char* pointer = lowercaseLetters; |
657 | return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
658 | } |
659 | |
660 | inline bool equalIgnoringASCIICase(const char* a, const char* b) |
661 | { |
662 | auto length = strlen(a); |
663 | return length == strlen(b) && equalIgnoringASCIICase(a, b, length); |
664 | } |
665 | |
666 | template<unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const char* string, const char (&lowercaseLetters)[lowercaseLettersLength]) |
667 | { |
668 | auto length = strlen(lowercaseLetters); |
669 | return strlen(string) == length && equalLettersIgnoringASCIICase(string, lowercaseLetters, length); |
670 | } |
671 | |
672 | } |
673 | |
674 | using WTF::equalIgnoringASCIICase; |
675 | using WTF::equalLettersIgnoringASCIICase; |
676 | using WTF::isLatin1; |
677 | |