1 | /* |
2 | * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * 1. Redistributions of source code must retain the above copyright |
8 | * notice, this list of conditions and the following disclaimer. |
9 | * 2. Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * |
13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
23 | * THE POSSIBILITY OF SUCH DAMAGE. |
24 | */ |
25 | |
26 | #pragma once |
27 | |
28 | #include <algorithm> |
29 | #include <unicode/uchar.h> |
30 | #include <wtf/ASCIICType.h> |
31 | #include <wtf/NotFound.h> |
32 | #include <wtf/UnalignedAccess.h> |
33 | |
34 | namespace WTF { |
35 | |
36 | using CodeUnitMatchFunction = bool (*)(UChar); |
37 | |
38 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, const CharacterTypeB*, unsigned length); |
39 | template<typename CharacterTypeA, typename CharacterTypeB> bool equalIgnoringASCIICase(const CharacterTypeA*, unsigned lengthA, const CharacterTypeB*, unsigned lengthB); |
40 | |
41 | template<typename StringClassA, typename StringClassB> bool equalIgnoringASCIICaseCommon(const StringClassA&, const StringClassB&); |
42 | |
43 | template<typename CharacterType> bool equalLettersIgnoringASCIICase(const CharacterType*, const char* lowercaseLetters, unsigned length); |
44 | template<typename CharacterType, unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const CharacterType*, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]); |
45 | |
46 | template<typename StringClass, unsigned length> bool equalLettersIgnoringASCIICaseCommon(const StringClass&, const char (&lowercaseLetters)[length]); |
47 | |
48 | bool equalIgnoringASCIICase(const char*, const char*); |
49 | template<unsigned lowercaseLettersLength> bool equalLettersIgnoringASCIICase(const char*, const char (&lowercaseLetters)[lowercaseLettersLength]); |
50 | |
51 | // Do comparisons 8 or 4 bytes-at-a-time on architectures where it's safe. |
52 | #if (CPU(X86_64) || CPU(ARM64)) && !ASAN_ENABLED |
53 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
54 | { |
55 | unsigned dwordLength = length >> 3; |
56 | |
57 | const char* a = reinterpret_cast<const char*>(aLChar); |
58 | const char* b = reinterpret_cast<const char*>(bLChar); |
59 | |
60 | if (dwordLength) { |
61 | for (unsigned i = 0; i != dwordLength; ++i) { |
62 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
63 | return false; |
64 | |
65 | a += sizeof(uint64_t); |
66 | b += sizeof(uint64_t); |
67 | } |
68 | } |
69 | |
70 | if (length & 4) { |
71 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
72 | return false; |
73 | |
74 | a += sizeof(uint32_t); |
75 | b += sizeof(uint32_t); |
76 | } |
77 | |
78 | if (length & 2) { |
79 | if (unalignedLoad<uint16_t>(a) != unalignedLoad<uint16_t>(b)) |
80 | return false; |
81 | |
82 | a += sizeof(uint16_t); |
83 | b += sizeof(uint16_t); |
84 | } |
85 | |
86 | if (length & 1 && (*reinterpret_cast<const LChar*>(a) != *reinterpret_cast<const LChar*>(b))) |
87 | return false; |
88 | |
89 | return true; |
90 | } |
91 | |
92 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
93 | { |
94 | unsigned dwordLength = length >> 2; |
95 | |
96 | const char* a = reinterpret_cast<const char*>(aUChar); |
97 | const char* b = reinterpret_cast<const char*>(bUChar); |
98 | |
99 | if (dwordLength) { |
100 | for (unsigned i = 0; i != dwordLength; ++i) { |
101 | if (unalignedLoad<uint64_t>(a) != unalignedLoad<uint64_t>(b)) |
102 | return false; |
103 | |
104 | a += sizeof(uint64_t); |
105 | b += sizeof(uint64_t); |
106 | } |
107 | } |
108 | |
109 | if (length & 2) { |
110 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
111 | return false; |
112 | |
113 | a += sizeof(uint32_t); |
114 | b += sizeof(uint32_t); |
115 | } |
116 | |
117 | if (length & 1 && (*reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b))) |
118 | return false; |
119 | |
120 | return true; |
121 | } |
122 | #elif CPU(X86) && !ASAN_ENABLED |
123 | ALWAYS_INLINE bool equal(const LChar* aLChar, const LChar* bLChar, unsigned length) |
124 | { |
125 | const char* a = reinterpret_cast<const char*>(aLChar); |
126 | const char* b = reinterpret_cast<const char*>(bLChar); |
127 | |
128 | unsigned wordLength = length >> 2; |
129 | for (unsigned i = 0; i != wordLength; ++i) { |
130 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
131 | return false; |
132 | a += sizeof(uint32_t); |
133 | b += sizeof(uint32_t); |
134 | } |
135 | |
136 | length &= 3; |
137 | |
138 | if (length) { |
139 | const LChar* aRemainder = reinterpret_cast<const LChar*>(a); |
140 | const LChar* bRemainder = reinterpret_cast<const LChar*>(b); |
141 | |
142 | for (unsigned i = 0; i < length; ++i) { |
143 | if (aRemainder[i] != bRemainder[i]) |
144 | return false; |
145 | } |
146 | } |
147 | |
148 | return true; |
149 | } |
150 | |
151 | ALWAYS_INLINE bool equal(const UChar* aUChar, const UChar* bUChar, unsigned length) |
152 | { |
153 | const char* a = reinterpret_cast<const char*>(aUChar); |
154 | const char* b = reinterpret_cast<const char*>(bUChar); |
155 | |
156 | unsigned wordLength = length >> 1; |
157 | for (unsigned i = 0; i != wordLength; ++i) { |
158 | if (unalignedLoad<uint32_t>(a) != unalignedLoad<uint32_t>(b)) |
159 | return false; |
160 | a += sizeof(uint32_t); |
161 | b += sizeof(uint32_t); |
162 | } |
163 | |
164 | if (length & 1 && *reinterpret_cast<const UChar*>(a) != *reinterpret_cast<const UChar*>(b)) |
165 | return false; |
166 | |
167 | return true; |
168 | } |
169 | #elif PLATFORM(IOS_FAMILY) && WTF_ARM_ARCH_AT_LEAST(7) && !ASAN_ENABLED |
170 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
171 | { |
172 | bool isEqual = false; |
173 | uint32_t aValue; |
174 | uint32_t bValue; |
175 | asm("subs %[length], #4\n" |
176 | "blo 2f\n" |
177 | |
178 | "0:\n" // Label 0 = Start of loop over 32 bits. |
179 | "ldr %[aValue], [%[a]], #4\n" |
180 | "ldr %[bValue], [%[b]], #4\n" |
181 | "cmp %[aValue], %[bValue]\n" |
182 | "bne 66f\n" |
183 | "subs %[length], #4\n" |
184 | "bhs 0b\n" |
185 | |
186 | // At this point, length can be: |
187 | // -0: 00000000000000000000000000000000 (0 bytes left) |
188 | // -1: 11111111111111111111111111111111 (3 bytes left) |
189 | // -2: 11111111111111111111111111111110 (2 bytes left) |
190 | // -3: 11111111111111111111111111111101 (1 byte left) |
191 | // -4: 11111111111111111111111111111100 (length was 0) |
192 | // The pointers are at the correct position. |
193 | "2:\n" // Label 2 = End of loop over 32 bits, check for pair of characters. |
194 | "tst %[length], #2\n" |
195 | "beq 1f\n" |
196 | "ldrh %[aValue], [%[a]], #2\n" |
197 | "ldrh %[bValue], [%[b]], #2\n" |
198 | "cmp %[aValue], %[bValue]\n" |
199 | "bne 66f\n" |
200 | |
201 | "1:\n" // Label 1 = Check for a single character left. |
202 | "tst %[length], #1\n" |
203 | "beq 42f\n" |
204 | "ldrb %[aValue], [%[a]]\n" |
205 | "ldrb %[bValue], [%[b]]\n" |
206 | "cmp %[aValue], %[bValue]\n" |
207 | "bne 66f\n" |
208 | |
209 | "42:\n" // Label 42 = Success. |
210 | "mov %[isEqual], #1\n" |
211 | "66:\n" // Label 66 = End without changing isEqual to 1. |
212 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
213 | : |
214 | : |
215 | ); |
216 | return isEqual; |
217 | } |
218 | |
219 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
220 | { |
221 | bool isEqual = false; |
222 | uint32_t aValue; |
223 | uint32_t bValue; |
224 | asm("subs %[length], #2\n" |
225 | "blo 1f\n" |
226 | |
227 | "0:\n" // Label 0 = Start of loop over 32 bits. |
228 | "ldr %[aValue], [%[a]], #4\n" |
229 | "ldr %[bValue], [%[b]], #4\n" |
230 | "cmp %[aValue], %[bValue]\n" |
231 | "bne 66f\n" |
232 | "subs %[length], #2\n" |
233 | "bhs 0b\n" |
234 | |
235 | // At this point, length can be: |
236 | // -0: 00000000000000000000000000000000 (0 bytes left) |
237 | // -1: 11111111111111111111111111111111 (1 character left, 2 bytes) |
238 | // -2: 11111111111111111111111111111110 (length was zero) |
239 | // The pointers are at the correct position. |
240 | "1:\n" // Label 1 = Check for a single character left. |
241 | "tst %[length], #1\n" |
242 | "beq 42f\n" |
243 | "ldrh %[aValue], [%[a]]\n" |
244 | "ldrh %[bValue], [%[b]]\n" |
245 | "cmp %[aValue], %[bValue]\n" |
246 | "bne 66f\n" |
247 | |
248 | "42:\n" // Label 42 = Success. |
249 | "mov %[isEqual], #1\n" |
250 | "66:\n" // Label 66 = End without changing isEqual to 1. |
251 | : [length]"+r" (length), [isEqual]"+r" (isEqual), [a]"+r" (a), [b]"+r" (b), [aValue]"+r" (aValue), [bValue]"+r" (bValue) |
252 | : |
253 | : |
254 | ); |
255 | return isEqual; |
256 | } |
257 | #elif !ASAN_ENABLED |
258 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) { return !memcmp(a, b, length); } |
259 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) { return !memcmp(a, b, length * sizeof(UChar)); } |
260 | #else |
261 | ALWAYS_INLINE bool equal(const LChar* a, const LChar* b, unsigned length) |
262 | { |
263 | for (unsigned i = 0; i < length; ++i) { |
264 | if (a[i] != b[i]) |
265 | return false; |
266 | } |
267 | return true; |
268 | } |
269 | ALWAYS_INLINE bool equal(const UChar* a, const UChar* b, unsigned length) |
270 | { |
271 | for (unsigned i = 0; i < length; ++i) { |
272 | if (a[i] != b[i]) |
273 | return false; |
274 | } |
275 | return true; |
276 | } |
277 | #endif |
278 | |
279 | ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) |
280 | { |
281 | for (unsigned i = 0; i < length; ++i) { |
282 | if (a[i] != b[i]) |
283 | return false; |
284 | } |
285 | return true; |
286 | } |
287 | |
288 | ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } |
289 | |
290 | template<typename StringClassA, typename StringClassB> |
291 | ALWAYS_INLINE bool equalCommon(const StringClassA& a, const StringClassB& b) |
292 | { |
293 | unsigned length = a.length(); |
294 | if (length != b.length()) |
295 | return false; |
296 | |
297 | if (a.is8Bit()) { |
298 | if (b.is8Bit()) |
299 | return equal(a.characters8(), b.characters8(), length); |
300 | |
301 | return equal(a.characters8(), b.characters16(), length); |
302 | } |
303 | |
304 | if (b.is8Bit()) |
305 | return equal(a.characters16(), b.characters8(), length); |
306 | |
307 | return equal(a.characters16(), b.characters16(), length); |
308 | } |
309 | |
310 | template<typename StringClassA, typename StringClassB> |
311 | ALWAYS_INLINE bool equalCommon(const StringClassA* a, const StringClassB* b) |
312 | { |
313 | if (a == b) |
314 | return true; |
315 | if (!a || !b) |
316 | return false; |
317 | return equal(*a, *b); |
318 | } |
319 | |
320 | template<typename StringClass, unsigned length> bool equal(const StringClass& a, const UChar (&codeUnits)[length]) |
321 | { |
322 | if (a.length() != length) |
323 | return false; |
324 | |
325 | if (a.is8Bit()) |
326 | return equal(a.characters8(), codeUnits, length); |
327 | |
328 | return equal(a.characters16(), codeUnits, length); |
329 | } |
330 | |
331 | template<typename CharacterTypeA, typename CharacterTypeB> |
332 | inline bool equalIgnoringASCIICase(const CharacterTypeA* a, const CharacterTypeB* b, unsigned length) |
333 | { |
334 | for (unsigned i = 0; i < length; ++i) { |
335 | if (toASCIILower(a[i]) != toASCIILower(b[i])) |
336 | return false; |
337 | } |
338 | return true; |
339 | } |
340 | |
341 | template<typename CharacterTypeA, typename CharacterTypeB> inline bool equalIgnoringASCIICase(const CharacterTypeA* a, unsigned lengthA, const CharacterTypeB* b, unsigned lengthB) |
342 | { |
343 | return lengthA == lengthB && equalIgnoringASCIICase(a, b, lengthA); |
344 | } |
345 | |
346 | template<typename StringClassA, typename StringClassB> |
347 | bool equalIgnoringASCIICaseCommon(const StringClassA& a, const StringClassB& b) |
348 | { |
349 | unsigned length = a.length(); |
350 | if (length != b.length()) |
351 | return false; |
352 | |
353 | if (a.is8Bit()) { |
354 | if (b.is8Bit()) |
355 | return equalIgnoringASCIICase(a.characters8(), b.characters8(), length); |
356 | |
357 | return equalIgnoringASCIICase(a.characters8(), b.characters16(), length); |
358 | } |
359 | |
360 | if (b.is8Bit()) |
361 | return equalIgnoringASCIICase(a.characters16(), b.characters8(), length); |
362 | |
363 | return equalIgnoringASCIICase(a.characters16(), b.characters16(), length); |
364 | } |
365 | |
366 | template<typename StringClassA> bool equalIgnoringASCIICaseCommon(const StringClassA& a, const char* b) |
367 | { |
368 | unsigned length = a.length(); |
369 | if (length != strlen(b)) |
370 | return false; |
371 | |
372 | if (a.is8Bit()) |
373 | return equalIgnoringASCIICase(a.characters8(), b, length); |
374 | |
375 | return equalIgnoringASCIICase(a.characters16(), b, length); |
376 | } |
377 | |
378 | template<typename StringClassA, typename StringClassB> |
379 | bool startsWith(const StringClassA& reference, const StringClassB& prefix) |
380 | { |
381 | unsigned prefixLength = prefix.length(); |
382 | if (prefixLength > reference.length()) |
383 | return false; |
384 | |
385 | if (reference.is8Bit()) { |
386 | if (prefix.is8Bit()) |
387 | return equal(reference.characters8(), prefix.characters8(), prefixLength); |
388 | return equal(reference.characters8(), prefix.characters16(), prefixLength); |
389 | } |
390 | if (prefix.is8Bit()) |
391 | return equal(reference.characters16(), prefix.characters8(), prefixLength); |
392 | return equal(reference.characters16(), prefix.characters16(), prefixLength); |
393 | } |
394 | |
395 | template<typename StringClassA, typename StringClassB> |
396 | bool startsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& prefix) |
397 | { |
398 | unsigned prefixLength = prefix.length(); |
399 | if (prefixLength > reference.length()) |
400 | return false; |
401 | |
402 | if (reference.is8Bit()) { |
403 | if (prefix.is8Bit()) |
404 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters8(), prefixLength); |
405 | return equalIgnoringASCIICase(reference.characters8(), prefix.characters16(), prefixLength); |
406 | } |
407 | if (prefix.is8Bit()) |
408 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters8(), prefixLength); |
409 | return equalIgnoringASCIICase(reference.characters16(), prefix.characters16(), prefixLength); |
410 | } |
411 | |
412 | template<typename StringClassA, typename StringClassB> |
413 | bool endsWith(const StringClassA& reference, const StringClassB& suffix) |
414 | { |
415 | unsigned suffixLength = suffix.length(); |
416 | unsigned referenceLength = reference.length(); |
417 | if (suffixLength > referenceLength) |
418 | return false; |
419 | |
420 | unsigned startOffset = referenceLength - suffixLength; |
421 | |
422 | if (reference.is8Bit()) { |
423 | if (suffix.is8Bit()) |
424 | return equal(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
425 | return equal(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
426 | } |
427 | if (suffix.is8Bit()) |
428 | return equal(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
429 | return equal(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
430 | } |
431 | |
432 | template<typename StringClassA, typename StringClassB> |
433 | bool endsWithIgnoringASCIICase(const StringClassA& reference, const StringClassB& suffix) |
434 | { |
435 | unsigned suffixLength = suffix.length(); |
436 | unsigned referenceLength = reference.length(); |
437 | if (suffixLength > referenceLength) |
438 | return false; |
439 | |
440 | unsigned startOffset = referenceLength - suffixLength; |
441 | |
442 | if (reference.is8Bit()) { |
443 | if (suffix.is8Bit()) |
444 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters8(), suffixLength); |
445 | return equalIgnoringASCIICase(reference.characters8() + startOffset, suffix.characters16(), suffixLength); |
446 | } |
447 | if (suffix.is8Bit()) |
448 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters8(), suffixLength); |
449 | return equalIgnoringASCIICase(reference.characters16() + startOffset, suffix.characters16(), suffixLength); |
450 | } |
451 | |
452 | template <typename SearchCharacterType, typename MatchCharacterType> |
453 | size_t findIgnoringASCIICase(const SearchCharacterType* source, const MatchCharacterType* matchCharacters, unsigned startOffset, unsigned searchLength, unsigned matchLength) |
454 | { |
455 | ASSERT(searchLength >= matchLength); |
456 | |
457 | const SearchCharacterType* startSearchedCharacters = source + startOffset; |
458 | |
459 | // delta is the number of additional times to test; delta == 0 means test only once. |
460 | unsigned delta = searchLength - matchLength; |
461 | |
462 | for (unsigned i = 0; i <= delta; ++i) { |
463 | if (equalIgnoringASCIICase(startSearchedCharacters + i, matchCharacters, matchLength)) |
464 | return startOffset + i; |
465 | } |
466 | return notFound; |
467 | } |
468 | |
469 | inline size_t findIgnoringASCIICaseWithoutLength(const char* source, const char* matchCharacters) |
470 | { |
471 | unsigned searchLength = strlen(source); |
472 | unsigned matchLength = strlen(matchCharacters); |
473 | |
474 | return matchLength < searchLength ? findIgnoringASCIICase(source, matchCharacters, 0, searchLength, matchLength) : notFound; |
475 | } |
476 | |
477 | template<typename StringClassA, typename StringClassB> |
478 | size_t findIgnoringASCIICase(const StringClassA& source, const StringClassB& stringToFind, unsigned startOffset) |
479 | { |
480 | unsigned sourceStringLength = source.length(); |
481 | unsigned matchLength = stringToFind.length(); |
482 | if (!matchLength) |
483 | return std::min(startOffset, sourceStringLength); |
484 | |
485 | // Check startOffset & matchLength are in range. |
486 | if (startOffset > sourceStringLength) |
487 | return notFound; |
488 | unsigned searchLength = sourceStringLength - startOffset; |
489 | if (matchLength > searchLength) |
490 | return notFound; |
491 | |
492 | if (source.is8Bit()) { |
493 | if (stringToFind.is8Bit()) |
494 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
495 | return findIgnoringASCIICase(source.characters8(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
496 | } |
497 | |
498 | if (stringToFind.is8Bit()) |
499 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters8(), startOffset, searchLength, matchLength); |
500 | |
501 | return findIgnoringASCIICase(source.characters16(), stringToFind.characters16(), startOffset, searchLength, matchLength); |
502 | } |
503 | |
504 | template <typename SearchCharacterType, typename MatchCharacterType> |
505 | ALWAYS_INLINE static size_t findInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) |
506 | { |
507 | // Optimization: keep a running hash of the strings, |
508 | // only call equal() if the hashes match. |
509 | |
510 | // delta is the number of additional times to test; delta == 0 means test only once. |
511 | unsigned delta = searchLength - matchLength; |
512 | |
513 | unsigned searchHash = 0; |
514 | unsigned matchHash = 0; |
515 | |
516 | for (unsigned i = 0; i < matchLength; ++i) { |
517 | searchHash += searchCharacters[i]; |
518 | matchHash += matchCharacters[i]; |
519 | } |
520 | |
521 | unsigned i = 0; |
522 | // keep looping until we match |
523 | while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { |
524 | if (i == delta) |
525 | return notFound; |
526 | searchHash += searchCharacters[i + matchLength]; |
527 | searchHash -= searchCharacters[i]; |
528 | ++i; |
529 | } |
530 | return index + i; |
531 | } |
532 | |
533 | template<typename CharacterType> |
534 | inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) |
535 | { |
536 | while (index < length) { |
537 | if (characters[index] == matchCharacter) |
538 | return index; |
539 | ++index; |
540 | } |
541 | return notFound; |
542 | } |
543 | |
544 | ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) |
545 | { |
546 | return find(characters, length, static_cast<UChar>(matchCharacter), index); |
547 | } |
548 | |
549 | inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) |
550 | { |
551 | if (matchCharacter & ~0xFF) |
552 | return notFound; |
553 | return find(characters, length, static_cast<LChar>(matchCharacter), index); |
554 | } |
555 | |
556 | template<typename StringClass> |
557 | size_t findCommon(const StringClass& haystack, const StringClass& needle, unsigned start) |
558 | { |
559 | unsigned needleLength = needle.length(); |
560 | |
561 | if (needleLength == 1) { |
562 | if (haystack.is8Bit()) |
563 | return WTF::find(haystack.characters8(), haystack.length(), needle[0], start); |
564 | return WTF::find(haystack.characters16(), haystack.length(), needle[0], start); |
565 | } |
566 | |
567 | if (!needleLength) |
568 | return std::min(start, haystack.length()); |
569 | |
570 | if (start > haystack.length()) |
571 | return notFound; |
572 | unsigned searchLength = haystack.length() - start; |
573 | if (needleLength > searchLength) |
574 | return notFound; |
575 | |
576 | if (haystack.is8Bit()) { |
577 | if (needle.is8Bit()) |
578 | return findInner(haystack.characters8() + start, needle.characters8(), start, searchLength, needleLength); |
579 | return findInner(haystack.characters8() + start, needle.characters16(), start, searchLength, needleLength); |
580 | } |
581 | |
582 | if (needle.is8Bit()) |
583 | return findInner(haystack.characters16() + start, needle.characters8(), start, searchLength, needleLength); |
584 | |
585 | return findInner(haystack.characters16() + start, needle.characters16(), start, searchLength, needleLength); |
586 | } |
587 | |
588 | // This is marked inline since it's mostly used in non-inline functions for each string type. |
589 | // When used directly in code it's probably OK to be inline; maybe the loop will be unrolled. |
590 | template<typename CharacterType> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, const char* lowercaseLetters, unsigned length) |
591 | { |
592 | for (unsigned i = 0; i < length; ++i) { |
593 | if (!isASCIIAlphaCaselessEqual(characters[i], lowercaseLetters[i])) |
594 | return false; |
595 | } |
596 | return true; |
597 | } |
598 | |
599 | template<typename CharacterType, unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const CharacterType* characters, unsigned charactersLength, const char (&lowercaseLetters)[lowercaseLettersLength]) |
600 | { |
601 | ASSERT(strlen(lowercaseLetters) == lowercaseLettersLength - 1); |
602 | unsigned = lowercaseLettersLength - 1; |
603 | return charactersLength == lowercaseLettersStringLength && equalLettersIgnoringASCIICase(characters, lowercaseLetters, lowercaseLettersStringLength); |
604 | } |
605 | |
606 | template<typename StringClass> bool inline hasPrefixWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char* lowercaseLetters, unsigned length) |
607 | { |
608 | #if !ASSERT_DISABLED |
609 | ASSERT(*lowercaseLetters); |
610 | for (const char* letter = lowercaseLetters; *letter; ++letter) |
611 | ASSERT(toASCIILowerUnchecked(*letter) == *letter); |
612 | #endif |
613 | ASSERT(string.length() >= length); |
614 | |
615 | if (string.is8Bit()) |
616 | return equalLettersIgnoringASCIICase(string.characters8(), lowercaseLetters, length); |
617 | return equalLettersIgnoringASCIICase(string.characters16(), lowercaseLetters, length); |
618 | } |
619 | |
620 | // This is intentionally not marked inline because it's used often and is not speed-critical enough to want it inlined everywhere. |
621 | template<typename StringClass> bool equalLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
622 | { |
623 | unsigned length = string.length(); |
624 | if (length != strlen(lowercaseLetters)) |
625 | return false; |
626 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, length); |
627 | } |
628 | |
629 | template<typename StringClass> bool startsWithLettersIgnoringASCIICaseCommonWithoutLength(const StringClass& string, const char* lowercaseLetters) |
630 | { |
631 | size_t prefixLength = strlen(lowercaseLetters); |
632 | if (!prefixLength) |
633 | return true; |
634 | if (string.length() < prefixLength) |
635 | return false; |
636 | return hasPrefixWithLettersIgnoringASCIICaseCommon(string, lowercaseLetters, prefixLength); |
637 | } |
638 | |
639 | template<typename StringClass, unsigned length> inline bool equalLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
640 | { |
641 | // Don't actually use the length; we are choosing code size over speed. |
642 | ASSERT(strlen(lowercaseLetters) == length - 1); |
643 | const char* pointer = lowercaseLetters; |
644 | return equalLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
645 | } |
646 | |
647 | template<typename StringClass, unsigned length> inline bool startsWithLettersIgnoringASCIICaseCommon(const StringClass& string, const char (&lowercaseLetters)[length]) |
648 | { |
649 | const char* pointer = lowercaseLetters; |
650 | return startsWithLettersIgnoringASCIICaseCommonWithoutLength(string, pointer); |
651 | } |
652 | |
653 | inline bool equalIgnoringASCIICase(const char* a, const char* b) |
654 | { |
655 | auto length = strlen(a); |
656 | return length == strlen(b) && equalIgnoringASCIICase(a, b, length); |
657 | } |
658 | |
659 | template<unsigned lowercaseLettersLength> inline bool equalLettersIgnoringASCIICase(const char* string, const char (&lowercaseLetters)[lowercaseLettersLength]) |
660 | { |
661 | auto length = strlen(lowercaseLetters); |
662 | return strlen(string) == length && equalLettersIgnoringASCIICase(string, lowercaseLetters, length); |
663 | } |
664 | |
665 | } |
666 | |
667 | using WTF::equalIgnoringASCIICase; |
668 | using WTF::equalLettersIgnoringASCIICase; |
669 | |