1 | /* |
2 | * (C) 1999 Lars Knoll ([email protected]) |
3 | * Copyright (C) 2004-2019 Apple Inc. All rights reserved. |
4 | * Copyright (C) 2007-2009 Torch Mobile, Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | */ |
21 | |
22 | #include "config.h" |
23 | #include <wtf/text/WTFString.h> |
24 | |
25 | #include <stdarg.h> |
26 | #include <wtf/ASCIICType.h> |
27 | #include <wtf/DataLog.h> |
28 | #include <wtf/HexNumber.h> |
29 | #include <wtf/MathExtras.h> |
30 | #include <wtf/NeverDestroyed.h> |
31 | #include <wtf/Vector.h> |
32 | #include <wtf/dtoa.h> |
33 | #include <wtf/text/CString.h> |
34 | #include <wtf/text/IntegerToStringConversion.h> |
35 | #include <wtf/text/StringToIntegerConversion.h> |
36 | #include <wtf/unicode/CharacterNames.h> |
37 | #include <wtf/unicode/UTF8Conversion.h> |
38 | |
39 | namespace WTF { |
40 | |
41 | using namespace Unicode; |
42 | |
43 | // Construct a string with UTF-16 data. |
44 | String::String(const UChar* characters, unsigned length) |
45 | { |
46 | if (characters) |
47 | m_impl = StringImpl::create(characters, length); |
48 | } |
49 | |
50 | // Construct a string with UTF-16 data, from a null-terminated source. |
51 | String::String(const UChar* nullTerminatedString) |
52 | { |
53 | if (nullTerminatedString) |
54 | m_impl = StringImpl::create(nullTerminatedString, lengthOfNullTerminatedString(nullTerminatedString)); |
55 | } |
56 | |
57 | // Construct a string with latin1 data. |
58 | String::String(const LChar* characters, unsigned length) |
59 | { |
60 | if (characters) |
61 | m_impl = StringImpl::create(characters, length); |
62 | } |
63 | |
64 | String::String(const char* characters, unsigned length) |
65 | { |
66 | if (characters) |
67 | m_impl = StringImpl::create(reinterpret_cast<const LChar*>(characters), length); |
68 | } |
69 | |
70 | // Construct a string with Latin-1 data, from a null-terminated source. |
71 | String::String(const LChar* nullTerminatedString) |
72 | { |
73 | if (nullTerminatedString) |
74 | m_impl = StringImpl::create(nullTerminatedString); |
75 | } |
76 | |
77 | String::String(const char* nullTerminatedString) |
78 | { |
79 | if (nullTerminatedString) |
80 | m_impl = StringImpl::create(reinterpret_cast<const LChar*>(nullTerminatedString)); |
81 | } |
82 | |
83 | String::String(ASCIILiteral characters) |
84 | : m_impl(StringImpl::createFromLiteral(characters)) |
85 | { |
86 | } |
87 | |
88 | void String::append(const String& otherString) |
89 | { |
90 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
91 | |
92 | if (!m_impl) { |
93 | m_impl = otherString.m_impl; |
94 | return; |
95 | } |
96 | |
97 | if (otherString.isEmpty()) |
98 | return; |
99 | |
100 | auto length = m_impl->length(); |
101 | auto otherLength = otherString.m_impl->length(); |
102 | if (otherLength > MaxLength - length) |
103 | CRASH(); |
104 | |
105 | if (m_impl->is8Bit() && otherString.m_impl->is8Bit()) { |
106 | LChar* data; |
107 | auto newImpl = StringImpl::createUninitialized(length + otherLength, data); |
108 | StringImpl::copyCharacters(data, m_impl->characters8(), length); |
109 | StringImpl::copyCharacters(data + length, otherString.m_impl->characters8(), otherLength); |
110 | m_impl = WTFMove(newImpl); |
111 | return; |
112 | } |
113 | UChar* data; |
114 | auto newImpl = StringImpl::createUninitialized(length + otherLength, data); |
115 | StringView(*m_impl).getCharactersWithUpconvert(data); |
116 | StringView(*otherString.m_impl).getCharactersWithUpconvert(data + length); |
117 | m_impl = WTFMove(newImpl); |
118 | } |
119 | |
120 | void String::append(LChar character) |
121 | { |
122 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
123 | |
124 | if (!m_impl) { |
125 | m_impl = StringImpl::create(&character, 1); |
126 | return; |
127 | } |
128 | if (!is8Bit()) { |
129 | append(static_cast<UChar>(character)); |
130 | return; |
131 | } |
132 | if (m_impl->length() >= MaxLength) |
133 | CRASH(); |
134 | LChar* data; |
135 | auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
136 | StringImpl::copyCharacters(data, m_impl->characters8(), m_impl->length()); |
137 | data[m_impl->length()] = character; |
138 | m_impl = WTFMove(newImpl); |
139 | } |
140 | |
141 | void String::append(UChar character) |
142 | { |
143 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
144 | |
145 | if (!m_impl) { |
146 | m_impl = StringImpl::create(&character, 1); |
147 | return; |
148 | } |
149 | if (isLatin1(character) && is8Bit()) { |
150 | append(static_cast<LChar>(character)); |
151 | return; |
152 | } |
153 | if (m_impl->length() >= MaxLength) |
154 | CRASH(); |
155 | UChar* data; |
156 | auto newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); |
157 | StringView(*m_impl).getCharactersWithUpconvert(data); |
158 | data[m_impl->length()] = character; |
159 | m_impl = WTFMove(newImpl); |
160 | } |
161 | |
162 | int codePointCompare(const String& a, const String& b) |
163 | { |
164 | return codePointCompare(a.impl(), b.impl()); |
165 | } |
166 | |
167 | void String::insert(const String& string, unsigned position) |
168 | { |
169 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
170 | |
171 | unsigned lengthToInsert = string.length(); |
172 | |
173 | if (!lengthToInsert) { |
174 | if (string.isNull()) |
175 | return; |
176 | if (isNull()) |
177 | m_impl = string.impl(); |
178 | return; |
179 | } |
180 | |
181 | if (position >= length()) { |
182 | append(string); |
183 | return; |
184 | } |
185 | |
186 | if (lengthToInsert > MaxLength - length()) |
187 | CRASH(); |
188 | |
189 | if (is8Bit() && string.is8Bit()) { |
190 | LChar* data; |
191 | auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); |
192 | StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); |
193 | StringView(string).getCharactersWithUpconvert(data + position); |
194 | StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); |
195 | m_impl = WTFMove(newString); |
196 | } else { |
197 | UChar* data; |
198 | auto newString = StringImpl::createUninitialized(length() + lengthToInsert, data); |
199 | StringView(*m_impl).substring(0, position).getCharactersWithUpconvert(data); |
200 | StringView(string).getCharactersWithUpconvert(data + position); |
201 | StringView(*m_impl).substring(position).getCharactersWithUpconvert(data + position + lengthToInsert); |
202 | m_impl = WTFMove(newString); |
203 | } |
204 | } |
205 | |
206 | void String::append(const LChar* charactersToAppend, unsigned lengthToAppend) |
207 | { |
208 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
209 | |
210 | if (!m_impl) { |
211 | if (!charactersToAppend) |
212 | return; |
213 | m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
214 | return; |
215 | } |
216 | |
217 | if (!lengthToAppend) |
218 | return; |
219 | |
220 | ASSERT(charactersToAppend); |
221 | |
222 | unsigned strLength = m_impl->length(); |
223 | |
224 | if (m_impl->is8Bit()) { |
225 | if (lengthToAppend > MaxLength - strLength) |
226 | CRASH(); |
227 | LChar* data; |
228 | auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
229 | StringImpl::copyCharacters(data, m_impl->characters8(), strLength); |
230 | StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
231 | m_impl = WTFMove(newImpl); |
232 | return; |
233 | } |
234 | |
235 | if (lengthToAppend > MaxLength - strLength) |
236 | CRASH(); |
237 | UChar* data; |
238 | auto newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); |
239 | StringImpl::copyCharacters(data, m_impl->characters16(), strLength); |
240 | StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
241 | m_impl = WTFMove(newImpl); |
242 | } |
243 | |
244 | void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) |
245 | { |
246 | // FIXME: This is extremely inefficient. So much so that we might want to take this out of String's API. |
247 | |
248 | if (!m_impl) { |
249 | if (!charactersToAppend) |
250 | return; |
251 | m_impl = StringImpl::create(charactersToAppend, lengthToAppend); |
252 | return; |
253 | } |
254 | |
255 | if (!lengthToAppend) |
256 | return; |
257 | |
258 | unsigned strLength = m_impl->length(); |
259 | |
260 | ASSERT(charactersToAppend); |
261 | if (lengthToAppend > MaxLength - strLength) |
262 | CRASH(); |
263 | UChar* data; |
264 | auto newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data); |
265 | if (m_impl->is8Bit()) |
266 | StringImpl::copyCharacters(data, characters8(), strLength); |
267 | else |
268 | StringImpl::copyCharacters(data, characters16(), strLength); |
269 | StringImpl::copyCharacters(data + strLength, charactersToAppend, lengthToAppend); |
270 | m_impl = WTFMove(newImpl); |
271 | } |
272 | |
273 | |
274 | UChar32 String::characterStartingAt(unsigned i) const |
275 | { |
276 | if (!m_impl || i >= m_impl->length()) |
277 | return 0; |
278 | return m_impl->characterStartingAt(i); |
279 | } |
280 | |
281 | void String::truncate(unsigned position) |
282 | { |
283 | if (m_impl) |
284 | m_impl = m_impl->substring(0, position); |
285 | } |
286 | |
287 | template<typename CharacterType> inline void String::removeInternal(const CharacterType* characters, unsigned position, unsigned lengthToRemove) |
288 | { |
289 | CharacterType* data; |
290 | auto newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); |
291 | StringImpl::copyCharacters(data, characters, position); |
292 | StringImpl::copyCharacters(data + position, characters + position + lengthToRemove, length() - lengthToRemove - position); |
293 | m_impl = WTFMove(newImpl); |
294 | } |
295 | |
296 | void String::remove(unsigned position, unsigned lengthToRemove) |
297 | { |
298 | if (!lengthToRemove) |
299 | return; |
300 | auto length = this->length(); |
301 | if (position >= length) |
302 | return; |
303 | lengthToRemove = std::min(lengthToRemove, length - position); |
304 | if (is8Bit()) |
305 | removeInternal(characters8(), position, lengthToRemove); |
306 | else |
307 | removeInternal(characters16(), position, lengthToRemove); |
308 | } |
309 | |
310 | String String::substring(unsigned position, unsigned length) const |
311 | { |
312 | // FIXME: Should this function, and the many others like it, be inlined? |
313 | return m_impl ? m_impl->substring(position, length) : String { }; |
314 | } |
315 | |
316 | String String::substringSharingImpl(unsigned offset, unsigned length) const |
317 | { |
318 | // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). |
319 | |
320 | unsigned stringLength = this->length(); |
321 | offset = std::min(offset, stringLength); |
322 | length = std::min(length, stringLength - offset); |
323 | |
324 | if (!offset && length == stringLength) |
325 | return *this; |
326 | return StringImpl::createSubstringSharingImpl(*m_impl, offset, length); |
327 | } |
328 | |
329 | String String::convertToASCIILowercase() const |
330 | { |
331 | // FIXME: Should this function, and the many others like it, be inlined? |
332 | return m_impl ? m_impl->convertToASCIILowercase() : String { }; |
333 | } |
334 | |
335 | String String::convertToASCIIUppercase() const |
336 | { |
337 | // FIXME: Should this function, and the many others like it, be inlined? |
338 | return m_impl ? m_impl->convertToASCIIUppercase() : String { }; |
339 | } |
340 | |
341 | String String::convertToLowercaseWithoutLocale() const |
342 | { |
343 | // FIXME: Should this function, and the many others like it, be inlined? |
344 | return m_impl ? m_impl->convertToLowercaseWithoutLocale() : String { }; |
345 | } |
346 | |
347 | String String::convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(unsigned failingIndex) const |
348 | { |
349 | // FIXME: Should this function, and the many others like it, be inlined? |
350 | return m_impl ? m_impl->convertToLowercaseWithoutLocaleStartingAtFailingIndex8Bit(failingIndex) : String { }; |
351 | } |
352 | |
353 | String String::convertToUppercaseWithoutLocale() const |
354 | { |
355 | // FIXME: Should this function, and the many others like it, be inlined? |
356 | return m_impl ? m_impl->convertToUppercaseWithoutLocale() : String { }; |
357 | } |
358 | |
359 | String String::convertToLowercaseWithLocale(const AtomString& localeIdentifier) const |
360 | { |
361 | // FIXME: Should this function, and the many others like it, be inlined? |
362 | return m_impl ? m_impl->convertToLowercaseWithLocale(localeIdentifier) : String { }; |
363 | } |
364 | |
365 | String String::convertToUppercaseWithLocale(const AtomString& localeIdentifier) const |
366 | { |
367 | // FIXME: Should this function, and the many others like it, be inlined? |
368 | return m_impl ? m_impl->convertToUppercaseWithLocale(localeIdentifier) : String { }; |
369 | } |
370 | |
371 | String String::stripWhiteSpace() const |
372 | { |
373 | // FIXME: Should this function, and the many others like it, be inlined? |
374 | // FIXME: This function needs a new name. For one thing, "whitespace" is a single |
375 | // word so the "s" should be lowercase. For another, it's not clear from this name |
376 | // that the function uses the Unicode definition of whitespace. Most WebKit callers |
377 | // don't want that and eventually we should consider deleting this. |
378 | return m_impl ? m_impl->stripWhiteSpace() : String { }; |
379 | } |
380 | |
381 | String String::stripLeadingAndTrailingCharacters(CodeUnitMatchFunction predicate) const |
382 | { |
383 | // FIXME: Should this function, and the many others like it, be inlined? |
384 | return m_impl ? m_impl->stripLeadingAndTrailingCharacters(predicate) : String { }; |
385 | } |
386 | |
387 | String String::simplifyWhiteSpace() const |
388 | { |
389 | // FIXME: Should this function, and the many others like it, be inlined? |
390 | // FIXME: This function needs a new name. For one thing, "whitespace" is a single |
391 | // word so the "s" should be lowercase. For another, it's not clear from this name |
392 | // that the function uses the Unicode definition of whitespace. Most WebKit callers |
393 | // don't want that and eventually we should consider deleting this. |
394 | return m_impl ? m_impl->simplifyWhiteSpace() : String { }; |
395 | } |
396 | |
397 | String String::simplifyWhiteSpace(CodeUnitMatchFunction isWhiteSpace) const |
398 | { |
399 | // FIXME: Should this function, and the many others like it, be inlined? |
400 | return m_impl ? m_impl->simplifyWhiteSpace(isWhiteSpace) : String { }; |
401 | } |
402 | |
403 | String String::removeCharacters(CodeUnitMatchFunction findMatch) const |
404 | { |
405 | // FIXME: Should this function, and the many others like it, be inlined? |
406 | return m_impl ? m_impl->removeCharacters(findMatch) : String { }; |
407 | } |
408 | |
409 | String String::foldCase() const |
410 | { |
411 | // FIXME: Should this function, and the many others like it, be inlined? |
412 | return m_impl ? m_impl->foldCase() : String { }; |
413 | } |
414 | |
415 | bool String::percentage(int& result) const |
416 | { |
417 | if (!m_impl || !m_impl->length()) |
418 | return false; |
419 | |
420 | if ((*m_impl)[m_impl->length() - 1] != '%') |
421 | return false; |
422 | |
423 | if (m_impl->is8Bit()) |
424 | result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1); |
425 | else |
426 | result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1); |
427 | return true; |
428 | } |
429 | |
430 | Vector<UChar> String::charactersWithNullTermination() const |
431 | { |
432 | Vector<UChar> result; |
433 | |
434 | if (m_impl) { |
435 | result.reserveInitialCapacity(length() + 1); |
436 | |
437 | if (is8Bit()) { |
438 | const LChar* characters8 = m_impl->characters8(); |
439 | for (size_t i = 0; i < length(); ++i) |
440 | result.uncheckedAppend(characters8[i]); |
441 | } else { |
442 | const UChar* characters16 = m_impl->characters16(); |
443 | result.append(characters16, m_impl->length()); |
444 | } |
445 | |
446 | result.append(0); |
447 | } |
448 | |
449 | return result; |
450 | } |
451 | |
452 | String String::number(int number) |
453 | { |
454 | return numberToStringSigned<String>(number); |
455 | } |
456 | |
457 | String String::number(unsigned number) |
458 | { |
459 | return numberToStringUnsigned<String>(number); |
460 | } |
461 | |
462 | String String::number(long number) |
463 | { |
464 | return numberToStringSigned<String>(number); |
465 | } |
466 | |
467 | String String::number(unsigned long number) |
468 | { |
469 | return numberToStringUnsigned<String>(number); |
470 | } |
471 | |
472 | String String::number(long long number) |
473 | { |
474 | return numberToStringSigned<String>(number); |
475 | } |
476 | |
477 | String String::number(unsigned long long number) |
478 | { |
479 | return numberToStringUnsigned<String>(number); |
480 | } |
481 | |
482 | String String::numberToStringFixedPrecision(float number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) |
483 | { |
484 | NumberToStringBuffer buffer; |
485 | return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros); |
486 | } |
487 | |
488 | String String::numberToStringFixedPrecision(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) |
489 | { |
490 | NumberToStringBuffer buffer; |
491 | return numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros); |
492 | } |
493 | |
494 | String String::number(float number) |
495 | { |
496 | NumberToStringBuffer buffer; |
497 | return numberToString(number, buffer); |
498 | } |
499 | |
500 | String String::number(double number) |
501 | { |
502 | NumberToStringBuffer buffer; |
503 | return numberToString(number, buffer); |
504 | } |
505 | |
506 | String String::numberToStringFixedWidth(double number, unsigned decimalPlaces) |
507 | { |
508 | NumberToStringBuffer buffer; |
509 | return numberToFixedWidthString(number, decimalPlaces, buffer); |
510 | } |
511 | |
512 | int String::toIntStrict(bool* ok, int base) const |
513 | { |
514 | if (!m_impl) { |
515 | if (ok) |
516 | *ok = false; |
517 | return 0; |
518 | } |
519 | return m_impl->toIntStrict(ok, base); |
520 | } |
521 | |
522 | unsigned String::toUIntStrict(bool* ok, int base) const |
523 | { |
524 | if (!m_impl) { |
525 | if (ok) |
526 | *ok = false; |
527 | return 0; |
528 | } |
529 | return m_impl->toUIntStrict(ok, base); |
530 | } |
531 | |
532 | int64_t String::toInt64Strict(bool* ok, int base) const |
533 | { |
534 | if (!m_impl) { |
535 | if (ok) |
536 | *ok = false; |
537 | return 0; |
538 | } |
539 | return m_impl->toInt64Strict(ok, base); |
540 | } |
541 | |
542 | uint64_t String::toUInt64Strict(bool* ok, int base) const |
543 | { |
544 | if (!m_impl) { |
545 | if (ok) |
546 | *ok = false; |
547 | return 0; |
548 | } |
549 | return m_impl->toUInt64Strict(ok, base); |
550 | } |
551 | |
552 | intptr_t String::toIntPtrStrict(bool* ok, int base) const |
553 | { |
554 | if (!m_impl) { |
555 | if (ok) |
556 | *ok = false; |
557 | return 0; |
558 | } |
559 | return m_impl->toIntPtrStrict(ok, base); |
560 | } |
561 | |
562 | int String::toInt(bool* ok) const |
563 | { |
564 | if (!m_impl) { |
565 | if (ok) |
566 | *ok = false; |
567 | return 0; |
568 | } |
569 | return m_impl->toInt(ok); |
570 | } |
571 | |
572 | unsigned String::toUInt(bool* ok) const |
573 | { |
574 | if (!m_impl) { |
575 | if (ok) |
576 | *ok = false; |
577 | return 0; |
578 | } |
579 | return m_impl->toUInt(ok); |
580 | } |
581 | |
582 | int64_t String::toInt64(bool* ok) const |
583 | { |
584 | if (!m_impl) { |
585 | if (ok) |
586 | *ok = false; |
587 | return 0; |
588 | } |
589 | return m_impl->toInt64(ok); |
590 | } |
591 | |
592 | uint64_t String::toUInt64(bool* ok) const |
593 | { |
594 | if (!m_impl) { |
595 | if (ok) |
596 | *ok = false; |
597 | return 0; |
598 | } |
599 | return m_impl->toUInt64(ok); |
600 | } |
601 | |
602 | intptr_t String::toIntPtr(bool* ok) const |
603 | { |
604 | if (!m_impl) { |
605 | if (ok) |
606 | *ok = false; |
607 | return 0; |
608 | } |
609 | return m_impl->toIntPtr(ok); |
610 | } |
611 | |
612 | double String::toDouble(bool* ok) const |
613 | { |
614 | if (!m_impl) { |
615 | if (ok) |
616 | *ok = false; |
617 | return 0.0; |
618 | } |
619 | return m_impl->toDouble(ok); |
620 | } |
621 | |
622 | float String::toFloat(bool* ok) const |
623 | { |
624 | if (!m_impl) { |
625 | if (ok) |
626 | *ok = false; |
627 | return 0.0f; |
628 | } |
629 | return m_impl->toFloat(ok); |
630 | } |
631 | |
632 | String String::isolatedCopy() const & |
633 | { |
634 | // FIXME: Should this function, and the many others like it, be inlined? |
635 | return m_impl ? m_impl->isolatedCopy() : String { }; |
636 | } |
637 | |
638 | String String::isolatedCopy() && |
639 | { |
640 | if (isSafeToSendToAnotherThread()) { |
641 | // Since we know that our string is a temporary that will be destroyed |
642 | // we can just steal the m_impl from it, thus avoiding a copy. |
643 | return { WTFMove(*this) }; |
644 | } |
645 | |
646 | return m_impl ? m_impl->isolatedCopy() : String { }; |
647 | } |
648 | |
649 | bool String::isSafeToSendToAnotherThread() const |
650 | { |
651 | // AtomStrings are not safe to send between threads, as ~StringImpl() |
652 | // will try to remove them from the wrong AtomStringTable. |
653 | return isEmpty() || (m_impl->hasOneRef() && !m_impl->isAtom()); |
654 | } |
655 | |
656 | template<bool allowEmptyEntries> |
657 | inline Vector<String> String::splitInternal(const String& separator) const |
658 | { |
659 | Vector<String> result; |
660 | |
661 | unsigned startPos = 0; |
662 | size_t endPos; |
663 | while ((endPos = find(separator, startPos)) != notFound) { |
664 | if (allowEmptyEntries || startPos != endPos) |
665 | result.append(substring(startPos, endPos - startPos)); |
666 | startPos = endPos + separator.length(); |
667 | } |
668 | if (allowEmptyEntries || startPos != length()) |
669 | result.append(substring(startPos)); |
670 | |
671 | return result; |
672 | } |
673 | |
674 | template<bool allowEmptyEntries> |
675 | inline void String::splitInternal(UChar separator, const SplitFunctor& functor) const |
676 | { |
677 | StringView view(*this); |
678 | |
679 | unsigned startPos = 0; |
680 | size_t endPos; |
681 | while ((endPos = find(separator, startPos)) != notFound) { |
682 | if (allowEmptyEntries || startPos != endPos) |
683 | functor(view.substring(startPos, endPos - startPos)); |
684 | startPos = endPos + 1; |
685 | } |
686 | if (allowEmptyEntries || startPos != length()) |
687 | functor(view.substring(startPos)); |
688 | } |
689 | |
690 | template<bool allowEmptyEntries> |
691 | inline Vector<String> String::splitInternal(UChar separator) const |
692 | { |
693 | Vector<String> result; |
694 | splitInternal<allowEmptyEntries>(separator, [&result](StringView item) { |
695 | result.append(item.toString()); |
696 | }); |
697 | |
698 | return result; |
699 | } |
700 | |
701 | void String::split(UChar separator, const SplitFunctor& functor) const |
702 | { |
703 | splitInternal<false>(separator, functor); |
704 | } |
705 | |
706 | Vector<String> String::split(UChar separator) const |
707 | { |
708 | return splitInternal<false>(separator); |
709 | } |
710 | |
711 | Vector<String> String::split(const String& separator) const |
712 | { |
713 | return splitInternal<false>(separator); |
714 | } |
715 | |
716 | void String::splitAllowingEmptyEntries(UChar separator, const SplitFunctor& functor) const |
717 | { |
718 | splitInternal<true>(separator, functor); |
719 | } |
720 | |
721 | Vector<String> String::splitAllowingEmptyEntries(UChar separator) const |
722 | { |
723 | return splitInternal<true>(separator); |
724 | } |
725 | |
726 | Vector<String> String::splitAllowingEmptyEntries(const String& separator) const |
727 | { |
728 | return splitInternal<true>(separator); |
729 | } |
730 | |
731 | CString String::ascii() const |
732 | { |
733 | // Printable ASCII characters 32..127 and the null character are |
734 | // preserved, characters outside of this range are converted to '?'. |
735 | |
736 | unsigned length = this->length(); |
737 | if (!length) { |
738 | char* characterBuffer; |
739 | return CString::newUninitialized(length, characterBuffer); |
740 | } |
741 | |
742 | if (this->is8Bit()) { |
743 | const LChar* characters = this->characters8(); |
744 | |
745 | char* characterBuffer; |
746 | CString result = CString::newUninitialized(length, characterBuffer); |
747 | |
748 | for (unsigned i = 0; i < length; ++i) { |
749 | LChar ch = characters[i]; |
750 | characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
751 | } |
752 | |
753 | return result; |
754 | } |
755 | |
756 | const UChar* characters = this->characters16(); |
757 | |
758 | char* characterBuffer; |
759 | CString result = CString::newUninitialized(length, characterBuffer); |
760 | |
761 | for (unsigned i = 0; i < length; ++i) { |
762 | UChar ch = characters[i]; |
763 | characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; |
764 | } |
765 | |
766 | return result; |
767 | } |
768 | |
769 | CString String::latin1() const |
770 | { |
771 | // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are |
772 | // preserved, characters outside of this range are converted to '?'. |
773 | |
774 | unsigned length = this->length(); |
775 | |
776 | if (!length) |
777 | return CString("" , 0); |
778 | |
779 | if (is8Bit()) |
780 | return CString(reinterpret_cast<const char*>(this->characters8()), length); |
781 | |
782 | const UChar* characters = this->characters16(); |
783 | |
784 | char* characterBuffer; |
785 | CString result = CString::newUninitialized(length, characterBuffer); |
786 | |
787 | for (unsigned i = 0; i < length; ++i) { |
788 | UChar ch = characters[i]; |
789 | characterBuffer[i] = !isLatin1(ch) ? '?' : ch; |
790 | } |
791 | |
792 | return result; |
793 | } |
794 | |
795 | Expected<CString, UTF8ConversionError> String::tryGetUtf8(ConversionMode mode) const |
796 | { |
797 | return m_impl ? m_impl->tryGetUtf8(mode) : CString { "" , 0 }; |
798 | } |
799 | |
800 | Expected<CString, UTF8ConversionError> String::tryGetUtf8() const |
801 | { |
802 | return tryGetUtf8(LenientConversion); |
803 | } |
804 | |
805 | CString String::utf8(ConversionMode mode) const |
806 | { |
807 | Expected<CString, UTF8ConversionError> expectedString = tryGetUtf8(mode); |
808 | RELEASE_ASSERT(expectedString); |
809 | return expectedString.value(); |
810 | } |
811 | |
812 | CString String::utf8() const |
813 | { |
814 | return utf8(LenientConversion); |
815 | } |
816 | |
817 | String String::make8BitFrom16BitSource(const UChar* source, size_t length) |
818 | { |
819 | if (!length) |
820 | return String(); |
821 | |
822 | LChar* destination; |
823 | String result = String::createUninitialized(length, destination); |
824 | |
825 | copyLCharsFromUCharSource(destination, source, length); |
826 | |
827 | return result; |
828 | } |
829 | |
830 | String String::make16BitFrom8BitSource(const LChar* source, size_t length) |
831 | { |
832 | if (!length) |
833 | return String(); |
834 | |
835 | UChar* destination; |
836 | String result = String::createUninitialized(length, destination); |
837 | |
838 | StringImpl::copyCharacters(destination, source, length); |
839 | |
840 | return result; |
841 | } |
842 | |
843 | String String::fromUTF8(const LChar* stringStart, size_t length) |
844 | { |
845 | if (length > MaxLength) |
846 | CRASH(); |
847 | |
848 | if (!stringStart) |
849 | return String(); |
850 | |
851 | if (!length) |
852 | return emptyString(); |
853 | |
854 | if (charactersAreAllASCII(stringStart, length)) |
855 | return StringImpl::create(stringStart, length); |
856 | |
857 | Vector<UChar, 1024> buffer(length); |
858 | UChar* bufferStart = buffer.data(); |
859 | |
860 | UChar* bufferCurrent = bufferStart; |
861 | const char* stringCurrent = reinterpret_cast<const char*>(stringStart); |
862 | if (!convertUTF8ToUTF16(stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size())) |
863 | return String(); |
864 | |
865 | unsigned utf16Length = bufferCurrent - bufferStart; |
866 | ASSERT_WITH_SECURITY_IMPLICATION(utf16Length < length); |
867 | return StringImpl::create(bufferStart, utf16Length); |
868 | } |
869 | |
870 | String String::fromUTF8(const LChar* string) |
871 | { |
872 | if (!string) |
873 | return String(); |
874 | return fromUTF8(string, strlen(reinterpret_cast<const char*>(string))); |
875 | } |
876 | |
877 | String String::fromUTF8(const CString& s) |
878 | { |
879 | return fromUTF8(s.data()); |
880 | } |
881 | |
882 | String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size) |
883 | { |
884 | String utf8 = fromUTF8(string, size); |
885 | if (!utf8) |
886 | return String(string, size); |
887 | return utf8; |
888 | } |
889 | |
890 | // String Operations |
// Returns the length of the leading run of `data` that looks like an
// integer: optional spaces or newlines, an optional '+' or '-', then ASCII
// digits. Scanning stops at the first character that does not fit.
template<typename CharacterType>
static unsigned lengthOfCharactersAsInteger(const CharacterType* data, size_t length)
{
    size_t index = 0;

    // Allow leading spaces.
    while (index != length && isSpaceOrNewline(data[index]))
        ++index;

    // Allow sign.
    if (index != length) {
        const auto candidate = data[index];
        if (candidate == '+' || candidate == '-')
            ++index;
    }

    // Allow digits.
    while (index != length && isASCIIDigit(data[index]))
        ++index;

    return index;
}
914 | |
915 | int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base) |
916 | { |
917 | return toIntegralType<int, LChar>(data, length, ok, base); |
918 | } |
919 | |
920 | int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) |
921 | { |
922 | return toIntegralType<int, UChar>(data, length, ok, base); |
923 | } |
924 | |
925 | unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base) |
926 | { |
927 | return toIntegralType<unsigned, LChar>(data, length, ok, base); |
928 | } |
929 | |
930 | unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) |
931 | { |
932 | return toIntegralType<unsigned, UChar>(data, length, ok, base); |
933 | } |
934 | |
935 | int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
936 | { |
937 | return toIntegralType<int64_t, LChar>(data, length, ok, base); |
938 | } |
939 | |
940 | int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
941 | { |
942 | return toIntegralType<int64_t, UChar>(data, length, ok, base); |
943 | } |
944 | |
945 | uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base) |
946 | { |
947 | return toIntegralType<uint64_t, LChar>(data, length, ok, base); |
948 | } |
949 | |
950 | uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) |
951 | { |
952 | return toIntegralType<uint64_t, UChar>(data, length, ok, base); |
953 | } |
954 | |
955 | intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base) |
956 | { |
957 | return toIntegralType<intptr_t, LChar>(data, length, ok, base); |
958 | } |
959 | |
960 | intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) |
961 | { |
962 | return toIntegralType<intptr_t, UChar>(data, length, ok, base); |
963 | } |
964 | |
965 | int charactersToInt(const LChar* data, size_t length, bool* ok) |
966 | { |
967 | return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
968 | } |
969 | |
970 | int charactersToInt(const UChar* data, size_t length, bool* ok) |
971 | { |
972 | return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10); |
973 | } |
974 | |
975 | unsigned charactersToUInt(const LChar* data, size_t length, bool* ok) |
976 | { |
977 | return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
978 | } |
979 | |
980 | unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) |
981 | { |
982 | return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
983 | } |
984 | |
985 | int64_t charactersToInt64(const LChar* data, size_t length, bool* ok) |
986 | { |
987 | return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
988 | } |
989 | |
990 | int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) |
991 | { |
992 | return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
993 | } |
994 | |
995 | uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok) |
996 | { |
997 | return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
998 | } |
999 | |
1000 | uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) |
1001 | { |
1002 | return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
1003 | } |
1004 | |
1005 | intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok) |
1006 | { |
1007 | return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10); |
1008 | } |
1009 | |
1010 | intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) |
1011 | { |
1012 | return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10); |
1013 | } |
1014 | |
// Compile-time switch for toDoubleType(): decides whether characters left over
// after the parsed number make the conversion report failure through |ok|.
enum TrailingJunkPolicy {
    DisallowTrailingJunk, // |ok| is true only if the entire input was consumed.
    AllowTrailingJunk // |ok| is true as soon as something was parsed.
};
1016 | |
1017 | template<typename CharacterType, TrailingJunkPolicy policy> |
1018 | static inline double toDoubleType(const CharacterType* data, size_t length, bool* ok, size_t& parsedLength) |
1019 | { |
1020 | size_t leadingSpacesLength = 0; |
1021 | while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength])) |
1022 | ++leadingSpacesLength; |
1023 | |
1024 | double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength); |
1025 | if (!parsedLength) { |
1026 | if (ok) |
1027 | *ok = false; |
1028 | return 0.0; |
1029 | } |
1030 | |
1031 | parsedLength += leadingSpacesLength; |
1032 | if (ok) |
1033 | *ok = policy == AllowTrailingJunk || parsedLength == length; |
1034 | return number; |
1035 | } |
1036 | |
1037 | double charactersToDouble(const LChar* data, size_t length, bool* ok) |
1038 | { |
1039 | size_t parsedLength; |
1040 | return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
1041 | } |
1042 | |
1043 | double charactersToDouble(const UChar* data, size_t length, bool* ok) |
1044 | { |
1045 | size_t parsedLength; |
1046 | return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength); |
1047 | } |
1048 | |
1049 | float charactersToFloat(const LChar* data, size_t length, bool* ok) |
1050 | { |
1051 | // FIXME: This will return ok even when the string fits into a double but not a float. |
1052 | size_t parsedLength; |
1053 | return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
1054 | } |
1055 | |
1056 | float charactersToFloat(const UChar* data, size_t length, bool* ok) |
1057 | { |
1058 | // FIXME: This will return ok even when the string fits into a double but not a float. |
1059 | size_t parsedLength; |
1060 | return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength)); |
1061 | } |
1062 | |
1063 | float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength) |
1064 | { |
1065 | // FIXME: This will return ok even when the string fits into a double but not a float. |
1066 | return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
1067 | } |
1068 | |
1069 | float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength) |
1070 | { |
1071 | // FIXME: This will return ok even when the string fits into a double but not a float. |
1072 | return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength)); |
1073 | } |
1074 | |
1075 | const String& emptyString() |
1076 | { |
1077 | static NeverDestroyed<String> emptyString(StringImpl::empty()); |
1078 | return emptyString; |
1079 | } |
1080 | |
1081 | const String& nullString() |
1082 | { |
1083 | static NeverDestroyed<String> nullString; |
1084 | return nullString; |
1085 | } |
1086 | |
1087 | } // namespace WTF |
1088 | |
1089 | #ifndef NDEBUG |
1090 | |
1091 | // For use in the debugger. |
1092 | String* string(const char*); |
1093 | Vector<char> asciiDebug(StringImpl* impl); |
1094 | Vector<char> asciiDebug(String& string); |
1095 | |
1096 | void String::show() const |
1097 | { |
1098 | dataLogF("%s\n" , asciiDebug(impl()).data()); |
1099 | } |
1100 | |
1101 | String* string(const char* s) |
1102 | { |
1103 | // Intentionally leaks memory! |
1104 | return new String(s); |
1105 | } |
1106 | |
1107 | Vector<char> asciiDebug(StringImpl* impl) |
1108 | { |
1109 | if (!impl) |
1110 | return asciiDebug(String("[null]"_s ).impl()); |
1111 | |
1112 | Vector<char> buffer; |
1113 | for (unsigned i = 0; i < impl->length(); ++i) { |
1114 | UChar ch = (*impl)[i]; |
1115 | if (isASCIIPrintable(ch)) { |
1116 | if (ch == '\\') |
1117 | buffer.append(ch); |
1118 | buffer.append(ch); |
1119 | } else { |
1120 | buffer.append('\\'); |
1121 | buffer.append('u'); |
1122 | appendUnsignedAsHexFixedSize(ch, buffer, 4); |
1123 | } |
1124 | } |
1125 | buffer.append('\0'); |
1126 | return buffer; |
1127 | } |
1128 | |
1129 | Vector<char> asciiDebug(String& string) |
1130 | { |
1131 | return asciiDebug(string.impl()); |
1132 | } |
1133 | |
1134 | #endif |
1135 | |