1 | /* |
2 | * Copyright (C) 2010-2019 Apple Inc. All rights reserved. |
3 | * Copyright (C) 2012 Google Inc. All rights reserved. |
4 | * |
5 | * Redistribution and use in source and binary forms, with or without |
6 | * modification, are permitted provided that the following conditions |
7 | * are met: |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * 2. Redistributions in binary form must reproduce the above copyright |
11 | * notice, this list of conditions and the following disclaimer in the |
12 | * documentation and/or other materials provided with the distribution. |
13 | * |
14 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
15 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
18 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/text/StringBuilder.h> |
29 | |
30 | #include <wtf/dtoa.h> |
31 | #include <wtf/MathExtras.h> |
32 | |
33 | namespace WTF { |
34 | |
35 | static constexpr unsigned maxCapacity = String::MaxLength; |
36 | |
37 | static unsigned expandedCapacity(unsigned capacity, unsigned requiredLength) |
38 | { |
39 | static constexpr unsigned minimumCapacity = 16; |
40 | return std::max(requiredLength, std::max(minimumCapacity, std::min(capacity * 2, maxCapacity))); |
41 | } |
42 | |
43 | void StringBuilder::reifyString() const |
44 | { |
45 | ASSERT(!hasOverflowed()); |
46 | |
47 | // Check if the string already exists. |
48 | if (!m_string.isNull()) { |
49 | ASSERT(m_string.length() == m_length.unsafeGet<unsigned>()); |
50 | return; |
51 | } |
52 | |
53 | #if !ASSERT_DISABLED |
54 | m_isReified = true; |
55 | #endif |
56 | |
57 | // Check for empty. |
58 | if (!m_length) { |
59 | m_string = StringImpl::empty(); |
60 | return; |
61 | } |
62 | |
63 | // Must be valid in the buffer, take a substring (unless string fills the buffer). |
64 | ASSERT(m_buffer && m_length.unsafeGet<unsigned>() <= m_buffer->length()); |
65 | if (m_length.unsafeGet<unsigned>() == m_buffer->length()) |
66 | m_string = m_buffer.get(); |
67 | else |
68 | m_string = StringImpl::createSubstringSharingImpl(*m_buffer, 0, m_length.unsafeGet()); |
69 | } |
70 | |
71 | void StringBuilder::resize(unsigned newSize) |
72 | { |
73 | if (hasOverflowed()) |
74 | return; |
75 | |
76 | // Check newSize < m_length, hence m_length > 0. |
77 | unsigned oldLength = m_length.unsafeGet(); |
78 | ASSERT(newSize <= oldLength); |
79 | if (newSize == oldLength) |
80 | return; |
81 | ASSERT(oldLength); |
82 | |
83 | m_length = newSize; |
84 | ASSERT(!hasOverflowed()); |
85 | |
86 | // If there is a buffer, we only need to duplicate it if it has more than one ref. |
87 | if (m_buffer) { |
88 | m_string = String(); // Clear the string to remove the reference to m_buffer if any before checking the reference count of m_buffer. |
89 | if (!m_buffer->hasOneRef()) { |
90 | if (m_buffer->is8Bit()) |
91 | allocateBuffer(m_buffer->characters8(), m_buffer->length()); |
92 | else |
93 | allocateBuffer(m_buffer->characters16(), m_buffer->length()); |
94 | } |
95 | ASSERT(hasOverflowed() || m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
96 | return; |
97 | } |
98 | |
99 | // Since m_length && !m_buffer, the string must be valid in m_string, and m_string.length() > 0. |
100 | ASSERT(!m_string.isEmpty()); |
101 | ASSERT(oldLength == m_string.length()); |
102 | ASSERT(newSize < m_string.length()); |
103 | m_string = StringImpl::createSubstringSharingImpl(*m_string.impl(), 0, newSize); |
104 | } |
105 | |
106 | // Allocate a new 8 bit buffer, copying in currentCharacters (these may come from either m_string |
107 | // or m_buffer, neither will be reassigned until the copy has completed). |
108 | void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requiredLength) |
109 | { |
110 | ASSERT(!hasOverflowed()); |
111 | ASSERT(m_is8Bit); |
112 | // Copy the existing data into a new buffer, set result to point to the end of the existing data. |
113 | auto buffer = StringImpl::tryCreateUninitialized(requiredLength, m_bufferCharacters8); |
114 | if (UNLIKELY(!buffer)) |
115 | return didOverflow(); |
116 | std::memcpy(m_bufferCharacters8, currentCharacters, m_length.unsafeGet()); |
117 | |
118 | // Update the builder state. |
119 | m_buffer = WTFMove(buffer); |
120 | m_string = String(); |
121 | ASSERT(m_buffer->length() == requiredLength); |
122 | } |
123 | |
124 | // Allocate a new 16 bit buffer, copying in currentCharacters (these may come from either m_string |
125 | // or m_buffer, neither will be reassigned until the copy has completed). |
126 | void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requiredLength) |
127 | { |
128 | ASSERT(!hasOverflowed()); |
129 | ASSERT(!m_is8Bit); |
130 | // Copy the existing data into a new buffer, set result to point to the end of the existing data. |
131 | auto buffer = StringImpl::tryCreateUninitialized(requiredLength, m_bufferCharacters16); |
132 | if (UNLIKELY(!buffer)) |
133 | return didOverflow(); |
134 | std::memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length.unsafeGet()) * sizeof(UChar)); // This can't overflow. |
135 | |
136 | // Update the builder state. |
137 | m_buffer = WTFMove(buffer); |
138 | m_string = String(); |
139 | ASSERT(m_buffer->length() == requiredLength); |
140 | } |
141 | |
142 | // Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come |
143 | // from either m_string or m_buffer, neither will be reassigned until the copy has completed). |
144 | void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength) |
145 | { |
146 | ASSERT(!hasOverflowed()); |
147 | ASSERT(m_is8Bit); |
148 | unsigned length = m_length.unsafeGet(); |
149 | ASSERT(requiredLength <= maxCapacity && requiredLength >= length); |
150 | // Copy the existing data into a new buffer, set result to point to the end of the existing data. |
151 | auto buffer = StringImpl::tryCreateUninitialized(requiredLength, m_bufferCharacters16); |
152 | if (UNLIKELY(!buffer)) |
153 | return didOverflow(); // Treat a failure to allcoate as an overflow. |
154 | for (unsigned i = 0; i < length; ++i) |
155 | m_bufferCharacters16[i] = currentCharacters[i]; |
156 | |
157 | m_is8Bit = false; |
158 | |
159 | // Update the builder state. |
160 | m_buffer = WTFMove(buffer); |
161 | m_string = String(); |
162 | ASSERT(m_buffer->length() == requiredLength); |
163 | } |
164 | |
165 | template<> |
166 | void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength) |
167 | { |
168 | // If the buffer has only one ref (by this StringBuilder), reallocate it, |
169 | // otherwise fall back to "allocate and copy" method. |
170 | m_string = String(); |
171 | |
172 | ASSERT(m_is8Bit); |
173 | ASSERT(m_buffer->is8Bit()); |
174 | |
175 | if (m_buffer->hasOneRef()) { |
176 | auto expectedStringImpl = StringImpl::tryReallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters8); |
177 | if (UNLIKELY(!expectedStringImpl)) |
178 | return didOverflow(); |
179 | m_buffer = WTFMove(expectedStringImpl.value()); |
180 | } else |
181 | allocateBuffer(m_buffer->characters8(), requiredLength); |
182 | ASSERT(hasOverflowed() || m_buffer->length() == requiredLength); |
183 | } |
184 | |
185 | template<> |
186 | void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength) |
187 | { |
188 | // If the buffer has only one ref (by this StringBuilder), reallocate it, |
189 | // otherwise fall back to "allocate and copy" method. |
190 | m_string = String(); |
191 | |
192 | if (m_buffer->is8Bit()) |
193 | allocateBufferUpConvert(m_buffer->characters8(), requiredLength); |
194 | else if (m_buffer->hasOneRef()) { |
195 | auto expectedStringImpl = StringImpl::tryReallocate(m_buffer.releaseNonNull(), requiredLength, m_bufferCharacters16); |
196 | if (UNLIKELY(!expectedStringImpl)) |
197 | return didOverflow(); |
198 | m_buffer = WTFMove(expectedStringImpl.value()); |
199 | } else |
200 | allocateBuffer(m_buffer->characters16(), requiredLength); |
201 | ASSERT(hasOverflowed() || m_buffer->length() == requiredLength); |
202 | } |
203 | |
204 | void StringBuilder::reserveCapacity(unsigned newCapacity) |
205 | { |
206 | if (hasOverflowed()) |
207 | return; |
208 | ASSERT(newCapacity <= String::MaxLength); |
209 | if (m_buffer) { |
210 | // If there is already a buffer, then grow if necessary. |
211 | if (newCapacity > m_buffer->length()) { |
212 | if (m_buffer->is8Bit()) |
213 | reallocateBuffer<LChar>(newCapacity); |
214 | else |
215 | reallocateBuffer<UChar>(newCapacity); |
216 | } |
217 | } else { |
218 | // Grow the string, if necessary. |
219 | unsigned length = m_length.unsafeGet(); |
220 | if (newCapacity > length) { |
221 | if (!length) { |
222 | LChar* nullPlaceholder = nullptr; |
223 | allocateBuffer(nullPlaceholder, newCapacity); |
224 | } else if (m_string.is8Bit()) |
225 | allocateBuffer(m_string.characters8(), newCapacity); |
226 | else |
227 | allocateBuffer(m_string.characters16(), newCapacity); |
228 | } |
229 | } |
230 | ASSERT(hasOverflowed() || !newCapacity || m_buffer->length() >= newCapacity); |
231 | } |
232 | |
233 | // Make 'additionalLength' additional capacity be available in m_buffer, update m_string & m_length, |
234 | // return a pointer to the newly allocated storage. |
235 | // Returns nullptr if the size of the new builder would have overflowed |
236 | template<typename CharacterType> ALWAYS_INLINE CharacterType* StringBuilder::extendBufferForAppending(unsigned additionalLength) |
237 | { |
238 | ASSERT(additionalLength); |
239 | |
240 | // Calculate the new size of the builder after appending. |
241 | CheckedInt32 requiredLength = m_length + additionalLength; |
242 | if (requiredLength.hasOverflowed()) { |
243 | didOverflow(); |
244 | return nullptr; |
245 | } |
246 | |
247 | return extendBufferForAppendingWithoutOverflowCheck<CharacterType>(requiredLength); |
248 | } |
249 | |
250 | template<typename CharacterType> ALWAYS_INLINE CharacterType* StringBuilder::extendBufferForAppendingWithoutOverflowCheck(CheckedInt32 requiredLength) |
251 | { |
252 | ASSERT(!requiredLength.hasOverflowed()); |
253 | |
254 | if (m_buffer && (requiredLength.unsafeGet<unsigned>() <= m_buffer->length())) { |
255 | // If the buffer is valid it must be at least as long as the current builder contents! |
256 | ASSERT(m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
257 | unsigned currentLength = m_length.unsafeGet(); |
258 | m_string = String(); |
259 | m_length = requiredLength; |
260 | return getBufferCharacters<CharacterType>() + currentLength; |
261 | } |
262 | |
263 | return extendBufferForAppendingSlowCase<CharacterType>(requiredLength.unsafeGet()); |
264 | } |
265 | |
266 | LChar* StringBuilder::extendBufferForAppending8(CheckedInt32 requiredLength) |
267 | { |
268 | if (UNLIKELY(requiredLength.hasOverflowed())) { |
269 | didOverflow(); |
270 | return nullptr; |
271 | } |
272 | return extendBufferForAppendingWithoutOverflowCheck<LChar>(requiredLength); |
273 | } |
274 | |
275 | UChar* StringBuilder::extendBufferForAppending16(CheckedInt32 requiredLength) |
276 | { |
277 | if (UNLIKELY(requiredLength.hasOverflowed())) { |
278 | didOverflow(); |
279 | return nullptr; |
280 | } |
281 | if (m_is8Bit) { |
282 | const LChar* characters; |
283 | if (m_buffer) { |
284 | ASSERT(m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
285 | characters = m_buffer->characters8(); |
286 | } else { |
287 | ASSERT(m_string.length() == m_length.unsafeGet<unsigned>()); |
288 | characters = m_string.isNull() ? nullptr : m_string.characters8(); |
289 | } |
290 | allocateBufferUpConvert(characters, expandedCapacity(capacity(), requiredLength.unsafeGet())); |
291 | if (UNLIKELY(hasOverflowed())) |
292 | return nullptr; |
293 | unsigned oldLength = m_length.unsafeGet(); |
294 | m_length = requiredLength.unsafeGet(); |
295 | return m_bufferCharacters16 + oldLength; |
296 | } |
297 | return extendBufferForAppendingWithoutOverflowCheck<UChar>(requiredLength); |
298 | } |
299 | |
300 | // Make 'requiredLength' capacity be available in m_buffer, update m_string & m_length, |
301 | // return a pointer to the newly allocated storage. |
302 | template<typename CharacterType> CharacterType* StringBuilder::extendBufferForAppendingSlowCase(unsigned requiredLength) |
303 | { |
304 | ASSERT(!hasOverflowed()); |
305 | ASSERT(requiredLength); |
306 | |
307 | if (m_buffer) { |
308 | // If the buffer is valid it must be at least as long as the current builder contents! |
309 | ASSERT(m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
310 | |
311 | reallocateBuffer<CharacterType>(expandedCapacity(capacity(), requiredLength)); |
312 | } else { |
313 | ASSERT(m_string.length() == m_length.unsafeGet<unsigned>()); |
314 | allocateBuffer(m_length ? m_string.characters<CharacterType>() : nullptr, expandedCapacity(capacity(), requiredLength)); |
315 | } |
316 | if (UNLIKELY(hasOverflowed())) |
317 | return nullptr; |
318 | |
319 | CharacterType* result = getBufferCharacters<CharacterType>() + m_length.unsafeGet(); |
320 | m_length = requiredLength; |
321 | ASSERT(!hasOverflowed()); |
322 | ASSERT(m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
323 | return result; |
324 | } |
325 | |
326 | void StringBuilder::appendCharacters(const UChar* characters, unsigned length) |
327 | { |
328 | if (!length || hasOverflowed()) |
329 | return; |
330 | |
331 | ASSERT(characters); |
332 | |
333 | if (m_is8Bit && length == 1 && isLatin1(characters[0])) { |
334 | append(static_cast<LChar>(characters[0])); |
335 | return; |
336 | } |
337 | |
338 | // FIXME: Should we optimize memory by keeping the string 8-bit when all the characters are Latin-1? |
339 | |
340 | UChar* destination = extendBufferForAppending16(m_length + length); |
341 | if (UNLIKELY(!destination)) |
342 | return; |
343 | std::memcpy(destination, characters, static_cast<size_t>(length) * sizeof(UChar)); |
344 | ASSERT(!hasOverflowed()); |
345 | ASSERT(m_buffer->length() >= m_length.unsafeGet<unsigned>()); |
346 | } |
347 | |
348 | void StringBuilder::appendCharacters(const LChar* characters, unsigned length) |
349 | { |
350 | if (!length || hasOverflowed()) |
351 | return; |
352 | |
353 | ASSERT(characters); |
354 | |
355 | if (m_is8Bit) { |
356 | LChar* destination = extendBufferForAppending<LChar>(length); |
357 | if (!destination) { |
358 | ASSERT(hasOverflowed()); |
359 | return; |
360 | } |
361 | if (length > 8) |
362 | std::memcpy(destination, characters, length); |
363 | else { |
364 | // FIXME: How strong is our evidence that this is faster than memcpy? What platforms is this true for? |
365 | const LChar* end = characters + length; |
366 | while (characters < end) |
367 | *destination++ = *characters++; |
368 | } |
369 | } else { |
370 | UChar* destination = extendBufferForAppending<UChar>(length); |
371 | if (!destination) { |
372 | ASSERT(hasOverflowed()); |
373 | return; |
374 | } |
375 | const LChar* end = characters + length; |
376 | while (characters < end) |
377 | *destination++ = *characters++; |
378 | } |
379 | } |
380 | |
381 | #if USE(CF) |
382 | |
383 | void StringBuilder::append(CFStringRef string) |
384 | { |
385 | // Fast path: avoid constructing a temporary String when possible. |
386 | if (auto* characters = CFStringGetCStringPtr(string, kCFStringEncodingISOLatin1)) { |
387 | appendCharacters(reinterpret_cast<const LChar*>(characters), CFStringGetLength(string)); |
388 | return; |
389 | } |
390 | append(String(string)); |
391 | } |
392 | |
393 | #endif |
394 | |
395 | void StringBuilder::appendNumber(int number) |
396 | { |
397 | numberToStringSigned<StringBuilder>(number, this); |
398 | } |
399 | |
400 | void StringBuilder::appendNumber(unsigned number) |
401 | { |
402 | numberToStringUnsigned<StringBuilder>(number, this); |
403 | } |
404 | |
405 | void StringBuilder::appendNumber(long number) |
406 | { |
407 | numberToStringSigned<StringBuilder>(number, this); |
408 | } |
409 | |
410 | void StringBuilder::appendNumber(unsigned long number) |
411 | { |
412 | numberToStringUnsigned<StringBuilder>(number, this); |
413 | } |
414 | |
415 | void StringBuilder::appendNumber(long long number) |
416 | { |
417 | numberToStringSigned<StringBuilder>(number, this); |
418 | } |
419 | |
420 | void StringBuilder::appendNumber(unsigned long long number) |
421 | { |
422 | numberToStringUnsigned<StringBuilder>(number, this); |
423 | } |
424 | |
425 | void StringBuilder::appendFixedPrecisionNumber(float number, unsigned precision, TrailingZerosTruncatingPolicy policy) |
426 | { |
427 | NumberToStringBuffer buffer; |
428 | append(numberToFixedPrecisionString(number, precision, buffer, policy == TruncateTrailingZeros)); |
429 | } |
430 | |
431 | void StringBuilder::appendFixedPrecisionNumber(double number, unsigned precision, TrailingZerosTruncatingPolicy policy) |
432 | { |
433 | NumberToStringBuffer buffer; |
434 | append(numberToFixedPrecisionString(number, precision, buffer, policy == TruncateTrailingZeros)); |
435 | } |
436 | |
437 | void StringBuilder::appendNumber(float number) |
438 | { |
439 | NumberToStringBuffer buffer; |
440 | append(numberToString(number, buffer)); |
441 | } |
442 | |
443 | void StringBuilder::appendNumber(double number) |
444 | { |
445 | NumberToStringBuffer buffer; |
446 | append(numberToString(number, buffer)); |
447 | } |
448 | |
449 | void StringBuilder::appendFixedWidthNumber(float number, unsigned decimalPlaces) |
450 | { |
451 | NumberToStringBuffer buffer; |
452 | append(numberToFixedWidthString(number, decimalPlaces, buffer)); |
453 | } |
454 | |
455 | void StringBuilder::appendFixedWidthNumber(double number, unsigned decimalPlaces) |
456 | { |
457 | NumberToStringBuffer buffer; |
458 | append(numberToFixedWidthString(number, decimalPlaces, buffer)); |
459 | } |
460 | |
461 | bool StringBuilder::canShrink() const |
462 | { |
463 | if (hasOverflowed()) |
464 | return false; |
465 | // Only shrink the buffer if it's less than 80% full. |
466 | // FIXME: We should tune this heuristic based some actual test case measurements. |
467 | unsigned length = m_length.unsafeGet(); |
468 | return m_buffer && m_buffer->length() > (length + (length >> 2)); |
469 | } |
470 | |
471 | void StringBuilder::shrinkToFit() |
472 | { |
473 | if (canShrink()) { |
474 | if (m_is8Bit) |
475 | reallocateBuffer<LChar>(m_length.unsafeGet()); |
476 | else |
477 | reallocateBuffer<UChar>(m_length.unsafeGet()); |
478 | ASSERT(!hasOverflowed()); |
479 | m_string = WTFMove(m_buffer); |
480 | } |
481 | } |
482 | |
483 | } // namespace WTF |
484 | |