1 | /* |
2 | |
3 | Copyright (C) 2014-2019 Apple Inc. All rights reserved. |
4 | |
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the following conditions |
7 | are met: |
8 | 1. Redistributions of source code must retain the above copyright |
9 | notice, this list of conditions and the following disclaimer. |
10 | 2. Redistributions in binary form must reproduce the above copyright |
11 | notice, this list of conditions and the following disclaimer in the |
12 | documentation and/or other materials provided with the distribution. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY |
15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
17 | DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/text/StringView.h> |
29 | |
30 | #include <mutex> |
31 | #include <unicode/ubrk.h> |
32 | #include <unicode/unorm2.h> |
33 | #include <wtf/HashMap.h> |
34 | #include <wtf/Lock.h> |
35 | #include <wtf/NeverDestroyed.h> |
36 | #include <wtf/Optional.h> |
37 | #include <wtf/text/TextBreakIterator.h> |
38 | |
39 | namespace WTF { |
40 | |
41 | bool StringView::containsIgnoringASCIICase(const StringView& matchString) const |
42 | { |
43 | return findIgnoringASCIICase(matchString) != notFound; |
44 | } |
45 | |
46 | bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
47 | { |
48 | return findIgnoringASCIICase(matchString, startOffset) != notFound; |
49 | } |
50 | |
51 | size_t StringView::findIgnoringASCIICase(const StringView& matchString) const |
52 | { |
53 | return ::WTF::findIgnoringASCIICase(*this, matchString, 0); |
54 | } |
55 | |
56 | size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
57 | { |
58 | return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset); |
59 | } |
60 | |
61 | bool StringView::startsWith(UChar character) const |
62 | { |
63 | return m_length && (*this)[0] == character; |
64 | } |
65 | |
66 | bool StringView::startsWith(const StringView& prefix) const |
67 | { |
68 | return ::WTF::startsWith(*this, prefix); |
69 | } |
70 | |
71 | bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const |
72 | { |
73 | return ::WTF::startsWithIgnoringASCIICase(*this, prefix); |
74 | } |
75 | |
76 | bool StringView::endsWith(const StringView& suffix) const |
77 | { |
78 | return ::WTF::endsWith(*this, suffix); |
79 | } |
80 | |
81 | bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const |
82 | { |
83 | return ::WTF::endsWithIgnoringASCIICase(*this, suffix); |
84 | } |
85 | |
86 | Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const |
87 | { |
88 | if (isNull()) |
89 | return CString("" , 0); |
90 | if (is8Bit()) |
91 | return StringImpl::utf8ForCharacters(characters8(), length()); |
92 | return StringImpl::utf8ForCharacters(characters16(), length(), mode); |
93 | } |
94 | |
95 | CString StringView::utf8(ConversionMode mode) const |
96 | { |
97 | auto expectedString = tryGetUtf8(mode); |
98 | RELEASE_ASSERT(expectedString); |
99 | return expectedString.value(); |
100 | } |
101 | |
102 | size_t StringView::find(StringView matchString, unsigned start) const |
103 | { |
104 | return findCommon(*this, matchString, start); |
105 | } |
106 | |
107 | void StringView::SplitResult::Iterator::findNextSubstring() |
108 | { |
109 | for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) { |
110 | if (m_result.m_allowEmptyEntries || separatorPosition > m_position) { |
111 | m_length = separatorPosition - m_position; |
112 | return; |
113 | } |
114 | } |
115 | m_length = m_result.m_string.length() - m_position; |
116 | if (!m_length && !m_result.m_allowEmptyEntries) |
117 | m_isDone = true; |
118 | } |
119 | |
120 | auto StringView::SplitResult::Iterator::operator++() -> Iterator& |
121 | { |
122 | ASSERT(m_position <= m_result.m_string.length() && !m_isDone); |
123 | m_position += m_length; |
124 | if (m_position < m_result.m_string.length()) { |
125 | ++m_position; |
126 | findNextSubstring(); |
127 | } else if (!m_isDone) |
128 | m_isDone = true; |
129 | return *this; |
130 | } |
131 | |
132 | class StringView::GraphemeClusters::Iterator::Impl { |
133 | WTF_MAKE_FAST_ALLOCATED; |
134 | public: |
135 | Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index) |
136 | : m_stringView(stringView) |
137 | , m_iterator(WTFMove(iterator)) |
138 | , m_index(index) |
139 | , m_indexEnd(computeIndexEnd()) |
140 | { |
141 | } |
142 | |
143 | void operator++() |
144 | { |
145 | ASSERT(m_indexEnd > m_index); |
146 | m_index = m_indexEnd; |
147 | m_indexEnd = computeIndexEnd(); |
148 | } |
149 | |
150 | StringView operator*() const |
151 | { |
152 | if (m_stringView.is8Bit()) |
153 | return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index); |
154 | return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index); |
155 | } |
156 | |
157 | bool operator==(const Impl& other) const |
158 | { |
159 | ASSERT(&m_stringView == &other.m_stringView); |
160 | auto result = m_index == other.m_index; |
161 | ASSERT(!result || m_indexEnd == other.m_indexEnd); |
162 | return result; |
163 | } |
164 | |
165 | unsigned computeIndexEnd() |
166 | { |
167 | if (!m_iterator) |
168 | return 0; |
169 | if (m_index == m_stringView.length()) |
170 | return m_index; |
171 | return ubrk_following(m_iterator.value(), m_index); |
172 | } |
173 | |
174 | private: |
175 | const StringView& m_stringView; |
176 | Optional<NonSharedCharacterBreakIterator> m_iterator; |
177 | unsigned m_index; |
178 | unsigned m_indexEnd; |
179 | }; |
180 | |
181 | StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index) |
182 | : m_impl(makeUnique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index)) |
183 | { |
184 | } |
185 | |
186 | StringView::GraphemeClusters::Iterator::~Iterator() |
187 | { |
188 | } |
189 | |
190 | StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other) |
191 | : m_impl(WTFMove(other.m_impl)) |
192 | { |
193 | } |
194 | |
195 | auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator& |
196 | { |
197 | ++(*m_impl); |
198 | return *this; |
199 | } |
200 | |
201 | StringView StringView::GraphemeClusters::Iterator::operator*() const |
202 | { |
203 | return **m_impl; |
204 | } |
205 | |
206 | bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const |
207 | { |
208 | return *m_impl == *(other.m_impl); |
209 | } |
210 | |
211 | bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const |
212 | { |
213 | return !(*this == other); |
214 | } |
215 | |
216 | enum class ASCIICase { Lower, Upper }; |
217 | |
218 | template<ASCIICase type, typename CharacterType> |
219 | String convertASCIICase(const CharacterType* input, unsigned length) |
220 | { |
221 | if (!input) |
222 | return { }; |
223 | |
224 | CharacterType* characters; |
225 | auto result = String::createUninitialized(length, characters); |
226 | for (unsigned i = 0; i < length; ++i) |
227 | characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]); |
228 | return result; |
229 | } |
230 | |
231 | String StringView::convertToASCIILowercase() const |
232 | { |
233 | if (m_is8Bit) |
234 | return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length); |
235 | return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length); |
236 | } |
237 | |
238 | String StringView::convertToASCIIUppercase() const |
239 | { |
240 | if (m_is8Bit) |
241 | return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length); |
242 | return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length); |
243 | } |
244 | |
245 | StringViewWithUnderlyingString normalizedNFC(StringView string) |
246 | { |
247 | // Latin-1 characters are unaffected by normalization. |
248 | if (string.is8Bit()) |
249 | return { string, { } }; |
250 | |
251 | UErrorCode status = U_ZERO_ERROR; |
252 | const UNormalizer2* normalizer = unorm2_getNFCInstance(&status); |
253 | ASSERT(U_SUCCESS(status)); |
254 | |
255 | // No need to normalize if already normalized. |
256 | UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status); |
257 | if (checkResult) |
258 | return { string, { } }; |
259 | |
260 | unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status); |
261 | ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
262 | |
263 | UChar* characters; |
264 | String result = String::createUninitialized(normalizedLength, characters); |
265 | |
266 | status = U_ZERO_ERROR; |
267 | unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status); |
268 | ASSERT(U_SUCCESS(status)); |
269 | |
270 | StringView view { result }; |
271 | return { view, WTFMove(result) }; |
272 | } |
273 | |
274 | String normalizedNFC(const String& string) |
275 | { |
276 | auto result = normalizedNFC(StringView { string }); |
277 | if (result.underlyingString.isNull()) |
278 | return string; |
279 | return result.underlyingString; |
280 | } |
281 | |
282 | #if CHECK_STRINGVIEW_LIFETIME |
283 | |
284 | // Manage reference count manually so UnderlyingString does not need to be defined in the header. |
285 | |
286 | struct StringView::UnderlyingString { |
287 | WTF_MAKE_STRUCT_FAST_ALLOCATED; |
288 | std::atomic_uint refCount { 1u }; |
289 | bool isValid { true }; |
290 | const StringImpl& string; |
291 | explicit UnderlyingString(const StringImpl&); |
292 | }; |
293 | |
294 | StringView::UnderlyingString::UnderlyingString(const StringImpl& string) |
295 | : string(string) |
296 | { |
297 | } |
298 | |
299 | static Lock underlyingStringsMutex; |
300 | |
301 | static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings() |
302 | { |
303 | static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map; |
304 | return map; |
305 | } |
306 | |
307 | void StringView::invalidate(const StringImpl& stringToBeDestroyed) |
308 | { |
309 | UnderlyingString* underlyingString; |
310 | { |
311 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
312 | underlyingString = underlyingStrings().take(&stringToBeDestroyed); |
313 | if (!underlyingString) |
314 | return; |
315 | } |
316 | ASSERT(underlyingString->isValid); |
317 | underlyingString->isValid = false; |
318 | } |
319 | |
320 | bool StringView::underlyingStringIsValid() const |
321 | { |
322 | return !m_underlyingString || m_underlyingString->isValid; |
323 | } |
324 | |
325 | void StringView::adoptUnderlyingString(UnderlyingString* underlyingString) |
326 | { |
327 | if (m_underlyingString) { |
328 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
329 | if (!--m_underlyingString->refCount) { |
330 | if (m_underlyingString->isValid) { |
331 | underlyingStrings().remove(&m_underlyingString->string); |
332 | } |
333 | delete m_underlyingString; |
334 | } |
335 | } |
336 | m_underlyingString = underlyingString; |
337 | } |
338 | |
339 | void StringView::setUnderlyingString(const StringImpl* string) |
340 | { |
341 | UnderlyingString* underlyingString; |
342 | if (!string) |
343 | underlyingString = nullptr; |
344 | else { |
345 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
346 | auto result = underlyingStrings().add(string, nullptr); |
347 | if (result.isNewEntry) |
348 | result.iterator->value = new UnderlyingString(*string); |
349 | else |
350 | ++result.iterator->value->refCount; |
351 | underlyingString = result.iterator->value; |
352 | } |
353 | adoptUnderlyingString(underlyingString); |
354 | } |
355 | |
356 | void StringView::setUnderlyingString(const StringView& otherString) |
357 | { |
358 | UnderlyingString* underlyingString = otherString.m_underlyingString; |
359 | if (underlyingString) |
360 | ++underlyingString->refCount; |
361 | adoptUnderlyingString(underlyingString); |
362 | } |
363 | |
364 | #endif // CHECK_STRINGVIEW_LIFETIME |
365 | |
366 | } // namespace WTF |
367 | |