1 | /* |
2 | |
3 | Copyright (C) 2014-2019 Apple Inc. All rights reserved. |
4 | |
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the following conditions |
7 | are met: |
8 | 1. Redistributions of source code must retain the above copyright |
9 | notice, this list of conditions and the following disclaimer. |
10 | 2. Redistributions in binary form must reproduce the above copyright |
11 | notice, this list of conditions and the following disclaimer in the |
12 | documentation and/or other materials provided with the distribution. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY |
15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
17 | DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/text/StringView.h> |
29 | |
30 | #include <mutex> |
31 | #include <unicode/ubrk.h> |
32 | #include <unicode/unorm2.h> |
33 | #include <wtf/HashMap.h> |
34 | #include <wtf/Lock.h> |
35 | #include <wtf/NeverDestroyed.h> |
36 | #include <wtf/Optional.h> |
37 | #include <wtf/text/TextBreakIterator.h> |
38 | |
39 | namespace WTF { |
40 | |
41 | bool StringView::containsIgnoringASCIICase(const StringView& matchString) const |
42 | { |
43 | return findIgnoringASCIICase(matchString) != notFound; |
44 | } |
45 | |
46 | bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
47 | { |
48 | return findIgnoringASCIICase(matchString, startOffset) != notFound; |
49 | } |
50 | |
51 | size_t StringView::findIgnoringASCIICase(const StringView& matchString) const |
52 | { |
53 | return ::WTF::findIgnoringASCIICase(*this, matchString, 0); |
54 | } |
55 | |
56 | size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
57 | { |
58 | return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset); |
59 | } |
60 | |
61 | bool StringView::startsWith(UChar character) const |
62 | { |
63 | return m_length && (*this)[0] == character; |
64 | } |
65 | |
66 | bool StringView::startsWith(const StringView& prefix) const |
67 | { |
68 | return ::WTF::startsWith(*this, prefix); |
69 | } |
70 | |
71 | bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const |
72 | { |
73 | return ::WTF::startsWithIgnoringASCIICase(*this, prefix); |
74 | } |
75 | |
76 | bool StringView::endsWith(const StringView& suffix) const |
77 | { |
78 | return ::WTF::endsWith(*this, suffix); |
79 | } |
80 | |
81 | bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const |
82 | { |
83 | return ::WTF::endsWithIgnoringASCIICase(*this, suffix); |
84 | } |
85 | |
86 | Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const |
87 | { |
88 | if (isNull()) |
89 | return CString("" , 0); |
90 | if (is8Bit()) |
91 | return StringImpl::utf8ForCharacters(characters8(), length()); |
92 | return StringImpl::utf8ForCharacters(characters16(), length(), mode); |
93 | } |
94 | |
95 | CString StringView::utf8(ConversionMode mode) const |
96 | { |
97 | auto expectedString = tryGetUtf8(mode); |
98 | RELEASE_ASSERT(expectedString); |
99 | return expectedString.value(); |
100 | } |
101 | |
102 | size_t StringView::find(StringView matchString, unsigned start) const |
103 | { |
104 | return findCommon(*this, matchString, start); |
105 | } |
106 | |
107 | void StringView::SplitResult::Iterator::findNextSubstring() |
108 | { |
109 | for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) { |
110 | if (m_result.m_allowEmptyEntries || separatorPosition > m_position) { |
111 | m_length = separatorPosition - m_position; |
112 | return; |
113 | } |
114 | } |
115 | m_length = m_result.m_string.length() - m_position; |
116 | if (!m_length && !m_result.m_allowEmptyEntries) |
117 | m_isDone = true; |
118 | } |
119 | |
120 | auto StringView::SplitResult::Iterator::operator++() -> Iterator& |
121 | { |
122 | ASSERT(m_position <= m_result.m_string.length() && !m_isDone); |
123 | m_position += m_length; |
124 | if (m_position < m_result.m_string.length()) { |
125 | ++m_position; |
126 | findNextSubstring(); |
127 | } else if (!m_isDone) |
128 | m_isDone = true; |
129 | return *this; |
130 | } |
131 | |
132 | class StringView::GraphemeClusters::Iterator::Impl { |
133 | WTF_MAKE_FAST_ALLOCATED; |
134 | public: |
135 | Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index) |
136 | : m_stringView(stringView) |
137 | , m_iterator(WTFMove(iterator)) |
138 | , m_index(index) |
139 | , m_indexEnd(computeIndexEnd()) |
140 | { |
141 | } |
142 | |
143 | void operator++() |
144 | { |
145 | ASSERT(m_indexEnd > m_index); |
146 | m_index = m_indexEnd; |
147 | m_indexEnd = computeIndexEnd(); |
148 | } |
149 | |
150 | StringView operator*() const |
151 | { |
152 | if (m_stringView.is8Bit()) |
153 | return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index); |
154 | return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index); |
155 | } |
156 | |
157 | bool operator==(const Impl& other) const |
158 | { |
159 | ASSERT(&m_stringView == &other.m_stringView); |
160 | auto result = m_index == other.m_index; |
161 | ASSERT(!result || m_indexEnd == other.m_indexEnd); |
162 | return result; |
163 | } |
164 | |
165 | unsigned computeIndexEnd() |
166 | { |
167 | if (!m_iterator) |
168 | return 0; |
169 | if (m_index == m_stringView.length()) |
170 | return m_index; |
171 | return ubrk_following(m_iterator.value(), m_index); |
172 | } |
173 | |
174 | private: |
175 | const StringView& m_stringView; |
176 | Optional<NonSharedCharacterBreakIterator> m_iterator; |
177 | unsigned m_index; |
178 | unsigned m_indexEnd; |
179 | }; |
180 | |
181 | StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index) |
182 | : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index)) |
183 | { |
184 | } |
185 | |
186 | StringView::GraphemeClusters::Iterator::~Iterator() |
187 | { |
188 | } |
189 | |
190 | StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other) |
191 | : m_impl(WTFMove(other.m_impl)) |
192 | { |
193 | } |
194 | |
195 | auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator& |
196 | { |
197 | ++(*m_impl); |
198 | return *this; |
199 | } |
200 | |
201 | StringView StringView::GraphemeClusters::Iterator::operator*() const |
202 | { |
203 | return **m_impl; |
204 | } |
205 | |
206 | bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const |
207 | { |
208 | return *m_impl == *(other.m_impl); |
209 | } |
210 | |
211 | bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const |
212 | { |
213 | return !(*this == other); |
214 | } |
215 | |
216 | enum class ASCIICase { Lower, Upper }; |
217 | |
218 | template<ASCIICase type, typename CharacterType> |
219 | String convertASCIICase(const CharacterType* input, unsigned length) |
220 | { |
221 | if (!input) |
222 | return { }; |
223 | |
224 | CharacterType* characters; |
225 | auto result = String::createUninitialized(length, characters); |
226 | for (unsigned i = 0; i < length; ++i) |
227 | characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]); |
228 | return result; |
229 | } |
230 | |
231 | String StringView::convertToASCIILowercase() const |
232 | { |
233 | if (m_is8Bit) |
234 | return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length); |
235 | return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length); |
236 | } |
237 | |
238 | String StringView::convertToASCIIUppercase() const |
239 | { |
240 | if (m_is8Bit) |
241 | return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length); |
242 | return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length); |
243 | } |
244 | |
245 | StringViewWithUnderlyingString normalizedNFC(StringView string) |
246 | { |
247 | // Latin-1 characters are unaffected by normalization. |
248 | if (string.is8Bit()) |
249 | return { string, { } }; |
250 | |
251 | UErrorCode status = U_ZERO_ERROR; |
252 | const UNormalizer2* normalizer = unorm2_getNFCInstance(&status); |
253 | ASSERT(U_SUCCESS(status)); |
254 | |
255 | // No need to normalize if already normalized. |
256 | UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status); |
257 | if (checkResult) |
258 | return { string, { } }; |
259 | |
260 | unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status); |
261 | ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
262 | |
263 | UChar* characters; |
264 | String result = String::createUninitialized(normalizedLength, characters); |
265 | |
266 | status = U_ZERO_ERROR; |
267 | unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status); |
268 | ASSERT(U_SUCCESS(status)); |
269 | |
270 | StringView view { result }; |
271 | return { view, WTFMove(result) }; |
272 | } |
273 | |
274 | String normalizedNFC(const String& string) |
275 | { |
276 | auto result = normalizedNFC(StringView { string }); |
277 | if (result.underlyingString.isNull()) |
278 | return string; |
279 | return result.underlyingString; |
280 | } |
281 | |
282 | #if CHECK_STRINGVIEW_LIFETIME |
283 | |
284 | // Manage reference count manually so UnderlyingString does not need to be defined in the header. |
285 | |
286 | struct StringView::UnderlyingString { |
287 | std::atomic_uint refCount { 1u }; |
288 | bool isValid { true }; |
289 | const StringImpl& string; |
290 | explicit UnderlyingString(const StringImpl&); |
291 | }; |
292 | |
293 | StringView::UnderlyingString::UnderlyingString(const StringImpl& string) |
294 | : string(string) |
295 | { |
296 | } |
297 | |
298 | static Lock underlyingStringsMutex; |
299 | |
300 | static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings() |
301 | { |
302 | static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map; |
303 | return map; |
304 | } |
305 | |
306 | void StringView::invalidate(const StringImpl& stringToBeDestroyed) |
307 | { |
308 | UnderlyingString* underlyingString; |
309 | { |
310 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
311 | underlyingString = underlyingStrings().take(&stringToBeDestroyed); |
312 | if (!underlyingString) |
313 | return; |
314 | } |
315 | ASSERT(underlyingString->isValid); |
316 | underlyingString->isValid = false; |
317 | } |
318 | |
319 | bool StringView::underlyingStringIsValid() const |
320 | { |
321 | return !m_underlyingString || m_underlyingString->isValid; |
322 | } |
323 | |
324 | void StringView::adoptUnderlyingString(UnderlyingString* underlyingString) |
325 | { |
326 | if (m_underlyingString) { |
327 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
328 | if (!--m_underlyingString->refCount) { |
329 | if (m_underlyingString->isValid) { |
330 | underlyingStrings().remove(&m_underlyingString->string); |
331 | } |
332 | delete m_underlyingString; |
333 | } |
334 | } |
335 | m_underlyingString = underlyingString; |
336 | } |
337 | |
338 | void StringView::setUnderlyingString(const StringImpl* string) |
339 | { |
340 | UnderlyingString* underlyingString; |
341 | if (!string) |
342 | underlyingString = nullptr; |
343 | else { |
344 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
345 | auto result = underlyingStrings().add(string, nullptr); |
346 | if (result.isNewEntry) |
347 | result.iterator->value = new UnderlyingString(*string); |
348 | else |
349 | ++result.iterator->value->refCount; |
350 | underlyingString = result.iterator->value; |
351 | } |
352 | adoptUnderlyingString(underlyingString); |
353 | } |
354 | |
355 | void StringView::setUnderlyingString(const StringView& otherString) |
356 | { |
357 | UnderlyingString* underlyingString = otherString.m_underlyingString; |
358 | if (underlyingString) |
359 | ++underlyingString->refCount; |
360 | adoptUnderlyingString(underlyingString); |
361 | } |
362 | |
363 | #endif // CHECK_STRINGVIEW_LIFETIME |
364 | |
365 | } // namespace WTF |
366 | |