1 | /* |
2 | |
3 | Copyright (C) 2014-2019 Apple Inc. All rights reserved. |
4 | |
5 | Redistribution and use in source and binary forms, with or without |
6 | modification, are permitted provided that the following conditions |
7 | are met: |
8 | 1. Redistributions of source code must retain the above copyright |
9 | notice, this list of conditions and the following disclaimer. |
10 | 2. Redistributions in binary form must reproduce the above copyright |
11 | notice, this list of conditions and the following disclaimer in the |
12 | documentation and/or other materials provided with the distribution. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY |
15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
17 | DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
18 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 | |
25 | */ |
26 | |
27 | #include "config.h" |
28 | #include <wtf/text/StringView.h> |
29 | |
30 | #include <mutex> |
31 | #include <unicode/ubrk.h> |
32 | #include <unicode/unorm2.h> |
33 | #include <wtf/HashMap.h> |
34 | #include <wtf/Lock.h> |
35 | #include <wtf/NeverDestroyed.h> |
36 | #include <wtf/Optional.h> |
37 | #include <wtf/text/TextBreakIterator.h> |
38 | |
39 | namespace WTF { |
40 | |
41 | bool StringView::containsIgnoringASCIICase(const StringView& matchString) const |
42 | { |
43 | return findIgnoringASCIICase(matchString) != notFound; |
44 | } |
45 | |
46 | bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
47 | { |
48 | return findIgnoringASCIICase(matchString, startOffset) != notFound; |
49 | } |
50 | |
51 | size_t StringView::findIgnoringASCIICase(const StringView& matchString) const |
52 | { |
53 | return ::WTF::findIgnoringASCIICase(*this, matchString, 0); |
54 | } |
55 | |
56 | size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const |
57 | { |
58 | return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset); |
59 | } |
60 | |
61 | bool StringView::startsWith(const StringView& prefix) const |
62 | { |
63 | return ::WTF::startsWith(*this, prefix); |
64 | } |
65 | |
66 | bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const |
67 | { |
68 | return ::WTF::startsWithIgnoringASCIICase(*this, prefix); |
69 | } |
70 | |
71 | bool StringView::endsWith(const StringView& suffix) const |
72 | { |
73 | return ::WTF::endsWith(*this, suffix); |
74 | } |
75 | |
76 | bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const |
77 | { |
78 | return ::WTF::endsWithIgnoringASCIICase(*this, suffix); |
79 | } |
80 | |
81 | Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const |
82 | { |
83 | if (isNull()) |
84 | return CString("" , 0); |
85 | if (is8Bit()) |
86 | return StringImpl::utf8ForCharacters(characters8(), length()); |
87 | return StringImpl::utf8ForCharacters(characters16(), length(), mode); |
88 | } |
89 | |
90 | CString StringView::utf8(ConversionMode mode) const |
91 | { |
92 | auto expectedString = tryGetUtf8(mode); |
93 | RELEASE_ASSERT(expectedString); |
94 | return expectedString.value(); |
95 | } |
96 | |
97 | size_t StringView::find(StringView matchString, unsigned start) const |
98 | { |
99 | return findCommon(*this, matchString, start); |
100 | } |
101 | |
102 | void StringView::SplitResult::Iterator::findNextSubstring() |
103 | { |
104 | for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) { |
105 | if (m_result.m_allowEmptyEntries || separatorPosition > m_position) { |
106 | m_length = separatorPosition - m_position; |
107 | return; |
108 | } |
109 | } |
110 | m_length = m_result.m_string.length() - m_position; |
111 | if (!m_length && !m_result.m_allowEmptyEntries) |
112 | m_isDone = true; |
113 | } |
114 | |
115 | auto StringView::SplitResult::Iterator::operator++() -> Iterator& |
116 | { |
117 | ASSERT(m_position <= m_result.m_string.length() && !m_isDone); |
118 | m_position += m_length; |
119 | if (m_position < m_result.m_string.length()) { |
120 | ++m_position; |
121 | findNextSubstring(); |
122 | } else if (!m_isDone) |
123 | m_isDone = true; |
124 | return *this; |
125 | } |
126 | |
127 | class StringView::GraphemeClusters::Iterator::Impl { |
128 | WTF_MAKE_FAST_ALLOCATED; |
129 | public: |
130 | Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index) |
131 | : m_stringView(stringView) |
132 | , m_iterator(WTFMove(iterator)) |
133 | , m_index(index) |
134 | , m_indexEnd(computeIndexEnd()) |
135 | { |
136 | } |
137 | |
138 | void operator++() |
139 | { |
140 | ASSERT(m_indexEnd > m_index); |
141 | m_index = m_indexEnd; |
142 | m_indexEnd = computeIndexEnd(); |
143 | } |
144 | |
145 | StringView operator*() const |
146 | { |
147 | if (m_stringView.is8Bit()) |
148 | return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index); |
149 | return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index); |
150 | } |
151 | |
152 | bool operator==(const Impl& other) const |
153 | { |
154 | ASSERT(&m_stringView == &other.m_stringView); |
155 | auto result = m_index == other.m_index; |
156 | ASSERT(!result || m_indexEnd == other.m_indexEnd); |
157 | return result; |
158 | } |
159 | |
160 | unsigned computeIndexEnd() |
161 | { |
162 | if (!m_iterator) |
163 | return 0; |
164 | if (m_index == m_stringView.length()) |
165 | return m_index; |
166 | return ubrk_following(m_iterator.value(), m_index); |
167 | } |
168 | |
169 | private: |
170 | const StringView& m_stringView; |
171 | Optional<NonSharedCharacterBreakIterator> m_iterator; |
172 | unsigned m_index; |
173 | unsigned m_indexEnd; |
174 | }; |
175 | |
176 | StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index) |
177 | : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index)) |
178 | { |
179 | } |
180 | |
181 | StringView::GraphemeClusters::Iterator::~Iterator() |
182 | { |
183 | } |
184 | |
185 | StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other) |
186 | : m_impl(WTFMove(other.m_impl)) |
187 | { |
188 | } |
189 | |
190 | auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator& |
191 | { |
192 | ++(*m_impl); |
193 | return *this; |
194 | } |
195 | |
196 | StringView StringView::GraphemeClusters::Iterator::operator*() const |
197 | { |
198 | return **m_impl; |
199 | } |
200 | |
201 | bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const |
202 | { |
203 | return *m_impl == *(other.m_impl); |
204 | } |
205 | |
206 | bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const |
207 | { |
208 | return !(*this == other); |
209 | } |
210 | |
211 | enum class ASCIICase { Lower, Upper }; |
212 | |
213 | template<ASCIICase type, typename CharacterType> |
214 | String convertASCIICase(const CharacterType* input, unsigned length) |
215 | { |
216 | if (!input) |
217 | return { }; |
218 | |
219 | CharacterType* characters; |
220 | auto result = String::createUninitialized(length, characters); |
221 | for (unsigned i = 0; i < length; ++i) |
222 | characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]); |
223 | return result; |
224 | } |
225 | |
226 | String StringView::convertToASCIILowercase() const |
227 | { |
228 | if (m_is8Bit) |
229 | return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length); |
230 | return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length); |
231 | } |
232 | |
233 | String StringView::convertToASCIIUppercase() const |
234 | { |
235 | if (m_is8Bit) |
236 | return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length); |
237 | return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length); |
238 | } |
239 | |
240 | StringViewWithUnderlyingString normalizedNFC(StringView string) |
241 | { |
242 | // Latin-1 characters are unaffected by normalization. |
243 | if (string.is8Bit()) |
244 | return { string, { } }; |
245 | |
246 | UErrorCode status = U_ZERO_ERROR; |
247 | const UNormalizer2* normalizer = unorm2_getNFCInstance(&status); |
248 | ASSERT(U_SUCCESS(status)); |
249 | |
250 | // No need to normalize if already normalized. |
251 | UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status); |
252 | if (checkResult) |
253 | return { string, { } }; |
254 | |
255 | unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status); |
256 | ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
257 | |
258 | UChar* characters; |
259 | String result = String::createUninitialized(normalizedLength, characters); |
260 | |
261 | status = U_ZERO_ERROR; |
262 | unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status); |
263 | ASSERT(U_SUCCESS(status)); |
264 | |
265 | StringView view { result }; |
266 | return { view, WTFMove(result) }; |
267 | } |
268 | |
269 | String normalizedNFC(const String& string) |
270 | { |
271 | auto result = normalizedNFC(StringView { string }); |
272 | if (result.underlyingString.isNull()) |
273 | return string; |
274 | return result.underlyingString; |
275 | } |
276 | |
277 | #if CHECK_STRINGVIEW_LIFETIME |
278 | |
279 | // Manage reference count manually so UnderlyingString does not need to be defined in the header. |
280 | |
281 | struct StringView::UnderlyingString { |
282 | std::atomic_uint refCount { 1u }; |
283 | bool isValid { true }; |
284 | const StringImpl& string; |
285 | explicit UnderlyingString(const StringImpl&); |
286 | }; |
287 | |
288 | StringView::UnderlyingString::UnderlyingString(const StringImpl& string) |
289 | : string(string) |
290 | { |
291 | } |
292 | |
293 | static Lock underlyingStringsMutex; |
294 | |
295 | static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings() |
296 | { |
297 | static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map; |
298 | return map; |
299 | } |
300 | |
301 | void StringView::invalidate(const StringImpl& stringToBeDestroyed) |
302 | { |
303 | UnderlyingString* underlyingString; |
304 | { |
305 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
306 | underlyingString = underlyingStrings().take(&stringToBeDestroyed); |
307 | if (!underlyingString) |
308 | return; |
309 | } |
310 | ASSERT(underlyingString->isValid); |
311 | underlyingString->isValid = false; |
312 | } |
313 | |
314 | bool StringView::underlyingStringIsValid() const |
315 | { |
316 | return !m_underlyingString || m_underlyingString->isValid; |
317 | } |
318 | |
319 | void StringView::adoptUnderlyingString(UnderlyingString* underlyingString) |
320 | { |
321 | if (m_underlyingString) { |
322 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
323 | if (!--m_underlyingString->refCount) { |
324 | if (m_underlyingString->isValid) { |
325 | underlyingStrings().remove(&m_underlyingString->string); |
326 | } |
327 | delete m_underlyingString; |
328 | } |
329 | } |
330 | m_underlyingString = underlyingString; |
331 | } |
332 | |
333 | void StringView::setUnderlyingString(const StringImpl* string) |
334 | { |
335 | UnderlyingString* underlyingString; |
336 | if (!string) |
337 | underlyingString = nullptr; |
338 | else { |
339 | std::lock_guard<Lock> lock(underlyingStringsMutex); |
340 | auto result = underlyingStrings().add(string, nullptr); |
341 | if (result.isNewEntry) |
342 | result.iterator->value = new UnderlyingString(*string); |
343 | else |
344 | ++result.iterator->value->refCount; |
345 | underlyingString = result.iterator->value; |
346 | } |
347 | adoptUnderlyingString(underlyingString); |
348 | } |
349 | |
350 | void StringView::setUnderlyingString(const StringView& otherString) |
351 | { |
352 | UnderlyingString* underlyingString = otherString.m_underlyingString; |
353 | if (underlyingString) |
354 | ++underlyingString->refCount; |
355 | adoptUnderlyingString(underlyingString); |
356 | } |
357 | |
358 | #endif // CHECK_STRINGVIEW_LIFETIME |
359 | |
360 | } // namespace WTF |
361 | |