1/*
2
3Copyright (C) 2014-2019 Apple Inc. All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
81. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
15EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25*/
26
27#include "config.h"
28#include <wtf/text/StringView.h>
29
30#include <mutex>
31#include <unicode/ubrk.h>
32#include <unicode/unorm2.h>
33#include <wtf/HashMap.h>
34#include <wtf/Lock.h>
35#include <wtf/NeverDestroyed.h>
36#include <wtf/Optional.h>
37#include <wtf/text/TextBreakIterator.h>
38
39namespace WTF {
40
41bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
42{
43 return findIgnoringASCIICase(matchString) != notFound;
44}
45
46bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
47{
48 return findIgnoringASCIICase(matchString, startOffset) != notFound;
49}
50
51size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
52{
53 return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
54}
55
56size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
57{
58 return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
59}
60
61bool StringView::startsWith(const StringView& prefix) const
62{
63 return ::WTF::startsWith(*this, prefix);
64}
65
66bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
67{
68 return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
69}
70
71bool StringView::endsWith(const StringView& suffix) const
72{
73 return ::WTF::endsWith(*this, suffix);
74}
75
76bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
77{
78 return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
79}
80
81Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const
82{
83 if (isNull())
84 return CString("", 0);
85 if (is8Bit())
86 return StringImpl::utf8ForCharacters(characters8(), length());
87 return StringImpl::utf8ForCharacters(characters16(), length(), mode);
88}
89
90CString StringView::utf8(ConversionMode mode) const
91{
92 auto expectedString = tryGetUtf8(mode);
93 RELEASE_ASSERT(expectedString);
94 return expectedString.value();
95}
96
97size_t StringView::find(StringView matchString, unsigned start) const
98{
99 return findCommon(*this, matchString, start);
100}
101
102void StringView::SplitResult::Iterator::findNextSubstring()
103{
104 for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) {
105 if (m_result.m_allowEmptyEntries || separatorPosition > m_position) {
106 m_length = separatorPosition - m_position;
107 return;
108 }
109 }
110 m_length = m_result.m_string.length() - m_position;
111 if (!m_length && !m_result.m_allowEmptyEntries)
112 m_isDone = true;
113}
114
115auto StringView::SplitResult::Iterator::operator++() -> Iterator&
116{
117 ASSERT(m_position <= m_result.m_string.length() && !m_isDone);
118 m_position += m_length;
119 if (m_position < m_result.m_string.length()) {
120 ++m_position;
121 findNextSubstring();
122 } else if (!m_isDone)
123 m_isDone = true;
124 return *this;
125}
126
127class StringView::GraphemeClusters::Iterator::Impl {
128 WTF_MAKE_FAST_ALLOCATED;
129public:
130 Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
131 : m_stringView(stringView)
132 , m_iterator(WTFMove(iterator))
133 , m_index(index)
134 , m_indexEnd(computeIndexEnd())
135 {
136 }
137
138 void operator++()
139 {
140 ASSERT(m_indexEnd > m_index);
141 m_index = m_indexEnd;
142 m_indexEnd = computeIndexEnd();
143 }
144
145 StringView operator*() const
146 {
147 if (m_stringView.is8Bit())
148 return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
149 return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
150 }
151
152 bool operator==(const Impl& other) const
153 {
154 ASSERT(&m_stringView == &other.m_stringView);
155 auto result = m_index == other.m_index;
156 ASSERT(!result || m_indexEnd == other.m_indexEnd);
157 return result;
158 }
159
160 unsigned computeIndexEnd()
161 {
162 if (!m_iterator)
163 return 0;
164 if (m_index == m_stringView.length())
165 return m_index;
166 return ubrk_following(m_iterator.value(), m_index);
167 }
168
169private:
170 const StringView& m_stringView;
171 Optional<NonSharedCharacterBreakIterator> m_iterator;
172 unsigned m_index;
173 unsigned m_indexEnd;
174};
175
176StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
177 : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
178{
179}
180
181StringView::GraphemeClusters::Iterator::~Iterator()
182{
183}
184
185StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
186 : m_impl(WTFMove(other.m_impl))
187{
188}
189
190auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
191{
192 ++(*m_impl);
193 return *this;
194}
195
196StringView StringView::GraphemeClusters::Iterator::operator*() const
197{
198 return **m_impl;
199}
200
201bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
202{
203 return *m_impl == *(other.m_impl);
204}
205
206bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
207{
208 return !(*this == other);
209}
210
211enum class ASCIICase { Lower, Upper };
212
213template<ASCIICase type, typename CharacterType>
214String convertASCIICase(const CharacterType* input, unsigned length)
215{
216 if (!input)
217 return { };
218
219 CharacterType* characters;
220 auto result = String::createUninitialized(length, characters);
221 for (unsigned i = 0; i < length; ++i)
222 characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]);
223 return result;
224}
225
226String StringView::convertToASCIILowercase() const
227{
228 if (m_is8Bit)
229 return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length);
230 return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length);
231}
232
233String StringView::convertToASCIIUppercase() const
234{
235 if (m_is8Bit)
236 return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length);
237 return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
238}
239
240StringViewWithUnderlyingString normalizedNFC(StringView string)
241{
242 // Latin-1 characters are unaffected by normalization.
243 if (string.is8Bit())
244 return { string, { } };
245
246 UErrorCode status = U_ZERO_ERROR;
247 const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
248 ASSERT(U_SUCCESS(status));
249
250 // No need to normalize if already normalized.
251 UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
252 if (checkResult)
253 return { string, { } };
254
255 unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status);
256 ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
257
258 UChar* characters;
259 String result = String::createUninitialized(normalizedLength, characters);
260
261 status = U_ZERO_ERROR;
262 unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
263 ASSERT(U_SUCCESS(status));
264
265 StringView view { result };
266 return { view, WTFMove(result) };
267}
268
269String normalizedNFC(const String& string)
270{
271 auto result = normalizedNFC(StringView { string });
272 if (result.underlyingString.isNull())
273 return string;
274 return result.underlyingString;
275}
276
277#if CHECK_STRINGVIEW_LIFETIME
278
279// Manage reference count manually so UnderlyingString does not need to be defined in the header.
280
281struct StringView::UnderlyingString {
282 std::atomic_uint refCount { 1u };
283 bool isValid { true };
284 const StringImpl& string;
285 explicit UnderlyingString(const StringImpl&);
286};
287
288StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
289 : string(string)
290{
291}
292
293static Lock underlyingStringsMutex;
294
295static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings()
296{
297 static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map;
298 return map;
299}
300
301void StringView::invalidate(const StringImpl& stringToBeDestroyed)
302{
303 UnderlyingString* underlyingString;
304 {
305 std::lock_guard<Lock> lock(underlyingStringsMutex);
306 underlyingString = underlyingStrings().take(&stringToBeDestroyed);
307 if (!underlyingString)
308 return;
309 }
310 ASSERT(underlyingString->isValid);
311 underlyingString->isValid = false;
312}
313
314bool StringView::underlyingStringIsValid() const
315{
316 return !m_underlyingString || m_underlyingString->isValid;
317}
318
319void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
320{
321 if (m_underlyingString) {
322 std::lock_guard<Lock> lock(underlyingStringsMutex);
323 if (!--m_underlyingString->refCount) {
324 if (m_underlyingString->isValid) {
325 underlyingStrings().remove(&m_underlyingString->string);
326 }
327 delete m_underlyingString;
328 }
329 }
330 m_underlyingString = underlyingString;
331}
332
333void StringView::setUnderlyingString(const StringImpl* string)
334{
335 UnderlyingString* underlyingString;
336 if (!string)
337 underlyingString = nullptr;
338 else {
339 std::lock_guard<Lock> lock(underlyingStringsMutex);
340 auto result = underlyingStrings().add(string, nullptr);
341 if (result.isNewEntry)
342 result.iterator->value = new UnderlyingString(*string);
343 else
344 ++result.iterator->value->refCount;
345 underlyingString = result.iterator->value;
346 }
347 adoptUnderlyingString(underlyingString);
348}
349
350void StringView::setUnderlyingString(const StringView& otherString)
351{
352 UnderlyingString* underlyingString = otherString.m_underlyingString;
353 if (underlyingString)
354 ++underlyingString->refCount;
355 adoptUnderlyingString(underlyingString);
356}
357
358#endif // CHECK_STRINGVIEW_LIFETIME
359
360} // namespace WTF
361