/*

Copyright (C) 2014-2019 Apple Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
26
27#include "config.h"
28#include <wtf/text/StringView.h>
29
30#include <mutex>
31#include <unicode/ubrk.h>
32#include <unicode/unorm2.h>
33#include <wtf/HashMap.h>
34#include <wtf/Lock.h>
35#include <wtf/NeverDestroyed.h>
36#include <wtf/Optional.h>
37#include <wtf/text/TextBreakIterator.h>
38
39namespace WTF {
40
41bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
42{
43 return findIgnoringASCIICase(matchString) != notFound;
44}
45
46bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
47{
48 return findIgnoringASCIICase(matchString, startOffset) != notFound;
49}
50
51size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
52{
53 return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
54}
55
56size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
57{
58 return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
59}
60
61bool StringView::startsWith(UChar character) const
62{
63 return m_length && (*this)[0] == character;
64}
65
66bool StringView::startsWith(const StringView& prefix) const
67{
68 return ::WTF::startsWith(*this, prefix);
69}
70
71bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
72{
73 return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
74}
75
76bool StringView::endsWith(const StringView& suffix) const
77{
78 return ::WTF::endsWith(*this, suffix);
79}
80
81bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
82{
83 return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
84}
85
86Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const
87{
88 if (isNull())
89 return CString("", 0);
90 if (is8Bit())
91 return StringImpl::utf8ForCharacters(characters8(), length());
92 return StringImpl::utf8ForCharacters(characters16(), length(), mode);
93}
94
95CString StringView::utf8(ConversionMode mode) const
96{
97 auto expectedString = tryGetUtf8(mode);
98 RELEASE_ASSERT(expectedString);
99 return expectedString.value();
100}
101
102size_t StringView::find(StringView matchString, unsigned start) const
103{
104 return findCommon(*this, matchString, start);
105}
106
107void StringView::SplitResult::Iterator::findNextSubstring()
108{
109 for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) {
110 if (m_result.m_allowEmptyEntries || separatorPosition > m_position) {
111 m_length = separatorPosition - m_position;
112 return;
113 }
114 }
115 m_length = m_result.m_string.length() - m_position;
116 if (!m_length && !m_result.m_allowEmptyEntries)
117 m_isDone = true;
118}
119
120auto StringView::SplitResult::Iterator::operator++() -> Iterator&
121{
122 ASSERT(m_position <= m_result.m_string.length() && !m_isDone);
123 m_position += m_length;
124 if (m_position < m_result.m_string.length()) {
125 ++m_position;
126 findNextSubstring();
127 } else if (!m_isDone)
128 m_isDone = true;
129 return *this;
130}
131
132class StringView::GraphemeClusters::Iterator::Impl {
133 WTF_MAKE_FAST_ALLOCATED;
134public:
135 Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
136 : m_stringView(stringView)
137 , m_iterator(WTFMove(iterator))
138 , m_index(index)
139 , m_indexEnd(computeIndexEnd())
140 {
141 }
142
143 void operator++()
144 {
145 ASSERT(m_indexEnd > m_index);
146 m_index = m_indexEnd;
147 m_indexEnd = computeIndexEnd();
148 }
149
150 StringView operator*() const
151 {
152 if (m_stringView.is8Bit())
153 return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
154 return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
155 }
156
157 bool operator==(const Impl& other) const
158 {
159 ASSERT(&m_stringView == &other.m_stringView);
160 auto result = m_index == other.m_index;
161 ASSERT(!result || m_indexEnd == other.m_indexEnd);
162 return result;
163 }
164
165 unsigned computeIndexEnd()
166 {
167 if (!m_iterator)
168 return 0;
169 if (m_index == m_stringView.length())
170 return m_index;
171 return ubrk_following(m_iterator.value(), m_index);
172 }
173
174private:
175 const StringView& m_stringView;
176 Optional<NonSharedCharacterBreakIterator> m_iterator;
177 unsigned m_index;
178 unsigned m_indexEnd;
179};
180
181StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
182 : m_impl(makeUnique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
183{
184}
185
186StringView::GraphemeClusters::Iterator::~Iterator()
187{
188}
189
190StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
191 : m_impl(WTFMove(other.m_impl))
192{
193}
194
195auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
196{
197 ++(*m_impl);
198 return *this;
199}
200
201StringView StringView::GraphemeClusters::Iterator::operator*() const
202{
203 return **m_impl;
204}
205
206bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
207{
208 return *m_impl == *(other.m_impl);
209}
210
211bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
212{
213 return !(*this == other);
214}
215
216enum class ASCIICase { Lower, Upper };
217
218template<ASCIICase type, typename CharacterType>
219String convertASCIICase(const CharacterType* input, unsigned length)
220{
221 if (!input)
222 return { };
223
224 CharacterType* characters;
225 auto result = String::createUninitialized(length, characters);
226 for (unsigned i = 0; i < length; ++i)
227 characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]);
228 return result;
229}
230
231String StringView::convertToASCIILowercase() const
232{
233 if (m_is8Bit)
234 return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length);
235 return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length);
236}
237
238String StringView::convertToASCIIUppercase() const
239{
240 if (m_is8Bit)
241 return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length);
242 return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
243}
244
245StringViewWithUnderlyingString normalizedNFC(StringView string)
246{
247 // Latin-1 characters are unaffected by normalization.
248 if (string.is8Bit())
249 return { string, { } };
250
251 UErrorCode status = U_ZERO_ERROR;
252 const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
253 ASSERT(U_SUCCESS(status));
254
255 // No need to normalize if already normalized.
256 UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
257 if (checkResult)
258 return { string, { } };
259
260 unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status);
261 ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
262
263 UChar* characters;
264 String result = String::createUninitialized(normalizedLength, characters);
265
266 status = U_ZERO_ERROR;
267 unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
268 ASSERT(U_SUCCESS(status));
269
270 StringView view { result };
271 return { view, WTFMove(result) };
272}
273
274String normalizedNFC(const String& string)
275{
276 auto result = normalizedNFC(StringView { string });
277 if (result.underlyingString.isNull())
278 return string;
279 return result.underlyingString;
280}
281
282#if CHECK_STRINGVIEW_LIFETIME
283
284// Manage reference count manually so UnderlyingString does not need to be defined in the header.
285
286struct StringView::UnderlyingString {
287 WTF_MAKE_STRUCT_FAST_ALLOCATED;
288 std::atomic_uint refCount { 1u };
289 bool isValid { true };
290 const StringImpl& string;
291 explicit UnderlyingString(const StringImpl&);
292};
293
294StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
295 : string(string)
296{
297}
298
299static Lock underlyingStringsMutex;
300
301static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings()
302{
303 static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map;
304 return map;
305}
306
307void StringView::invalidate(const StringImpl& stringToBeDestroyed)
308{
309 UnderlyingString* underlyingString;
310 {
311 std::lock_guard<Lock> lock(underlyingStringsMutex);
312 underlyingString = underlyingStrings().take(&stringToBeDestroyed);
313 if (!underlyingString)
314 return;
315 }
316 ASSERT(underlyingString->isValid);
317 underlyingString->isValid = false;
318}
319
320bool StringView::underlyingStringIsValid() const
321{
322 return !m_underlyingString || m_underlyingString->isValid;
323}
324
325void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
326{
327 if (m_underlyingString) {
328 std::lock_guard<Lock> lock(underlyingStringsMutex);
329 if (!--m_underlyingString->refCount) {
330 if (m_underlyingString->isValid) {
331 underlyingStrings().remove(&m_underlyingString->string);
332 }
333 delete m_underlyingString;
334 }
335 }
336 m_underlyingString = underlyingString;
337}
338
339void StringView::setUnderlyingString(const StringImpl* string)
340{
341 UnderlyingString* underlyingString;
342 if (!string)
343 underlyingString = nullptr;
344 else {
345 std::lock_guard<Lock> lock(underlyingStringsMutex);
346 auto result = underlyingStrings().add(string, nullptr);
347 if (result.isNewEntry)
348 result.iterator->value = new UnderlyingString(*string);
349 else
350 ++result.iterator->value->refCount;
351 underlyingString = result.iterator->value;
352 }
353 adoptUnderlyingString(underlyingString);
354}
355
356void StringView::setUnderlyingString(const StringView& otherString)
357{
358 UnderlyingString* underlyingString = otherString.m_underlyingString;
359 if (underlyingString)
360 ++underlyingString->refCount;
361 adoptUnderlyingString(underlyingString);
362}
363
364#endif // CHECK_STRINGVIEW_LIFETIME
365
366} // namespace WTF
367