1/*
2
3Copyright (C) 2014-2019 Apple Inc. All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
81. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
15EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25*/
26
27#include "config.h"
28#include <wtf/text/StringView.h>
29
30#include <mutex>
31#include <unicode/ubrk.h>
32#include <unicode/unorm2.h>
33#include <wtf/HashMap.h>
34#include <wtf/Lock.h>
35#include <wtf/NeverDestroyed.h>
36#include <wtf/Optional.h>
37#include <wtf/text/TextBreakIterator.h>
38
39namespace WTF {
40
41bool StringView::containsIgnoringASCIICase(const StringView& matchString) const
42{
43 return findIgnoringASCIICase(matchString) != notFound;
44}
45
46bool StringView::containsIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
47{
48 return findIgnoringASCIICase(matchString, startOffset) != notFound;
49}
50
51size_t StringView::findIgnoringASCIICase(const StringView& matchString) const
52{
53 return ::WTF::findIgnoringASCIICase(*this, matchString, 0);
54}
55
56size_t StringView::findIgnoringASCIICase(const StringView& matchString, unsigned startOffset) const
57{
58 return ::WTF::findIgnoringASCIICase(*this, matchString, startOffset);
59}
60
61bool StringView::startsWith(UChar character) const
62{
63 return m_length && (*this)[0] == character;
64}
65
66bool StringView::startsWith(const StringView& prefix) const
67{
68 return ::WTF::startsWith(*this, prefix);
69}
70
71bool StringView::startsWithIgnoringASCIICase(const StringView& prefix) const
72{
73 return ::WTF::startsWithIgnoringASCIICase(*this, prefix);
74}
75
76bool StringView::endsWith(const StringView& suffix) const
77{
78 return ::WTF::endsWith(*this, suffix);
79}
80
81bool StringView::endsWithIgnoringASCIICase(const StringView& suffix) const
82{
83 return ::WTF::endsWithIgnoringASCIICase(*this, suffix);
84}
85
86Expected<CString, UTF8ConversionError> StringView::tryGetUtf8(ConversionMode mode) const
87{
88 if (isNull())
89 return CString("", 0);
90 if (is8Bit())
91 return StringImpl::utf8ForCharacters(characters8(), length());
92 return StringImpl::utf8ForCharacters(characters16(), length(), mode);
93}
94
95CString StringView::utf8(ConversionMode mode) const
96{
97 auto expectedString = tryGetUtf8(mode);
98 RELEASE_ASSERT(expectedString);
99 return expectedString.value();
100}
101
102size_t StringView::find(StringView matchString, unsigned start) const
103{
104 return findCommon(*this, matchString, start);
105}
106
107void StringView::SplitResult::Iterator::findNextSubstring()
108{
109 for (size_t separatorPosition; (separatorPosition = m_result.m_string.find(m_result.m_separator, m_position)) != notFound; ++m_position) {
110 if (m_result.m_allowEmptyEntries || separatorPosition > m_position) {
111 m_length = separatorPosition - m_position;
112 return;
113 }
114 }
115 m_length = m_result.m_string.length() - m_position;
116 if (!m_length && !m_result.m_allowEmptyEntries)
117 m_isDone = true;
118}
119
120auto StringView::SplitResult::Iterator::operator++() -> Iterator&
121{
122 ASSERT(m_position <= m_result.m_string.length() && !m_isDone);
123 m_position += m_length;
124 if (m_position < m_result.m_string.length()) {
125 ++m_position;
126 findNextSubstring();
127 } else if (!m_isDone)
128 m_isDone = true;
129 return *this;
130}
131
132class StringView::GraphemeClusters::Iterator::Impl {
133 WTF_MAKE_FAST_ALLOCATED;
134public:
135 Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
136 : m_stringView(stringView)
137 , m_iterator(WTFMove(iterator))
138 , m_index(index)
139 , m_indexEnd(computeIndexEnd())
140 {
141 }
142
143 void operator++()
144 {
145 ASSERT(m_indexEnd > m_index);
146 m_index = m_indexEnd;
147 m_indexEnd = computeIndexEnd();
148 }
149
150 StringView operator*() const
151 {
152 if (m_stringView.is8Bit())
153 return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
154 return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
155 }
156
157 bool operator==(const Impl& other) const
158 {
159 ASSERT(&m_stringView == &other.m_stringView);
160 auto result = m_index == other.m_index;
161 ASSERT(!result || m_indexEnd == other.m_indexEnd);
162 return result;
163 }
164
165 unsigned computeIndexEnd()
166 {
167 if (!m_iterator)
168 return 0;
169 if (m_index == m_stringView.length())
170 return m_index;
171 return ubrk_following(m_iterator.value(), m_index);
172 }
173
174private:
175 const StringView& m_stringView;
176 Optional<NonSharedCharacterBreakIterator> m_iterator;
177 unsigned m_index;
178 unsigned m_indexEnd;
179};
180
181StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
182 : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? WTF::nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
183{
184}
185
186StringView::GraphemeClusters::Iterator::~Iterator()
187{
188}
189
190StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
191 : m_impl(WTFMove(other.m_impl))
192{
193}
194
195auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
196{
197 ++(*m_impl);
198 return *this;
199}
200
201StringView StringView::GraphemeClusters::Iterator::operator*() const
202{
203 return **m_impl;
204}
205
206bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
207{
208 return *m_impl == *(other.m_impl);
209}
210
211bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
212{
213 return !(*this == other);
214}
215
216enum class ASCIICase { Lower, Upper };
217
218template<ASCIICase type, typename CharacterType>
219String convertASCIICase(const CharacterType* input, unsigned length)
220{
221 if (!input)
222 return { };
223
224 CharacterType* characters;
225 auto result = String::createUninitialized(length, characters);
226 for (unsigned i = 0; i < length; ++i)
227 characters[i] = type == ASCIICase::Lower ? toASCIILower(input[i]) : toASCIIUpper(input[i]);
228 return result;
229}
230
231String StringView::convertToASCIILowercase() const
232{
233 if (m_is8Bit)
234 return convertASCIICase<ASCIICase::Lower>(static_cast<const LChar*>(m_characters), m_length);
235 return convertASCIICase<ASCIICase::Lower>(static_cast<const UChar*>(m_characters), m_length);
236}
237
238String StringView::convertToASCIIUppercase() const
239{
240 if (m_is8Bit)
241 return convertASCIICase<ASCIICase::Upper>(static_cast<const LChar*>(m_characters), m_length);
242 return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
243}
244
245StringViewWithUnderlyingString normalizedNFC(StringView string)
246{
247 // Latin-1 characters are unaffected by normalization.
248 if (string.is8Bit())
249 return { string, { } };
250
251 UErrorCode status = U_ZERO_ERROR;
252 const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
253 ASSERT(U_SUCCESS(status));
254
255 // No need to normalize if already normalized.
256 UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
257 if (checkResult)
258 return { string, { } };
259
260 unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status);
261 ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
262
263 UChar* characters;
264 String result = String::createUninitialized(normalizedLength, characters);
265
266 status = U_ZERO_ERROR;
267 unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
268 ASSERT(U_SUCCESS(status));
269
270 StringView view { result };
271 return { view, WTFMove(result) };
272}
273
274String normalizedNFC(const String& string)
275{
276 auto result = normalizedNFC(StringView { string });
277 if (result.underlyingString.isNull())
278 return string;
279 return result.underlyingString;
280}
281
282#if CHECK_STRINGVIEW_LIFETIME
283
284// Manage reference count manually so UnderlyingString does not need to be defined in the header.
285
286struct StringView::UnderlyingString {
287 std::atomic_uint refCount { 1u };
288 bool isValid { true };
289 const StringImpl& string;
290 explicit UnderlyingString(const StringImpl&);
291};
292
293StringView::UnderlyingString::UnderlyingString(const StringImpl& string)
294 : string(string)
295{
296}
297
298static Lock underlyingStringsMutex;
299
300static HashMap<const StringImpl*, StringView::UnderlyingString*>& underlyingStrings()
301{
302 static NeverDestroyed<HashMap<const StringImpl*, StringView::UnderlyingString*>> map;
303 return map;
304}
305
306void StringView::invalidate(const StringImpl& stringToBeDestroyed)
307{
308 UnderlyingString* underlyingString;
309 {
310 std::lock_guard<Lock> lock(underlyingStringsMutex);
311 underlyingString = underlyingStrings().take(&stringToBeDestroyed);
312 if (!underlyingString)
313 return;
314 }
315 ASSERT(underlyingString->isValid);
316 underlyingString->isValid = false;
317}
318
319bool StringView::underlyingStringIsValid() const
320{
321 return !m_underlyingString || m_underlyingString->isValid;
322}
323
324void StringView::adoptUnderlyingString(UnderlyingString* underlyingString)
325{
326 if (m_underlyingString) {
327 std::lock_guard<Lock> lock(underlyingStringsMutex);
328 if (!--m_underlyingString->refCount) {
329 if (m_underlyingString->isValid) {
330 underlyingStrings().remove(&m_underlyingString->string);
331 }
332 delete m_underlyingString;
333 }
334 }
335 m_underlyingString = underlyingString;
336}
337
338void StringView::setUnderlyingString(const StringImpl* string)
339{
340 UnderlyingString* underlyingString;
341 if (!string)
342 underlyingString = nullptr;
343 else {
344 std::lock_guard<Lock> lock(underlyingStringsMutex);
345 auto result = underlyingStrings().add(string, nullptr);
346 if (result.isNewEntry)
347 result.iterator->value = new UnderlyingString(*string);
348 else
349 ++result.iterator->value->refCount;
350 underlyingString = result.iterator->value;
351 }
352 adoptUnderlyingString(underlyingString);
353}
354
355void StringView::setUnderlyingString(const StringView& otherString)
356{
357 UnderlyingString* underlyingString = otherString.m_underlyingString;
358 if (underlyingString)
359 ++underlyingString->refCount;
360 adoptUnderlyingString(underlyingString);
361}
362
363#endif // CHECK_STRINGVIEW_LIFETIME
364
365} // namespace WTF
366