1/*
2 * Copyright (C) 2008, 2014 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include <wtf/unicode/Collator.h>
31
32// FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file.
33
34#if !UCONFIG_NO_COLLATION
35
36#include <mutex>
37#include <unicode/ucol.h>
38#include <wtf/Lock.h>
39#include <wtf/text/StringView.h>
40
41#if OS(DARWIN) && USE(CF)
42#include <CoreFoundation/CoreFoundation.h>
43#include <wtf/RetainPtr.h>
44#endif
45
46namespace WTF {
47
48static UCollator* cachedCollator;
49static char* cachedCollatorLocale;
50static bool cachedCollatorShouldSortLowercaseFirst;
51
52static Lock cachedCollatorMutex;
53
54#if !(OS(DARWIN) && USE(CF))
55
// On platforms without the Darwin/CoreFoundation lookup the requested locale
// is handed to ICU unchanged, including a null pointer.
static inline const char* resolveDefaultLocale(const char* locale)
{
    return locale;
}
60
61#else
62
63static inline char* copyShortASCIIString(CFStringRef string)
64{
65 // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names.
66 char buffer[256];
67 if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII))
68 return strdup("");
69 return strdup(buffer);
70}
71
72static char* copyDefaultLocale()
73{
74#if !PLATFORM(IOS_FAMILY)
75 return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier)));
76#else
77 // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead.
78 return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get());
79#endif
80}
81
82static inline const char* resolveDefaultLocale(const char* locale)
83{
84 if (locale)
85 return locale;
86 // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one.
87 // So, instead of passing null to ICU, we pass the name of the user's selected locale.
88 static char* defaultLocale;
89 static std::once_flag initializeDefaultLocaleOnce;
90 std::call_once(initializeDefaultLocaleOnce, []{
91 defaultLocale = copyDefaultLocale();
92 });
93 return defaultLocale;
94}
95
96#endif
97
// Returns true when both locale names denote the same locale: either the
// pointers are identical (which covers two nulls) or both are non-null and
// compare equal with strcmp. A null never matches a non-null name.
static inline bool localesMatch(const char* a, const char* b)
{
    if (a == b)
        return true;
    if (!a || !b)
        return false;
    return !strcmp(a, b);
}
103
104Collator::Collator(const char* locale, bool shouldSortLowercaseFirst)
105{
106 UErrorCode status = U_ZERO_ERROR;
107
108 {
109 std::lock_guard<Lock> lock(cachedCollatorMutex);
110 if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) {
111 m_collator = cachedCollator;
112 m_locale = cachedCollatorLocale;
113 m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
114 cachedCollator = nullptr;
115 cachedCollatorLocale = nullptr;
116 return;
117 }
118 }
119
120 m_collator = ucol_open(resolveDefaultLocale(locale), &status);
121 if (U_FAILURE(status)) {
122 status = U_ZERO_ERROR;
123 m_collator = ucol_open("", &status); // Fall back to Unicode Collation Algorithm.
124 }
125 ASSERT(U_SUCCESS(status));
126
127 ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
128 ASSERT(U_SUCCESS(status));
129
130 ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
131 ASSERT(U_SUCCESS(status));
132
133 m_locale = locale ? fastStrDup(locale) : nullptr;
134 m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
135}
136
137Collator::~Collator()
138{
139 std::lock_guard<Lock> lock(cachedCollatorMutex);
140 if (cachedCollator) {
141 ucol_close(cachedCollator);
142 fastFree(cachedCollatorLocale);
143 }
144 cachedCollator = m_collator;
145 cachedCollatorLocale = m_locale;
146 cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst;
147}
148
149static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin)
150{
151 switch (origin) {
152 case UITER_START:
153 return iterator->start;
154 case UITER_CURRENT:
155 return iterator->index;
156 case UITER_LIMIT:
157 return iterator->limit;
158 case UITER_ZERO:
159 return 0;
160 case UITER_LENGTH:
161 return iterator->length;
162 }
163 ASSERT_NOT_REACHED();
164 return U_SENTINEL;
165}
166
167static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin)
168{
169 return iterator->index = getIndexLatin1(iterator, origin) + delta;
170}
171
172static UBool hasNextLatin1(UCharIterator* iterator)
173{
174 return iterator->index < iterator->limit;
175}
176
177static UBool hasPreviousLatin1(UCharIterator* iterator)
178{
179 return iterator->index > iterator->start;
180}
181
182static UChar32 currentLatin1(UCharIterator* iterator)
183{
184 ASSERT(iterator->index >= iterator->start);
185 if (iterator->index >= iterator->limit)
186 return U_SENTINEL;
187 return static_cast<const LChar*>(iterator->context)[iterator->index];
188}
189
190static UChar32 nextLatin1(UCharIterator* iterator)
191{
192 ASSERT(iterator->index >= iterator->start);
193 if (iterator->index >= iterator->limit)
194 return U_SENTINEL;
195 return static_cast<const LChar*>(iterator->context)[iterator->index++];
196}
197
198static UChar32 previousLatin1(UCharIterator* iterator)
199{
200 if (iterator->index <= iterator->start)
201 return U_SENTINEL;
202 return static_cast<const LChar*>(iterator->context)[--iterator->index];
203}
204
205static uint32_t getStateLatin1(const UCharIterator* iterator)
206{
207 return iterator->index;
208}
209
210static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*)
211{
212 iterator->index = state;
213}
214
215static UCharIterator createLatin1Iterator(const LChar* characters, int length)
216{
217 UCharIterator iterator;
218 iterator.context = characters;
219 iterator.length = length;
220 iterator.start = 0;
221 iterator.index = 0;
222 iterator.limit = length;
223 iterator.reservedField = 0;
224 iterator.getIndex = getIndexLatin1;
225 iterator.move = moveLatin1;
226 iterator.hasNext = hasNextLatin1;
227 iterator.hasPrevious = hasPreviousLatin1;
228 iterator.current = currentLatin1;
229 iterator.next = nextLatin1;
230 iterator.previous = previousLatin1;
231 iterator.reservedFn = nullptr;
232 iterator.getState = getStateLatin1;
233 iterator.setState = setStateLatin1;
234 return iterator;
235}
236
237UCharIterator createIterator(StringView string)
238{
239 if (string.is8Bit())
240 return createLatin1Iterator(string.characters8(), string.length());
241 UCharIterator iterator;
242 uiter_setString(&iterator, string.characters16(), string.length());
243 return iterator;
244}
245
246int Collator::collate(StringView a, StringView b) const
247{
248 UCharIterator iteratorA = createIterator(a);
249 UCharIterator iteratorB = createIterator(b);
250 UErrorCode status = U_ZERO_ERROR;
251 int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
252 ASSERT(U_SUCCESS(status));
253 return result;
254}
255
256static UCharIterator createIteratorUTF8(const char* string)
257{
258 UCharIterator iterator;
259 uiter_setUTF8(&iterator, string, strlen(string));
260 return iterator;
261}
262
263int Collator::collateUTF8(const char* a, const char* b) const
264{
265 UCharIterator iteratorA = createIteratorUTF8(a);
266 UCharIterator iteratorB = createIteratorUTF8(b);
267 UErrorCode status = U_ZERO_ERROR;
268 int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
269 ASSERT(U_SUCCESS(status));
270 return result;
271}
272
273} // namespace WTF
274
275#endif
276