1 | /* |
2 | * Copyright (C) 2008, 2014 Apple Inc. All rights reserved. |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions |
6 | * are met: |
7 | * |
8 | * 1. Redistributions of source code must retain the above copyright |
9 | * notice, this list of conditions and the following disclaimer. |
10 | * 2. Redistributions in binary form must reproduce the above copyright |
11 | * notice, this list of conditions and the following disclaimer in the |
12 | * documentation and/or other materials provided with the distribution. |
13 | * 3. Neither the name of Apple Inc. ("Apple") nor the names of |
14 | * its contributors may be used to endorse or promote products derived |
15 | * from this software without specific prior written permission. |
16 | * |
17 | * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
20 | * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
21 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "config.h" |
30 | #include <wtf/unicode/Collator.h> |
31 | |
32 | // FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file. |
33 | |
34 | #if !UCONFIG_NO_COLLATION |
35 | |
36 | #include <mutex> |
37 | #include <unicode/ucol.h> |
38 | #include <wtf/Lock.h> |
39 | #include <wtf/text/StringView.h> |
40 | |
41 | #if OS(DARWIN) && USE(CF) |
42 | #include <CoreFoundation/CoreFoundation.h> |
43 | #include <wtf/RetainPtr.h> |
44 | #endif |
45 | |
46 | namespace WTF { |
47 | |
// Single-entry collator cache shared by all Collator instances in this file.
// ~Collator() parks its ICU collator here and Collator() adopts it again when
// the locale and case-ordering settings match.
static UCollator* cachedCollator;
// Locale string that belongs to cachedCollator; null when the Collator was created with a null locale.
static char* cachedCollatorLocale;
// Case-ordering flag that the cached collator was configured with.
static bool cachedCollatorShouldSortLowercaseFirst;

// Held by the constructor and destructor whenever they read or write the three variables above.
static Lock cachedCollatorMutex;
53 | |
54 | #if !(OS(DARWIN) && USE(CF)) |
55 | |
// Variant used when the build is not (OS(DARWIN) && USE(CF)): the requested locale,
// including null, is returned unchanged and handed to ucol_open() as is.
static inline const char* resolveDefaultLocale(const char* locale)
{
    return locale;
}
60 | |
61 | #else |
62 | |
63 | static inline char* copyShortASCIIString(CFStringRef string) |
64 | { |
65 | // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names. |
66 | char buffer[256]; |
67 | if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII)) |
68 | return strdup("" ); |
69 | return strdup(buffer); |
70 | } |
71 | |
72 | static char* copyDefaultLocale() |
73 | { |
74 | #if !PLATFORM(IOS_FAMILY) |
75 | return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier))); |
76 | #else |
77 | // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead. |
78 | return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder" ), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get()); |
79 | #endif |
80 | } |
81 | |
82 | static inline const char* resolveDefaultLocale(const char* locale) |
83 | { |
84 | if (locale) |
85 | return locale; |
86 | // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one. |
87 | // So, instead of passing null to ICU, we pass the name of the user's selected locale. |
88 | static char* defaultLocale; |
89 | static std::once_flag initializeDefaultLocaleOnce; |
90 | std::call_once(initializeDefaultLocaleOnce, []{ |
91 | defaultLocale = copyDefaultLocale(); |
92 | }); |
93 | return defaultLocale; |
94 | } |
95 | |
96 | #endif |
97 | |
// Returns true when both locale names denote the same locale: either the pointers are
// identical (which covers two null locales) or both are non-null and compare equal with strcmp.
static inline bool localesMatch(const char* a, const char* b)
{
    if (a == b)
        return true;
    if (!a || !b)
        return false;
    return !strcmp(a, b);
}
103 | |
104 | Collator::Collator(const char* locale, bool shouldSortLowercaseFirst) |
105 | { |
106 | UErrorCode status = U_ZERO_ERROR; |
107 | |
108 | { |
109 | std::lock_guard<Lock> lock(cachedCollatorMutex); |
110 | if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) { |
111 | m_collator = cachedCollator; |
112 | m_locale = cachedCollatorLocale; |
113 | m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; |
114 | cachedCollator = nullptr; |
115 | cachedCollatorLocale = nullptr; |
116 | return; |
117 | } |
118 | } |
119 | |
120 | m_collator = ucol_open(resolveDefaultLocale(locale), &status); |
121 | if (U_FAILURE(status)) { |
122 | status = U_ZERO_ERROR; |
123 | m_collator = ucol_open("" , &status); // Fall back to Unicode Collation Algorithm. |
124 | } |
125 | ASSERT(U_SUCCESS(status)); |
126 | |
127 | ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status); |
128 | ASSERT(U_SUCCESS(status)); |
129 | |
130 | ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
131 | ASSERT(U_SUCCESS(status)); |
132 | |
133 | m_locale = locale ? fastStrDup(locale) : nullptr; |
134 | m_shouldSortLowercaseFirst = shouldSortLowercaseFirst; |
135 | } |
136 | |
137 | Collator::~Collator() |
138 | { |
139 | std::lock_guard<Lock> lock(cachedCollatorMutex); |
140 | if (cachedCollator) { |
141 | ucol_close(cachedCollator); |
142 | fastFree(cachedCollatorLocale); |
143 | } |
144 | cachedCollator = m_collator; |
145 | cachedCollatorLocale = m_locale; |
146 | cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst; |
147 | } |
148 | |
149 | static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin) |
150 | { |
151 | switch (origin) { |
152 | case UITER_START: |
153 | return iterator->start; |
154 | case UITER_CURRENT: |
155 | return iterator->index; |
156 | case UITER_LIMIT: |
157 | return iterator->limit; |
158 | case UITER_ZERO: |
159 | return 0; |
160 | case UITER_LENGTH: |
161 | return iterator->length; |
162 | } |
163 | ASSERT_NOT_REACHED(); |
164 | return U_SENTINEL; |
165 | } |
166 | |
167 | static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin) |
168 | { |
169 | return iterator->index = getIndexLatin1(iterator, origin) + delta; |
170 | } |
171 | |
172 | static UBool hasNextLatin1(UCharIterator* iterator) |
173 | { |
174 | return iterator->index < iterator->limit; |
175 | } |
176 | |
177 | static UBool hasPreviousLatin1(UCharIterator* iterator) |
178 | { |
179 | return iterator->index > iterator->start; |
180 | } |
181 | |
182 | static UChar32 currentLatin1(UCharIterator* iterator) |
183 | { |
184 | ASSERT(iterator->index >= iterator->start); |
185 | if (iterator->index >= iterator->limit) |
186 | return U_SENTINEL; |
187 | return static_cast<const LChar*>(iterator->context)[iterator->index]; |
188 | } |
189 | |
190 | static UChar32 nextLatin1(UCharIterator* iterator) |
191 | { |
192 | ASSERT(iterator->index >= iterator->start); |
193 | if (iterator->index >= iterator->limit) |
194 | return U_SENTINEL; |
195 | return static_cast<const LChar*>(iterator->context)[iterator->index++]; |
196 | } |
197 | |
198 | static UChar32 previousLatin1(UCharIterator* iterator) |
199 | { |
200 | if (iterator->index <= iterator->start) |
201 | return U_SENTINEL; |
202 | return static_cast<const LChar*>(iterator->context)[--iterator->index]; |
203 | } |
204 | |
205 | static uint32_t getStateLatin1(const UCharIterator* iterator) |
206 | { |
207 | return iterator->index; |
208 | } |
209 | |
210 | static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*) |
211 | { |
212 | iterator->index = state; |
213 | } |
214 | |
215 | static UCharIterator createLatin1Iterator(const LChar* characters, int length) |
216 | { |
217 | UCharIterator iterator; |
218 | iterator.context = characters; |
219 | iterator.length = length; |
220 | iterator.start = 0; |
221 | iterator.index = 0; |
222 | iterator.limit = length; |
223 | iterator.reservedField = 0; |
224 | iterator.getIndex = getIndexLatin1; |
225 | iterator.move = moveLatin1; |
226 | iterator.hasNext = hasNextLatin1; |
227 | iterator.hasPrevious = hasPreviousLatin1; |
228 | iterator.current = currentLatin1; |
229 | iterator.next = nextLatin1; |
230 | iterator.previous = previousLatin1; |
231 | iterator.reservedFn = nullptr; |
232 | iterator.getState = getStateLatin1; |
233 | iterator.setState = setStateLatin1; |
234 | return iterator; |
235 | } |
236 | |
237 | UCharIterator createIterator(StringView string) |
238 | { |
239 | if (string.is8Bit()) |
240 | return createLatin1Iterator(string.characters8(), string.length()); |
241 | UCharIterator iterator; |
242 | uiter_setString(&iterator, string.characters16(), string.length()); |
243 | return iterator; |
244 | } |
245 | |
246 | int Collator::collate(StringView a, StringView b) const |
247 | { |
248 | UCharIterator iteratorA = createIterator(a); |
249 | UCharIterator iteratorB = createIterator(b); |
250 | UErrorCode status = U_ZERO_ERROR; |
251 | int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); |
252 | ASSERT(U_SUCCESS(status)); |
253 | return result; |
254 | } |
255 | |
256 | static UCharIterator createIteratorUTF8(const char* string) |
257 | { |
258 | UCharIterator iterator; |
259 | uiter_setUTF8(&iterator, string, strlen(string)); |
260 | return iterator; |
261 | } |
262 | |
263 | int Collator::collateUTF8(const char* a, const char* b) const |
264 | { |
265 | UCharIterator iteratorA = createIteratorUTF8(a); |
266 | UCharIterator iteratorB = createIteratorUTF8(b); |
267 | UErrorCode status = U_ZERO_ERROR; |
268 | int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status); |
269 | ASSERT(U_SUCCESS(status)); |
270 | return result; |
271 | } |
272 | |
273 | } // namespace WTF |
274 | |
275 | #endif |
276 | |