1 | /* |
2 | * Copyright (C) 2015 Andy VanWagoner ([email protected]) |
3 | * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected]) |
4 | * Copyright (C) 2016-2017 Apple Inc. All Rights Reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
17 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
19 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
25 | * THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "IntlCollator.h" |
30 | |
31 | #if ENABLE(INTL) |
32 | |
33 | #include "CatchScope.h" |
34 | #include "Error.h" |
35 | #include "IntlCollatorConstructor.h" |
36 | #include "IntlObject.h" |
37 | #include "JSBoundFunction.h" |
38 | #include "JSCInlines.h" |
39 | #include "ObjectConstructor.h" |
40 | #include "SlotVisitorInlines.h" |
41 | #include "StructureInlines.h" |
42 | #include <unicode/ucol.h> |
43 | #include <wtf/unicode/Collator.h> |
44 | |
45 | namespace JSC { |
46 | |
47 | const ClassInfo IntlCollator::s_info = { "Object" , &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) }; |
48 | |
49 | static const char* const relevantCollatorExtensionKeys[3] = { "co" , "kn" , "kf" }; |
50 | static const size_t indexOfExtensionKeyCo = 0; |
51 | static const size_t indexOfExtensionKeyKn = 1; |
52 | static const size_t indexOfExtensionKeyKf = 2; |
53 | |
54 | void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const |
55 | { |
56 | if (collator) |
57 | ucol_close(collator); |
58 | } |
59 | |
60 | IntlCollator* IntlCollator::create(VM& vm, Structure* structure) |
61 | { |
62 | IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure); |
63 | format->finishCreation(vm); |
64 | return format; |
65 | } |
66 | |
67 | Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
68 | { |
69 | return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); |
70 | } |
71 | |
// Constructs the collator cell by forwarding the VM and structure to the
// JSDestructibleObject constructor. No collator state is set up here; see
// initializeCollator() and createCollator().
IntlCollator::IntlCollator(VM& vm, Structure* structure)
    : JSDestructibleObject(vm, structure)
{
}
76 | |
// Second construction phase, called from create(): runs Base::finishCreation()
// and asserts that the object inherits from IntlCollator's ClassInfo.
void IntlCollator::finishCreation(VM& vm)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
}
82 | |
83 | void IntlCollator::destroy(JSCell* cell) |
84 | { |
85 | static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator(); |
86 | } |
87 | |
88 | void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) |
89 | { |
90 | IntlCollator* thisObject = jsCast<IntlCollator*>(cell); |
91 | ASSERT_GC_OBJECT_INHERITS(thisObject, info()); |
92 | |
93 | Base::visitChildren(thisObject, visitor); |
94 | |
95 | visitor.append(thisObject->m_boundCompare); |
96 | } |
97 | |
98 | static Vector<String> sortLocaleData(const String& locale, size_t keyIndex) |
99 | { |
100 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
101 | Vector<String> keyLocaleData; |
102 | switch (keyIndex) { |
103 | case indexOfExtensionKeyCo: { |
104 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
105 | keyLocaleData.append({ }); |
106 | |
107 | UErrorCode status = U_ZERO_ERROR; |
108 | UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation" , locale.utf8().data(), false, &status); |
109 | if (U_SUCCESS(status)) { |
110 | const char* collation; |
111 | while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) { |
112 | // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." |
113 | if (!strcmp(collation, "standard" ) || !strcmp(collation, "search" )) |
114 | continue; |
115 | |
116 | // Map keyword values to BCP 47 equivalents. |
117 | if (!strcmp(collation, "dictionary" )) |
118 | collation = "dict" ; |
119 | else if (!strcmp(collation, "gb2312han" )) |
120 | collation = "gb2312" ; |
121 | else if (!strcmp(collation, "phonebook" )) |
122 | collation = "phonebk" ; |
123 | else if (!strcmp(collation, "traditional" )) |
124 | collation = "trad" ; |
125 | |
126 | keyLocaleData.append(collation); |
127 | } |
128 | uenum_close(enumeration); |
129 | } |
130 | break; |
131 | } |
132 | case indexOfExtensionKeyKn: |
133 | keyLocaleData.reserveInitialCapacity(2); |
134 | keyLocaleData.uncheckedAppend("false"_s ); |
135 | keyLocaleData.uncheckedAppend("true"_s ); |
136 | break; |
137 | case indexOfExtensionKeyKf: |
138 | keyLocaleData.reserveInitialCapacity(3); |
139 | keyLocaleData.uncheckedAppend("false"_s ); |
140 | keyLocaleData.uncheckedAppend("lower"_s ); |
141 | keyLocaleData.uncheckedAppend("upper"_s ); |
142 | break; |
143 | default: |
144 | ASSERT_NOT_REACHED(); |
145 | } |
146 | return keyLocaleData; |
147 | } |
148 | |
149 | static Vector<String> searchLocaleData(const String&, size_t keyIndex) |
150 | { |
151 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
152 | Vector<String> keyLocaleData; |
153 | switch (keyIndex) { |
154 | case indexOfExtensionKeyCo: |
155 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
156 | keyLocaleData.reserveInitialCapacity(1); |
157 | keyLocaleData.append({ }); |
158 | break; |
159 | case indexOfExtensionKeyKn: |
160 | keyLocaleData.reserveInitialCapacity(2); |
161 | keyLocaleData.uncheckedAppend("false"_s ); |
162 | keyLocaleData.uncheckedAppend("true"_s ); |
163 | break; |
164 | case indexOfExtensionKeyKf: |
165 | keyLocaleData.reserveInitialCapacity(3); |
166 | keyLocaleData.uncheckedAppend("false"_s ); |
167 | keyLocaleData.uncheckedAppend("lower"_s ); |
168 | keyLocaleData.uncheckedAppend("upper"_s ); |
169 | break; |
170 | default: |
171 | ASSERT_NOT_REACHED(); |
172 | } |
173 | return keyLocaleData; |
174 | } |
175 | |
176 | void IntlCollator::initializeCollator(ExecState& state, JSValue locales, JSValue optionsValue) |
177 | { |
178 | VM& vm = state.vm(); |
179 | auto scope = DECLARE_THROW_SCOPE(vm); |
180 | |
181 | // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402) |
182 | // https://tc39.github.io/ecma402/#sec-initializecollator |
183 | |
184 | auto requestedLocales = canonicalizeLocaleList(state, locales); |
185 | RETURN_IF_EXCEPTION(scope, void()); |
186 | |
187 | JSObject* options; |
188 | if (optionsValue.isUndefined()) |
189 | options = constructEmptyObject(&state, state.lexicalGlobalObject()->nullPrototypeObjectStructure()); |
190 | else { |
191 | options = optionsValue.toObject(&state); |
192 | RETURN_IF_EXCEPTION(scope, void()); |
193 | } |
194 | |
195 | String usageString = intlStringOption(state, options, vm.propertyNames->usage, { "sort" , "search" }, "usage must be either \"sort\" or \"search\"" , "sort" ); |
196 | RETURN_IF_EXCEPTION(scope, void()); |
197 | if (usageString == "sort" ) |
198 | m_usage = Usage::Sort; |
199 | else if (usageString == "search" ) |
200 | m_usage = Usage::Search; |
201 | else |
202 | ASSERT_NOT_REACHED(); |
203 | |
204 | auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData; |
205 | |
206 | HashMap<String, String> opt; |
207 | |
208 | String matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup" , "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"" , "best fit" ); |
209 | RETURN_IF_EXCEPTION(scope, void()); |
210 | opt.add("localeMatcher"_s , matcher); |
211 | |
212 | { |
213 | String numericString; |
214 | bool usesFallback; |
215 | bool numeric = intlBooleanOption(state, options, vm.propertyNames->numeric, usesFallback); |
216 | RETURN_IF_EXCEPTION(scope, void()); |
217 | if (!usesFallback) |
218 | numericString = numeric ? "true"_s : "false"_s ; |
219 | if (!numericString.isNull()) |
220 | opt.add("kn"_s , numericString); |
221 | } |
222 | { |
223 | String caseFirst = intlStringOption(state, options, vm.propertyNames->caseFirst, { "upper" , "lower" , "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"" , nullptr); |
224 | RETURN_IF_EXCEPTION(scope, void()); |
225 | if (!caseFirst.isNull()) |
226 | opt.add("kf"_s , caseFirst); |
227 | } |
228 | |
229 | auto& availableLocales = state.jsCallee()->globalObject(vm)->intlCollatorAvailableLocales(); |
230 | auto result = resolveLocale(state, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData); |
231 | |
232 | m_locale = result.get("locale"_s ); |
233 | if (m_locale.isEmpty()) { |
234 | throwTypeError(&state, scope, "failed to initialize Collator due to invalid locale"_s ); |
235 | return; |
236 | } |
237 | |
238 | const String& collation = result.get("co"_s ); |
239 | m_collation = collation.isNull() ? "default"_s : collation; |
240 | m_numeric = result.get("kn"_s ) == "true" ; |
241 | |
242 | const String& caseFirst = result.get("kf"_s ); |
243 | if (caseFirst == "lower" ) |
244 | m_caseFirst = CaseFirst::Lower; |
245 | else if (caseFirst == "upper" ) |
246 | m_caseFirst = CaseFirst::Upper; |
247 | else |
248 | m_caseFirst = CaseFirst::False; |
249 | |
250 | String sensitivityString = intlStringOption(state, options, vm.propertyNames->sensitivity, { "base" , "accent" , "case" , "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"" , nullptr); |
251 | RETURN_IF_EXCEPTION(scope, void()); |
252 | if (sensitivityString == "base" ) |
253 | m_sensitivity = Sensitivity::Base; |
254 | else if (sensitivityString == "accent" ) |
255 | m_sensitivity = Sensitivity::Accent; |
256 | else if (sensitivityString == "case" ) |
257 | m_sensitivity = Sensitivity::Case; |
258 | else |
259 | m_sensitivity = Sensitivity::Variant; |
260 | |
261 | bool usesFallback; |
262 | bool ignorePunctuation = intlBooleanOption(state, options, vm.propertyNames->ignorePunctuation, usesFallback); |
263 | if (usesFallback) |
264 | ignorePunctuation = false; |
265 | RETURN_IF_EXCEPTION(scope, void()); |
266 | m_ignorePunctuation = ignorePunctuation; |
267 | |
268 | m_initializedCollator = true; |
269 | } |
270 | |
271 | void IntlCollator::createCollator(ExecState& state) |
272 | { |
273 | VM& vm = state.vm(); |
274 | auto scope = DECLARE_CATCH_SCOPE(vm); |
275 | ASSERT(!m_collator); |
276 | |
277 | if (!m_initializedCollator) { |
278 | initializeCollator(state, jsUndefined(), jsUndefined()); |
279 | scope.assertNoException(); |
280 | } |
281 | |
282 | UErrorCode status = U_ZERO_ERROR; |
283 | auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status)); |
284 | if (U_FAILURE(status)) |
285 | return; |
286 | |
287 | UColAttributeValue strength = UCOL_PRIMARY; |
288 | UColAttributeValue caseLevel = UCOL_OFF; |
289 | UColAttributeValue caseFirst = UCOL_OFF; |
290 | switch (m_sensitivity) { |
291 | case Sensitivity::Base: |
292 | break; |
293 | case Sensitivity::Accent: |
294 | strength = UCOL_SECONDARY; |
295 | break; |
296 | case Sensitivity::Case: |
297 | caseLevel = UCOL_ON; |
298 | break; |
299 | case Sensitivity::Variant: |
300 | strength = UCOL_TERTIARY; |
301 | break; |
302 | } |
303 | switch (m_caseFirst) { |
304 | case CaseFirst::False: |
305 | break; |
306 | case CaseFirst::Lower: |
307 | caseFirst = UCOL_LOWER_FIRST; |
308 | break; |
309 | case CaseFirst::Upper: |
310 | caseFirst = UCOL_UPPER_FIRST; |
311 | break; |
312 | } |
313 | |
314 | ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status); |
315 | ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); |
316 | ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status); |
317 | ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); |
318 | |
319 | // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be |
320 | // ignored. There is currently no way to ignore only punctuation. |
321 | ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); |
322 | |
323 | // "The method is required to return 0 when comparing Strings that are considered canonically |
324 | // equivalent by the Unicode standard." |
325 | ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
326 | if (U_FAILURE(status)) |
327 | return; |
328 | |
329 | m_collator = WTFMove(collator); |
330 | } |
331 | |
332 | JSValue IntlCollator::compareStrings(ExecState& state, StringView x, StringView y) |
333 | { |
334 | VM& vm = state.vm(); |
335 | auto scope = DECLARE_THROW_SCOPE(vm); |
336 | |
337 | // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0) |
338 | if (!m_collator) { |
339 | createCollator(state); |
340 | if (!m_collator) |
341 | return throwException(&state, scope, createError(&state, "Failed to compare strings."_s )); |
342 | } |
343 | |
344 | UErrorCode status = U_ZERO_ERROR; |
345 | UCharIterator iteratorX = createIterator(x); |
346 | UCharIterator iteratorY = createIterator(y); |
347 | auto result = ucol_strcollIter(m_collator.get(), &iteratorX, &iteratorY, &status); |
348 | if (U_FAILURE(status)) |
349 | return throwException(&state, scope, createError(&state, "Failed to compare strings."_s )); |
350 | return jsNumber(result); |
351 | } |
352 | |
353 | ASCIILiteral IntlCollator::usageString(Usage usage) |
354 | { |
355 | switch (usage) { |
356 | case Usage::Sort: |
357 | return "sort"_s ; |
358 | case Usage::Search: |
359 | return "search"_s ; |
360 | } |
361 | ASSERT_NOT_REACHED(); |
362 | return ASCIILiteral::null(); |
363 | } |
364 | |
365 | ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity) |
366 | { |
367 | switch (sensitivity) { |
368 | case Sensitivity::Base: |
369 | return "base"_s ; |
370 | case Sensitivity::Accent: |
371 | return "accent"_s ; |
372 | case Sensitivity::Case: |
373 | return "case"_s ; |
374 | case Sensitivity::Variant: |
375 | return "variant"_s ; |
376 | } |
377 | ASSERT_NOT_REACHED(); |
378 | return ASCIILiteral::null(); |
379 | } |
380 | |
381 | ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst) |
382 | { |
383 | switch (caseFirst) { |
384 | case CaseFirst::False: |
385 | return "false"_s ; |
386 | case CaseFirst::Lower: |
387 | return "lower"_s ; |
388 | case CaseFirst::Upper: |
389 | return "upper"_s ; |
390 | } |
391 | ASSERT_NOT_REACHED(); |
392 | return ASCIILiteral::null(); |
393 | } |
394 | |
395 | JSObject* IntlCollator::resolvedOptions(ExecState& state) |
396 | { |
397 | VM& vm = state.vm(); |
398 | auto scope = DECLARE_THROW_SCOPE(vm); |
399 | |
400 | // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0) |
401 | // The function returns a new object whose properties and attributes are set as if |
402 | // constructed by an object literal assigning to each of the following properties the |
403 | // value of the corresponding internal slot of this Collator object (see 10.4): locale, |
404 | // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown |
405 | // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]] |
406 | // internal slot of the standard built-in object that is the initial value of |
407 | // Intl.Collator. |
408 | |
409 | if (!m_initializedCollator) { |
410 | initializeCollator(state, jsUndefined(), jsUndefined()); |
411 | scope.assertNoException(); |
412 | } |
413 | |
414 | JSObject* options = constructEmptyObject(&state); |
415 | options->putDirect(vm, vm.propertyNames->locale, jsString(&state, m_locale)); |
416 | options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(&state, usageString(m_usage))); |
417 | options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(&state, sensitivityString(m_sensitivity))); |
418 | options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); |
419 | options->putDirect(vm, vm.propertyNames->collation, jsString(&state, m_collation)); |
420 | options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); |
421 | options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(&state, caseFirstString(m_caseFirst))); |
422 | return options; |
423 | } |
424 | |
// Stores the given bound function in m_boundCompare, with this collator as the
// owner. visitChildren() reports the stored reference to the visitor.
void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
{
    m_boundCompare.set(vm, this, format);
}
429 | |
430 | } // namespace JSC |
431 | |
432 | #endif // ENABLE(INTL) |
433 | |