1 | /* |
2 | * Copyright (C) 2015 Andy VanWagoner ([email protected]) |
3 | * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected]) |
4 | * Copyright (C) 2016-2019 Apple Inc. All Rights Reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
17 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
19 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
25 | * THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "IntlCollator.h" |
30 | |
31 | #if ENABLE(INTL) |
32 | |
33 | #include "CatchScope.h" |
34 | #include "Error.h" |
35 | #include "IntlCollatorConstructor.h" |
36 | #include "IntlObject.h" |
37 | #include "JSBoundFunction.h" |
38 | #include "JSCInlines.h" |
39 | #include "ObjectConstructor.h" |
40 | #include "SlotVisitorInlines.h" |
41 | #include "StructureInlines.h" |
42 | #include <unicode/ucol.h> |
43 | #include <wtf/unicode/Collator.h> |
44 | |
45 | namespace JSC { |
46 | |
47 | const ClassInfo IntlCollator::s_info = { "Object" , &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) }; |
48 | |
// Unicode extension keys that matter to a collator, followed by the position of each
// key inside that array. The locale-data functions in this file switch on these positions.
static const char* const relevantCollatorExtensionKeys[3] = { "co", "kn", "kf" };
static constexpr size_t indexOfExtensionKeyCo = 0;
static constexpr size_t indexOfExtensionKeyKn = 1;
static constexpr size_t indexOfExtensionKeyKf = 2;
53 | |
54 | void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const |
55 | { |
56 | if (collator) |
57 | ucol_close(collator); |
58 | } |
59 | |
60 | IntlCollator* IntlCollator::create(VM& vm, Structure* structure) |
61 | { |
62 | IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure); |
63 | format->finishCreation(vm); |
64 | return format; |
65 | } |
66 | |
67 | Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
68 | { |
69 | return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); |
70 | } |
71 | |
72 | IntlCollator::IntlCollator(VM& vm, Structure* structure) |
73 | : JSDestructibleObject(vm, structure) |
74 | { |
75 | } |
76 | |
77 | void IntlCollator::finishCreation(VM& vm) |
78 | { |
79 | Base::finishCreation(vm); |
80 | ASSERT(inherits(vm, info())); |
81 | } |
82 | |
83 | void IntlCollator::destroy(JSCell* cell) |
84 | { |
85 | static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator(); |
86 | } |
87 | |
88 | void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) |
89 | { |
90 | IntlCollator* thisObject = jsCast<IntlCollator*>(cell); |
91 | ASSERT_GC_OBJECT_INHERITS(thisObject, info()); |
92 | |
93 | Base::visitChildren(thisObject, visitor); |
94 | |
95 | visitor.append(thisObject->m_boundCompare); |
96 | } |
97 | |
98 | static Vector<String> sortLocaleData(const String& locale, size_t keyIndex) |
99 | { |
100 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
101 | Vector<String> keyLocaleData; |
102 | switch (keyIndex) { |
103 | case indexOfExtensionKeyCo: { |
104 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
105 | keyLocaleData.append({ }); |
106 | |
107 | UErrorCode status = U_ZERO_ERROR; |
108 | UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation" , locale.utf8().data(), false, &status); |
109 | if (U_SUCCESS(status)) { |
110 | const char* collation; |
111 | while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) { |
112 | // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." |
113 | if (!strcmp(collation, "standard" ) || !strcmp(collation, "search" )) |
114 | continue; |
115 | |
116 | // Map keyword values to BCP 47 equivalents. |
117 | if (!strcmp(collation, "dictionary" )) |
118 | collation = "dict" ; |
119 | else if (!strcmp(collation, "gb2312han" )) |
120 | collation = "gb2312" ; |
121 | else if (!strcmp(collation, "phonebook" )) |
122 | collation = "phonebk" ; |
123 | else if (!strcmp(collation, "traditional" )) |
124 | collation = "trad" ; |
125 | |
126 | keyLocaleData.append(collation); |
127 | } |
128 | uenum_close(enumeration); |
129 | } |
130 | break; |
131 | } |
132 | case indexOfExtensionKeyKn: |
133 | keyLocaleData.reserveInitialCapacity(2); |
134 | keyLocaleData.uncheckedAppend("false"_s ); |
135 | keyLocaleData.uncheckedAppend("true"_s ); |
136 | break; |
137 | case indexOfExtensionKeyKf: |
138 | keyLocaleData.reserveInitialCapacity(3); |
139 | keyLocaleData.uncheckedAppend("false"_s ); |
140 | keyLocaleData.uncheckedAppend("lower"_s ); |
141 | keyLocaleData.uncheckedAppend("upper"_s ); |
142 | break; |
143 | default: |
144 | ASSERT_NOT_REACHED(); |
145 | } |
146 | return keyLocaleData; |
147 | } |
148 | |
149 | static Vector<String> searchLocaleData(const String&, size_t keyIndex) |
150 | { |
151 | // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) |
152 | Vector<String> keyLocaleData; |
153 | switch (keyIndex) { |
154 | case indexOfExtensionKeyCo: |
155 | // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." |
156 | keyLocaleData.reserveInitialCapacity(1); |
157 | keyLocaleData.append({ }); |
158 | break; |
159 | case indexOfExtensionKeyKn: |
160 | keyLocaleData.reserveInitialCapacity(2); |
161 | keyLocaleData.uncheckedAppend("false"_s ); |
162 | keyLocaleData.uncheckedAppend("true"_s ); |
163 | break; |
164 | case indexOfExtensionKeyKf: |
165 | keyLocaleData.reserveInitialCapacity(3); |
166 | keyLocaleData.uncheckedAppend("false"_s ); |
167 | keyLocaleData.uncheckedAppend("lower"_s ); |
168 | keyLocaleData.uncheckedAppend("upper"_s ); |
169 | break; |
170 | default: |
171 | ASSERT_NOT_REACHED(); |
172 | } |
173 | return keyLocaleData; |
174 | } |
175 | |
176 | void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue) |
177 | { |
178 | VM& vm = globalObject->vm(); |
179 | auto scope = DECLARE_THROW_SCOPE(vm); |
180 | |
181 | // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402) |
182 | // https://tc39.github.io/ecma402/#sec-initializecollator |
183 | |
184 | auto requestedLocales = canonicalizeLocaleList(globalObject, locales); |
185 | RETURN_IF_EXCEPTION(scope, void()); |
186 | |
187 | JSValue options = optionsValue; |
188 | if (!optionsValue.isUndefined()) { |
189 | options = optionsValue.toObject(globalObject); |
190 | RETURN_IF_EXCEPTION(scope, void()); |
191 | } |
192 | |
193 | String usageString = intlStringOption(globalObject, options, vm.propertyNames->usage, { "sort" , "search" }, "usage must be either \"sort\" or \"search\"" , "sort" ); |
194 | RETURN_IF_EXCEPTION(scope, void()); |
195 | if (usageString == "sort" ) |
196 | m_usage = Usage::Sort; |
197 | else if (usageString == "search" ) |
198 | m_usage = Usage::Search; |
199 | else |
200 | ASSERT_NOT_REACHED(); |
201 | |
202 | auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData; |
203 | |
204 | HashMap<String, String> opt; |
205 | |
206 | String matcher = intlStringOption(globalObject, options, vm.propertyNames->localeMatcher, { "lookup" , "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"" , "best fit" ); |
207 | RETURN_IF_EXCEPTION(scope, void()); |
208 | opt.add("localeMatcher"_s , matcher); |
209 | |
210 | { |
211 | String numericString; |
212 | bool usesFallback; |
213 | bool numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric, usesFallback); |
214 | RETURN_IF_EXCEPTION(scope, void()); |
215 | if (!usesFallback) |
216 | numericString = numeric ? "true"_s : "false"_s ; |
217 | if (!numericString.isNull()) |
218 | opt.add("kn"_s , numericString); |
219 | } |
220 | { |
221 | String caseFirst = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper" , "lower" , "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"" , nullptr); |
222 | RETURN_IF_EXCEPTION(scope, void()); |
223 | if (!caseFirst.isNull()) |
224 | opt.add("kf"_s , caseFirst); |
225 | } |
226 | |
227 | auto& availableLocales = globalObject->intlCollatorAvailableLocales(); |
228 | auto result = resolveLocale(globalObject, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData); |
229 | |
230 | m_locale = result.get("locale"_s ); |
231 | if (m_locale.isEmpty()) { |
232 | throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s ); |
233 | return; |
234 | } |
235 | |
236 | const String& collation = result.get("co"_s ); |
237 | m_collation = collation.isNull() ? "default"_s : collation; |
238 | m_numeric = result.get("kn"_s ) == "true" ; |
239 | |
240 | const String& caseFirst = result.get("kf"_s ); |
241 | if (caseFirst == "lower" ) |
242 | m_caseFirst = CaseFirst::Lower; |
243 | else if (caseFirst == "upper" ) |
244 | m_caseFirst = CaseFirst::Upper; |
245 | else |
246 | m_caseFirst = CaseFirst::False; |
247 | |
248 | String sensitivityString = intlStringOption(globalObject, options, vm.propertyNames->sensitivity, { "base" , "accent" , "case" , "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"" , nullptr); |
249 | RETURN_IF_EXCEPTION(scope, void()); |
250 | if (sensitivityString == "base" ) |
251 | m_sensitivity = Sensitivity::Base; |
252 | else if (sensitivityString == "accent" ) |
253 | m_sensitivity = Sensitivity::Accent; |
254 | else if (sensitivityString == "case" ) |
255 | m_sensitivity = Sensitivity::Case; |
256 | else |
257 | m_sensitivity = Sensitivity::Variant; |
258 | |
259 | bool usesFallback; |
260 | bool ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation, usesFallback); |
261 | if (usesFallback) |
262 | ignorePunctuation = false; |
263 | RETURN_IF_EXCEPTION(scope, void()); |
264 | m_ignorePunctuation = ignorePunctuation; |
265 | |
266 | m_initializedCollator = true; |
267 | } |
268 | |
269 | void IntlCollator::createCollator(JSGlobalObject* globalObject) |
270 | { |
271 | VM& vm = globalObject->vm(); |
272 | auto scope = DECLARE_CATCH_SCOPE(vm); |
273 | ASSERT(!m_collator); |
274 | |
275 | if (!m_initializedCollator) { |
276 | initializeCollator(globalObject, jsUndefined(), jsUndefined()); |
277 | scope.assertNoException(); |
278 | } |
279 | |
280 | UErrorCode status = U_ZERO_ERROR; |
281 | auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status)); |
282 | if (U_FAILURE(status)) |
283 | return; |
284 | |
285 | UColAttributeValue strength = UCOL_PRIMARY; |
286 | UColAttributeValue caseLevel = UCOL_OFF; |
287 | UColAttributeValue caseFirst = UCOL_OFF; |
288 | switch (m_sensitivity) { |
289 | case Sensitivity::Base: |
290 | break; |
291 | case Sensitivity::Accent: |
292 | strength = UCOL_SECONDARY; |
293 | break; |
294 | case Sensitivity::Case: |
295 | caseLevel = UCOL_ON; |
296 | break; |
297 | case Sensitivity::Variant: |
298 | strength = UCOL_TERTIARY; |
299 | break; |
300 | } |
301 | switch (m_caseFirst) { |
302 | case CaseFirst::False: |
303 | break; |
304 | case CaseFirst::Lower: |
305 | caseFirst = UCOL_LOWER_FIRST; |
306 | break; |
307 | case CaseFirst::Upper: |
308 | caseFirst = UCOL_UPPER_FIRST; |
309 | break; |
310 | } |
311 | |
312 | ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status); |
313 | ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); |
314 | ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status); |
315 | ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); |
316 | |
317 | // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be |
318 | // ignored. There is currently no way to ignore only punctuation. |
319 | ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); |
320 | |
321 | // "The method is required to return 0 when comparing Strings that are considered canonically |
322 | // equivalent by the Unicode standard." |
323 | ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
324 | if (U_FAILURE(status)) |
325 | return; |
326 | |
327 | m_collator = WTFMove(collator); |
328 | } |
329 | |
330 | JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) |
331 | { |
332 | VM& vm = globalObject->vm(); |
333 | auto scope = DECLARE_THROW_SCOPE(vm); |
334 | |
335 | // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0) |
336 | if (!m_collator) { |
337 | createCollator(globalObject); |
338 | if (!m_collator) |
339 | return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s )); |
340 | } |
341 | |
342 | UErrorCode status = U_ZERO_ERROR; |
343 | UCollationResult result = UCOL_EQUAL; |
344 | if (x.is8Bit() && y.is8Bit() && x.isAllASCII() && y.isAllASCII()) |
345 | result = ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status); |
346 | else { |
347 | auto getCharacters = [&] (const StringView& view, Vector<UChar>& buffer) -> const UChar* { |
348 | if (!view.is8Bit()) |
349 | return view.characters16(); |
350 | buffer.resize(view.length()); |
351 | StringImpl::copyCharacters(buffer.data(), view.characters8(), view.length()); |
352 | return buffer.data(); |
353 | }; |
354 | |
355 | Vector<UChar> xBuffer; |
356 | Vector<UChar> yBuffer; |
357 | const UChar* xCharacters = getCharacters(x, xBuffer); |
358 | const UChar* yCharacters = getCharacters(y, yBuffer); |
359 | result = ucol_strcoll(m_collator.get(), xCharacters, x.length(), yCharacters, y.length()); |
360 | } |
361 | if (U_FAILURE(status)) |
362 | return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s )); |
363 | return jsNumber(result); |
364 | } |
365 | |
366 | ASCIILiteral IntlCollator::usageString(Usage usage) |
367 | { |
368 | switch (usage) { |
369 | case Usage::Sort: |
370 | return "sort"_s ; |
371 | case Usage::Search: |
372 | return "search"_s ; |
373 | } |
374 | ASSERT_NOT_REACHED(); |
375 | return ASCIILiteral::null(); |
376 | } |
377 | |
378 | ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity) |
379 | { |
380 | switch (sensitivity) { |
381 | case Sensitivity::Base: |
382 | return "base"_s ; |
383 | case Sensitivity::Accent: |
384 | return "accent"_s ; |
385 | case Sensitivity::Case: |
386 | return "case"_s ; |
387 | case Sensitivity::Variant: |
388 | return "variant"_s ; |
389 | } |
390 | ASSERT_NOT_REACHED(); |
391 | return ASCIILiteral::null(); |
392 | } |
393 | |
394 | ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst) |
395 | { |
396 | switch (caseFirst) { |
397 | case CaseFirst::False: |
398 | return "false"_s ; |
399 | case CaseFirst::Lower: |
400 | return "lower"_s ; |
401 | case CaseFirst::Upper: |
402 | return "upper"_s ; |
403 | } |
404 | ASSERT_NOT_REACHED(); |
405 | return ASCIILiteral::null(); |
406 | } |
407 | |
408 | JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) |
409 | { |
410 | VM& vm = globalObject->vm(); |
411 | auto scope = DECLARE_THROW_SCOPE(vm); |
412 | |
413 | // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0) |
414 | // The function returns a new object whose properties and attributes are set as if |
415 | // constructed by an object literal assigning to each of the following properties the |
416 | // value of the corresponding internal slot of this Collator object (see 10.4): locale, |
417 | // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown |
418 | // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]] |
419 | // internal slot of the standard built-in object that is the initial value of |
420 | // Intl.Collator. |
421 | |
422 | if (!m_initializedCollator) { |
423 | initializeCollator(globalObject, jsUndefined(), jsUndefined()); |
424 | scope.assertNoException(); |
425 | } |
426 | |
427 | JSObject* options = constructEmptyObject(globalObject); |
428 | options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale)); |
429 | options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage))); |
430 | options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity))); |
431 | options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); |
432 | options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation)); |
433 | options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); |
434 | options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst))); |
435 | return options; |
436 | } |
437 | |
438 | void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format) |
439 | { |
440 | m_boundCompare.set(vm, this, format); |
441 | } |
442 | |
443 | } // namespace JSC |
444 | |
445 | #endif // ENABLE(INTL) |
446 | |