1/*
2 * Copyright (C) 2015 Andy VanWagoner ([email protected])
3 * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected])
4 * Copyright (C) 2016-2019 Apple Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25 * THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "IntlCollator.h"
30
31#if ENABLE(INTL)
32
33#include "CatchScope.h"
34#include "Error.h"
35#include "IntlCollatorConstructor.h"
36#include "IntlObject.h"
37#include "JSBoundFunction.h"
38#include "JSCInlines.h"
39#include "ObjectConstructor.h"
40#include "SlotVisitorInlines.h"
41#include "StructureInlines.h"
42#include <unicode/ucol.h>
43#include <wtf/unicode/Collator.h>
44
45namespace JSC {
46
47const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };
48
49static const char* const relevantCollatorExtensionKeys[3] = { "co", "kn", "kf" };
50static const size_t indexOfExtensionKeyCo = 0;
51static const size_t indexOfExtensionKeyKn = 1;
52static const size_t indexOfExtensionKeyKf = 2;
53
54void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const
55{
56 if (collator)
57 ucol_close(collator);
58}
59
60IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
61{
62 IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
63 format->finishCreation(vm);
64 return format;
65}
66
67Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
68{
69 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
70}
71
72IntlCollator::IntlCollator(VM& vm, Structure* structure)
73 : JSDestructibleObject(vm, structure)
74{
75}
76
77void IntlCollator::finishCreation(VM& vm)
78{
79 Base::finishCreation(vm);
80 ASSERT(inherits(vm, info()));
81}
82
83void IntlCollator::destroy(JSCell* cell)
84{
85 static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator();
86}
87
88void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
89{
90 IntlCollator* thisObject = jsCast<IntlCollator*>(cell);
91 ASSERT_GC_OBJECT_INHERITS(thisObject, info());
92
93 Base::visitChildren(thisObject, visitor);
94
95 visitor.append(thisObject->m_boundCompare);
96}
97
98static Vector<String> sortLocaleData(const String& locale, size_t keyIndex)
99{
100 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
101 Vector<String> keyLocaleData;
102 switch (keyIndex) {
103 case indexOfExtensionKeyCo: {
104 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
105 keyLocaleData.append({ });
106
107 UErrorCode status = U_ZERO_ERROR;
108 UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status);
109 if (U_SUCCESS(status)) {
110 const char* collation;
111 while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) {
112 // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array."
113 if (!strcmp(collation, "standard") || !strcmp(collation, "search"))
114 continue;
115
116 // Map keyword values to BCP 47 equivalents.
117 if (!strcmp(collation, "dictionary"))
118 collation = "dict";
119 else if (!strcmp(collation, "gb2312han"))
120 collation = "gb2312";
121 else if (!strcmp(collation, "phonebook"))
122 collation = "phonebk";
123 else if (!strcmp(collation, "traditional"))
124 collation = "trad";
125
126 keyLocaleData.append(collation);
127 }
128 uenum_close(enumeration);
129 }
130 break;
131 }
132 case indexOfExtensionKeyKn:
133 keyLocaleData.reserveInitialCapacity(2);
134 keyLocaleData.uncheckedAppend("false"_s);
135 keyLocaleData.uncheckedAppend("true"_s);
136 break;
137 case indexOfExtensionKeyKf:
138 keyLocaleData.reserveInitialCapacity(3);
139 keyLocaleData.uncheckedAppend("false"_s);
140 keyLocaleData.uncheckedAppend("lower"_s);
141 keyLocaleData.uncheckedAppend("upper"_s);
142 break;
143 default:
144 ASSERT_NOT_REACHED();
145 }
146 return keyLocaleData;
147}
148
149static Vector<String> searchLocaleData(const String&, size_t keyIndex)
150{
151 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
152 Vector<String> keyLocaleData;
153 switch (keyIndex) {
154 case indexOfExtensionKeyCo:
155 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
156 keyLocaleData.reserveInitialCapacity(1);
157 keyLocaleData.append({ });
158 break;
159 case indexOfExtensionKeyKn:
160 keyLocaleData.reserveInitialCapacity(2);
161 keyLocaleData.uncheckedAppend("false"_s);
162 keyLocaleData.uncheckedAppend("true"_s);
163 break;
164 case indexOfExtensionKeyKf:
165 keyLocaleData.reserveInitialCapacity(3);
166 keyLocaleData.uncheckedAppend("false"_s);
167 keyLocaleData.uncheckedAppend("lower"_s);
168 keyLocaleData.uncheckedAppend("upper"_s);
169 break;
170 default:
171 ASSERT_NOT_REACHED();
172 }
173 return keyLocaleData;
174}
175
176void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
177{
178 VM& vm = globalObject->vm();
179 auto scope = DECLARE_THROW_SCOPE(vm);
180
181 // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402)
182 // https://tc39.github.io/ecma402/#sec-initializecollator
183
184 auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
185 RETURN_IF_EXCEPTION(scope, void());
186
187 JSValue options = optionsValue;
188 if (!optionsValue.isUndefined()) {
189 options = optionsValue.toObject(globalObject);
190 RETURN_IF_EXCEPTION(scope, void());
191 }
192
193 String usageString = intlStringOption(globalObject, options, vm.propertyNames->usage, { "sort", "search" }, "usage must be either \"sort\" or \"search\"", "sort");
194 RETURN_IF_EXCEPTION(scope, void());
195 if (usageString == "sort")
196 m_usage = Usage::Sort;
197 else if (usageString == "search")
198 m_usage = Usage::Search;
199 else
200 ASSERT_NOT_REACHED();
201
202 auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;
203
204 HashMap<String, String> opt;
205
206 String matcher = intlStringOption(globalObject, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
207 RETURN_IF_EXCEPTION(scope, void());
208 opt.add("localeMatcher"_s, matcher);
209
210 {
211 String numericString;
212 bool usesFallback;
213 bool numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric, usesFallback);
214 RETURN_IF_EXCEPTION(scope, void());
215 if (!usesFallback)
216 numericString = numeric ? "true"_s : "false"_s;
217 if (!numericString.isNull())
218 opt.add("kn"_s, numericString);
219 }
220 {
221 String caseFirst = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
222 RETURN_IF_EXCEPTION(scope, void());
223 if (!caseFirst.isNull())
224 opt.add("kf"_s, caseFirst);
225 }
226
227 auto& availableLocales = globalObject->intlCollatorAvailableLocales();
228 auto result = resolveLocale(globalObject, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData);
229
230 m_locale = result.get("locale"_s);
231 if (m_locale.isEmpty()) {
232 throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s);
233 return;
234 }
235
236 const String& collation = result.get("co"_s);
237 m_collation = collation.isNull() ? "default"_s : collation;
238 m_numeric = result.get("kn"_s) == "true";
239
240 const String& caseFirst = result.get("kf"_s);
241 if (caseFirst == "lower")
242 m_caseFirst = CaseFirst::Lower;
243 else if (caseFirst == "upper")
244 m_caseFirst = CaseFirst::Upper;
245 else
246 m_caseFirst = CaseFirst::False;
247
248 String sensitivityString = intlStringOption(globalObject, options, vm.propertyNames->sensitivity, { "base", "accent", "case", "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"", nullptr);
249 RETURN_IF_EXCEPTION(scope, void());
250 if (sensitivityString == "base")
251 m_sensitivity = Sensitivity::Base;
252 else if (sensitivityString == "accent")
253 m_sensitivity = Sensitivity::Accent;
254 else if (sensitivityString == "case")
255 m_sensitivity = Sensitivity::Case;
256 else
257 m_sensitivity = Sensitivity::Variant;
258
259 bool usesFallback;
260 bool ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation, usesFallback);
261 if (usesFallback)
262 ignorePunctuation = false;
263 RETURN_IF_EXCEPTION(scope, void());
264 m_ignorePunctuation = ignorePunctuation;
265
266 m_initializedCollator = true;
267}
268
269void IntlCollator::createCollator(JSGlobalObject* globalObject)
270{
271 VM& vm = globalObject->vm();
272 auto scope = DECLARE_CATCH_SCOPE(vm);
273 ASSERT(!m_collator);
274
275 if (!m_initializedCollator) {
276 initializeCollator(globalObject, jsUndefined(), jsUndefined());
277 scope.assertNoException();
278 }
279
280 UErrorCode status = U_ZERO_ERROR;
281 auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status));
282 if (U_FAILURE(status))
283 return;
284
285 UColAttributeValue strength = UCOL_PRIMARY;
286 UColAttributeValue caseLevel = UCOL_OFF;
287 UColAttributeValue caseFirst = UCOL_OFF;
288 switch (m_sensitivity) {
289 case Sensitivity::Base:
290 break;
291 case Sensitivity::Accent:
292 strength = UCOL_SECONDARY;
293 break;
294 case Sensitivity::Case:
295 caseLevel = UCOL_ON;
296 break;
297 case Sensitivity::Variant:
298 strength = UCOL_TERTIARY;
299 break;
300 }
301 switch (m_caseFirst) {
302 case CaseFirst::False:
303 break;
304 case CaseFirst::Lower:
305 caseFirst = UCOL_LOWER_FIRST;
306 break;
307 case CaseFirst::Upper:
308 caseFirst = UCOL_UPPER_FIRST;
309 break;
310 }
311
312 ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status);
313 ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
314 ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
315 ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);
316
317 // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be
318 // ignored. There is currently no way to ignore only punctuation.
319 ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);
320
321 // "The method is required to return 0 when comparing Strings that are considered canonically
322 // equivalent by the Unicode standard."
323 ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
324 if (U_FAILURE(status))
325 return;
326
327 m_collator = WTFMove(collator);
328}
329
330JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y)
331{
332 VM& vm = globalObject->vm();
333 auto scope = DECLARE_THROW_SCOPE(vm);
334
335 // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0)
336 if (!m_collator) {
337 createCollator(globalObject);
338 if (!m_collator)
339 return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));
340 }
341
342 UErrorCode status = U_ZERO_ERROR;
343 UCollationResult result = UCOL_EQUAL;
344 if (x.is8Bit() && y.is8Bit() && x.isAllASCII() && y.isAllASCII())
345 result = ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status);
346 else {
347 auto getCharacters = [&] (const StringView& view, Vector<UChar>& buffer) -> const UChar* {
348 if (!view.is8Bit())
349 return view.characters16();
350 buffer.resize(view.length());
351 StringImpl::copyCharacters(buffer.data(), view.characters8(), view.length());
352 return buffer.data();
353 };
354
355 Vector<UChar> xBuffer;
356 Vector<UChar> yBuffer;
357 const UChar* xCharacters = getCharacters(x, xBuffer);
358 const UChar* yCharacters = getCharacters(y, yBuffer);
359 result = ucol_strcoll(m_collator.get(), xCharacters, x.length(), yCharacters, y.length());
360 }
361 if (U_FAILURE(status))
362 return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));
363 return jsNumber(result);
364}
365
366ASCIILiteral IntlCollator::usageString(Usage usage)
367{
368 switch (usage) {
369 case Usage::Sort:
370 return "sort"_s;
371 case Usage::Search:
372 return "search"_s;
373 }
374 ASSERT_NOT_REACHED();
375 return ASCIILiteral::null();
376}
377
378ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
379{
380 switch (sensitivity) {
381 case Sensitivity::Base:
382 return "base"_s;
383 case Sensitivity::Accent:
384 return "accent"_s;
385 case Sensitivity::Case:
386 return "case"_s;
387 case Sensitivity::Variant:
388 return "variant"_s;
389 }
390 ASSERT_NOT_REACHED();
391 return ASCIILiteral::null();
392}
393
394ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
395{
396 switch (caseFirst) {
397 case CaseFirst::False:
398 return "false"_s;
399 case CaseFirst::Lower:
400 return "lower"_s;
401 case CaseFirst::Upper:
402 return "upper"_s;
403 }
404 ASSERT_NOT_REACHED();
405 return ASCIILiteral::null();
406}
407
408JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject)
409{
410 VM& vm = globalObject->vm();
411 auto scope = DECLARE_THROW_SCOPE(vm);
412
413 // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0)
414 // The function returns a new object whose properties and attributes are set as if
415 // constructed by an object literal assigning to each of the following properties the
416 // value of the corresponding internal slot of this Collator object (see 10.4): locale,
417 // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown
418 // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]]
419 // internal slot of the standard built-in object that is the initial value of
420 // Intl.Collator.
421
422 if (!m_initializedCollator) {
423 initializeCollator(globalObject, jsUndefined(), jsUndefined());
424 scope.assertNoException();
425 }
426
427 JSObject* options = constructEmptyObject(globalObject);
428 options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale));
429 options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage)));
430 options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity)));
431 options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation));
432 options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation));
433 options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric));
434 options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst)));
435 return options;
436}
437
438void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
439{
440 m_boundCompare.set(vm, this, format);
441}
442
443} // namespace JSC
444
445#endif // ENABLE(INTL)
446