1/*
2 * Copyright (C) 2015 Andy VanWagoner ([email protected])
3 * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected])
4 * Copyright (C) 2016-2017 Apple Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25 * THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "IntlCollator.h"
30
31#if ENABLE(INTL)
32
33#include "CatchScope.h"
34#include "Error.h"
35#include "IntlCollatorConstructor.h"
36#include "IntlObject.h"
37#include "JSBoundFunction.h"
38#include "JSCInlines.h"
39#include "ObjectConstructor.h"
40#include "SlotVisitorInlines.h"
41#include "StructureInlines.h"
42#include <unicode/ucol.h>
43#include <wtf/unicode/Collator.h>
44
45namespace JSC {
46
47const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };
48
49static const char* const relevantCollatorExtensionKeys[3] = { "co", "kn", "kf" };
50static const size_t indexOfExtensionKeyCo = 0;
51static const size_t indexOfExtensionKeyKn = 1;
52static const size_t indexOfExtensionKeyKf = 2;
53
54void IntlCollator::UCollatorDeleter::operator()(UCollator* collator) const
55{
56 if (collator)
57 ucol_close(collator);
58}
59
60IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
61{
62 IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
63 format->finishCreation(vm);
64 return format;
65}
66
67Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
68{
69 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
70}
71
72IntlCollator::IntlCollator(VM& vm, Structure* structure)
73 : JSDestructibleObject(vm, structure)
74{
75}
76
77void IntlCollator::finishCreation(VM& vm)
78{
79 Base::finishCreation(vm);
80 ASSERT(inherits(vm, info()));
81}
82
83void IntlCollator::destroy(JSCell* cell)
84{
85 static_cast<IntlCollator*>(cell)->IntlCollator::~IntlCollator();
86}
87
88void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
89{
90 IntlCollator* thisObject = jsCast<IntlCollator*>(cell);
91 ASSERT_GC_OBJECT_INHERITS(thisObject, info());
92
93 Base::visitChildren(thisObject, visitor);
94
95 visitor.append(thisObject->m_boundCompare);
96}
97
98static Vector<String> sortLocaleData(const String& locale, size_t keyIndex)
99{
100 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
101 Vector<String> keyLocaleData;
102 switch (keyIndex) {
103 case indexOfExtensionKeyCo: {
104 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
105 keyLocaleData.append({ });
106
107 UErrorCode status = U_ZERO_ERROR;
108 UEnumeration* enumeration = ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status);
109 if (U_SUCCESS(status)) {
110 const char* collation;
111 while ((collation = uenum_next(enumeration, nullptr, &status)) && U_SUCCESS(status)) {
112 // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array."
113 if (!strcmp(collation, "standard") || !strcmp(collation, "search"))
114 continue;
115
116 // Map keyword values to BCP 47 equivalents.
117 if (!strcmp(collation, "dictionary"))
118 collation = "dict";
119 else if (!strcmp(collation, "gb2312han"))
120 collation = "gb2312";
121 else if (!strcmp(collation, "phonebook"))
122 collation = "phonebk";
123 else if (!strcmp(collation, "traditional"))
124 collation = "trad";
125
126 keyLocaleData.append(collation);
127 }
128 uenum_close(enumeration);
129 }
130 break;
131 }
132 case indexOfExtensionKeyKn:
133 keyLocaleData.reserveInitialCapacity(2);
134 keyLocaleData.uncheckedAppend("false"_s);
135 keyLocaleData.uncheckedAppend("true"_s);
136 break;
137 case indexOfExtensionKeyKf:
138 keyLocaleData.reserveInitialCapacity(3);
139 keyLocaleData.uncheckedAppend("false"_s);
140 keyLocaleData.uncheckedAppend("lower"_s);
141 keyLocaleData.uncheckedAppend("upper"_s);
142 break;
143 default:
144 ASSERT_NOT_REACHED();
145 }
146 return keyLocaleData;
147}
148
149static Vector<String> searchLocaleData(const String&, size_t keyIndex)
150{
151 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
152 Vector<String> keyLocaleData;
153 switch (keyIndex) {
154 case indexOfExtensionKeyCo:
155 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
156 keyLocaleData.reserveInitialCapacity(1);
157 keyLocaleData.append({ });
158 break;
159 case indexOfExtensionKeyKn:
160 keyLocaleData.reserveInitialCapacity(2);
161 keyLocaleData.uncheckedAppend("false"_s);
162 keyLocaleData.uncheckedAppend("true"_s);
163 break;
164 case indexOfExtensionKeyKf:
165 keyLocaleData.reserveInitialCapacity(3);
166 keyLocaleData.uncheckedAppend("false"_s);
167 keyLocaleData.uncheckedAppend("lower"_s);
168 keyLocaleData.uncheckedAppend("upper"_s);
169 break;
170 default:
171 ASSERT_NOT_REACHED();
172 }
173 return keyLocaleData;
174}
175
176void IntlCollator::initializeCollator(ExecState& state, JSValue locales, JSValue optionsValue)
177{
178 VM& vm = state.vm();
179 auto scope = DECLARE_THROW_SCOPE(vm);
180
181 // 10.1.1 InitializeCollator (collator, locales, options) (ECMA-402)
182 // https://tc39.github.io/ecma402/#sec-initializecollator
183
184 auto requestedLocales = canonicalizeLocaleList(state, locales);
185 RETURN_IF_EXCEPTION(scope, void());
186
187 JSObject* options;
188 if (optionsValue.isUndefined())
189 options = constructEmptyObject(&state, state.lexicalGlobalObject()->nullPrototypeObjectStructure());
190 else {
191 options = optionsValue.toObject(&state);
192 RETURN_IF_EXCEPTION(scope, void());
193 }
194
195 String usageString = intlStringOption(state, options, vm.propertyNames->usage, { "sort", "search" }, "usage must be either \"sort\" or \"search\"", "sort");
196 RETURN_IF_EXCEPTION(scope, void());
197 if (usageString == "sort")
198 m_usage = Usage::Sort;
199 else if (usageString == "search")
200 m_usage = Usage::Search;
201 else
202 ASSERT_NOT_REACHED();
203
204 auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;
205
206 HashMap<String, String> opt;
207
208 String matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
209 RETURN_IF_EXCEPTION(scope, void());
210 opt.add("localeMatcher"_s, matcher);
211
212 {
213 String numericString;
214 bool usesFallback;
215 bool numeric = intlBooleanOption(state, options, vm.propertyNames->numeric, usesFallback);
216 RETURN_IF_EXCEPTION(scope, void());
217 if (!usesFallback)
218 numericString = numeric ? "true"_s : "false"_s;
219 if (!numericString.isNull())
220 opt.add("kn"_s, numericString);
221 }
222 {
223 String caseFirst = intlStringOption(state, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
224 RETURN_IF_EXCEPTION(scope, void());
225 if (!caseFirst.isNull())
226 opt.add("kf"_s, caseFirst);
227 }
228
229 auto& availableLocales = state.jsCallee()->globalObject(vm)->intlCollatorAvailableLocales();
230 auto result = resolveLocale(state, availableLocales, requestedLocales, opt, relevantCollatorExtensionKeys, WTF_ARRAY_LENGTH(relevantCollatorExtensionKeys), localeData);
231
232 m_locale = result.get("locale"_s);
233 if (m_locale.isEmpty()) {
234 throwTypeError(&state, scope, "failed to initialize Collator due to invalid locale"_s);
235 return;
236 }
237
238 const String& collation = result.get("co"_s);
239 m_collation = collation.isNull() ? "default"_s : collation;
240 m_numeric = result.get("kn"_s) == "true";
241
242 const String& caseFirst = result.get("kf"_s);
243 if (caseFirst == "lower")
244 m_caseFirst = CaseFirst::Lower;
245 else if (caseFirst == "upper")
246 m_caseFirst = CaseFirst::Upper;
247 else
248 m_caseFirst = CaseFirst::False;
249
250 String sensitivityString = intlStringOption(state, options, vm.propertyNames->sensitivity, { "base", "accent", "case", "variant" }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\"", nullptr);
251 RETURN_IF_EXCEPTION(scope, void());
252 if (sensitivityString == "base")
253 m_sensitivity = Sensitivity::Base;
254 else if (sensitivityString == "accent")
255 m_sensitivity = Sensitivity::Accent;
256 else if (sensitivityString == "case")
257 m_sensitivity = Sensitivity::Case;
258 else
259 m_sensitivity = Sensitivity::Variant;
260
261 bool usesFallback;
262 bool ignorePunctuation = intlBooleanOption(state, options, vm.propertyNames->ignorePunctuation, usesFallback);
263 if (usesFallback)
264 ignorePunctuation = false;
265 RETURN_IF_EXCEPTION(scope, void());
266 m_ignorePunctuation = ignorePunctuation;
267
268 m_initializedCollator = true;
269}
270
271void IntlCollator::createCollator(ExecState& state)
272{
273 VM& vm = state.vm();
274 auto scope = DECLARE_CATCH_SCOPE(vm);
275 ASSERT(!m_collator);
276
277 if (!m_initializedCollator) {
278 initializeCollator(state, jsUndefined(), jsUndefined());
279 scope.assertNoException();
280 }
281
282 UErrorCode status = U_ZERO_ERROR;
283 auto collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(m_locale.utf8().data(), &status));
284 if (U_FAILURE(status))
285 return;
286
287 UColAttributeValue strength = UCOL_PRIMARY;
288 UColAttributeValue caseLevel = UCOL_OFF;
289 UColAttributeValue caseFirst = UCOL_OFF;
290 switch (m_sensitivity) {
291 case Sensitivity::Base:
292 break;
293 case Sensitivity::Accent:
294 strength = UCOL_SECONDARY;
295 break;
296 case Sensitivity::Case:
297 caseLevel = UCOL_ON;
298 break;
299 case Sensitivity::Variant:
300 strength = UCOL_TERTIARY;
301 break;
302 }
303 switch (m_caseFirst) {
304 case CaseFirst::False:
305 break;
306 case CaseFirst::Lower:
307 caseFirst = UCOL_LOWER_FIRST;
308 break;
309 case CaseFirst::Upper:
310 caseFirst = UCOL_UPPER_FIRST;
311 break;
312 }
313
314 ucol_setAttribute(collator.get(), UCOL_STRENGTH, strength, &status);
315 ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
316 ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
317 ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);
318
319 // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be
320 // ignored. There is currently no way to ignore only punctuation.
321 ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);
322
323 // "The method is required to return 0 when comparing Strings that are considered canonically
324 // equivalent by the Unicode standard."
325 ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
326 if (U_FAILURE(status))
327 return;
328
329 m_collator = WTFMove(collator);
330}
331
332JSValue IntlCollator::compareStrings(ExecState& state, StringView x, StringView y)
333{
334 VM& vm = state.vm();
335 auto scope = DECLARE_THROW_SCOPE(vm);
336
337 // 10.3.4 CompareStrings abstract operation (ECMA-402 2.0)
338 if (!m_collator) {
339 createCollator(state);
340 if (!m_collator)
341 return throwException(&state, scope, createError(&state, "Failed to compare strings."_s));
342 }
343
344 UErrorCode status = U_ZERO_ERROR;
345 UCharIterator iteratorX = createIterator(x);
346 UCharIterator iteratorY = createIterator(y);
347 auto result = ucol_strcollIter(m_collator.get(), &iteratorX, &iteratorY, &status);
348 if (U_FAILURE(status))
349 return throwException(&state, scope, createError(&state, "Failed to compare strings."_s));
350 return jsNumber(result);
351}
352
353ASCIILiteral IntlCollator::usageString(Usage usage)
354{
355 switch (usage) {
356 case Usage::Sort:
357 return "sort"_s;
358 case Usage::Search:
359 return "search"_s;
360 }
361 ASSERT_NOT_REACHED();
362 return ASCIILiteral::null();
363}
364
365ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
366{
367 switch (sensitivity) {
368 case Sensitivity::Base:
369 return "base"_s;
370 case Sensitivity::Accent:
371 return "accent"_s;
372 case Sensitivity::Case:
373 return "case"_s;
374 case Sensitivity::Variant:
375 return "variant"_s;
376 }
377 ASSERT_NOT_REACHED();
378 return ASCIILiteral::null();
379}
380
381ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
382{
383 switch (caseFirst) {
384 case CaseFirst::False:
385 return "false"_s;
386 case CaseFirst::Lower:
387 return "lower"_s;
388 case CaseFirst::Upper:
389 return "upper"_s;
390 }
391 ASSERT_NOT_REACHED();
392 return ASCIILiteral::null();
393}
394
395JSObject* IntlCollator::resolvedOptions(ExecState& state)
396{
397 VM& vm = state.vm();
398 auto scope = DECLARE_THROW_SCOPE(vm);
399
400 // 10.3.5 Intl.Collator.prototype.resolvedOptions() (ECMA-402 2.0)
401 // The function returns a new object whose properties and attributes are set as if
402 // constructed by an object literal assigning to each of the following properties the
403 // value of the corresponding internal slot of this Collator object (see 10.4): locale,
404 // usage, sensitivity, ignorePunctuation, collation, as well as those properties shown
405 // in Table 1 whose keys are included in the %Collator%[[relevantExtensionKeys]]
406 // internal slot of the standard built-in object that is the initial value of
407 // Intl.Collator.
408
409 if (!m_initializedCollator) {
410 initializeCollator(state, jsUndefined(), jsUndefined());
411 scope.assertNoException();
412 }
413
414 JSObject* options = constructEmptyObject(&state);
415 options->putDirect(vm, vm.propertyNames->locale, jsString(&state, m_locale));
416 options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(&state, usageString(m_usage)));
417 options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(&state, sensitivityString(m_sensitivity)));
418 options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation));
419 options->putDirect(vm, vm.propertyNames->collation, jsString(&state, m_collation));
420 options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric));
421 options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(&state, caseFirstString(m_caseFirst)));
422 return options;
423}
424
425void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
426{
427 m_boundCompare.set(vm, this, format);
428}
429
430} // namespace JSC
431
432#endif // ENABLE(INTL)
433