1/*
2 * Copyright (C) 2015 Andy VanWagoner ([email protected])
3 * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected])
4 * Copyright (C) 2016-2019 Apple Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25 * THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
#include "config.h"
#include "IntlObject.h"

#if ENABLE(INTL)

#include "Error.h"
#include "FunctionPrototype.h"
#include "IntlCanonicalizeLanguage.h"
#include "IntlCollatorConstructor.h"
#include "IntlCollatorPrototype.h"
#include "IntlDateTimeFormatConstructor.h"
#include "IntlDateTimeFormatPrototype.h"
#include "IntlNumberFormatConstructor.h"
#include "IntlNumberFormatPrototype.h"
#include "IntlPluralRulesConstructor.h"
#include "IntlPluralRulesPrototype.h"
#include "JSCInlines.h"
#include "JSCJSValueInlines.h"
#include "Lookup.h"
#include "ObjectPrototype.h"
#include "Options.h"
#include <mutex>
#include <unicode/uloc.h>
#include <unicode/unumsys.h>
#include <wtf/Assertions.h>
#include <wtf/Language.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/text/StringBuilder.h>
55
56namespace JSC {
57
// Compile-time check that IntlObject is trivially destructible.
STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);

// Forward declaration: the host function is named in the intlObjectTable source
// below (pulled in via IntlObject.lut.h) but is only defined later in this file.
static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(JSGlobalObject*, CallFrame*);
61
62static JSValue createCollatorConstructor(VM& vm, JSObject* object)
63{
64 IntlObject* intlObject = jsCast<IntlObject*>(object);
65 JSGlobalObject* globalObject = intlObject->globalObject(vm);
66 return IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlCollatorPrototype*>(globalObject->collatorStructure()->storedPrototypeObject()));
67}
68
69static JSValue createNumberFormatConstructor(VM& vm, JSObject* object)
70{
71 IntlObject* intlObject = jsCast<IntlObject*>(object);
72 JSGlobalObject* globalObject = intlObject->globalObject(vm);
73 return IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlNumberFormatPrototype*>(globalObject->numberFormatStructure()->storedPrototypeObject()));
74}
75
76static JSValue createDateTimeFormatConstructor(VM& vm, JSObject* object)
77{
78 IntlObject* intlObject = jsCast<IntlObject*>(object);
79 JSGlobalObject* globalObject = intlObject->globalObject(vm);
80 return IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDateTimeFormatPrototype*>(globalObject->dateTimeFormatStructure()->storedPrototypeObject()));
81}
82
83static JSValue createPluralRulesConstructor(VM& vm, JSObject* object)
84{
85 IntlObject* intlObject = jsCast<IntlObject*>(object);
86 JSGlobalObject* globalObject = intlObject->globalObject(vm);
87 return IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlPluralRulesPrototype*>(globalObject->pluralRulesStructure()->storedPrototypeObject()));
88}
89
90}
91
92#include "IntlObject.lut.h"
93
94namespace JSC {
95
96/* Source for IntlObject.lut.h
97@begin intlObjectTable
98 getCanonicalLocales intlObjectFuncGetCanonicalLocales DontEnum|Function 1
99 Collator createCollatorConstructor DontEnum|PropertyCallback
100 DateTimeFormat createDateTimeFormatConstructor DontEnum|PropertyCallback
101 NumberFormat createNumberFormatConstructor DontEnum|PropertyCallback
102 PluralRules createPluralRulesConstructor DontEnum|PropertyCallback
103@end
104*/
105
106struct MatcherResult {
107 String locale;
108 String extension;
109 size_t extensionIndex { 0 };
110};
111
// ClassInfo for IntlObject. It is named "Object", chains to Base::s_info, and
// references intlObjectTable, the static table generated into IntlObject.lut.h
// from the "@begin intlObjectTable" comment in this file.
const ClassInfo IntlObject::s_info = { "Object", &Base::s_info, &intlObjectTable, nullptr, CREATE_METHOD_TABLE(IntlObject) };
113
// Constructs the cell by forwarding to JSNonFinalObject; no other state is set
// up here. IntlObject::create() calls finishCreation() on the new object.
IntlObject::IntlObject(VM& vm, Structure* structure)
    : JSNonFinalObject(vm, structure)
{
}
118
119IntlObject* IntlObject::create(VM& vm, Structure* structure)
120{
121 IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
122 object->finishCreation(vm);
123 return object;
124}
125
126Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
127{
128 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
129}
130
131String convertICULocaleToBCP47LanguageTag(const char* localeID)
132{
133 UErrorCode status = U_ZERO_ERROR;
134 Vector<char, 32> buffer(32);
135 auto length = uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
136 if (status == U_BUFFER_OVERFLOW_ERROR) {
137 buffer.grow(length);
138 status = U_ZERO_ERROR;
139 uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status);
140 }
141 if (!U_FAILURE(status))
142 return String(buffer.data(), length);
143 return String();
144}
145
146bool intlBooleanOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, bool& usesFallback)
147{
148 // GetOption (options, property, type="boolean", values, fallback)
149 // https://tc39.github.io/ecma402/#sec-getoption
150
151 VM& vm = globalObject->vm();
152 auto scope = DECLARE_THROW_SCOPE(vm);
153
154 if (options.isUndefined()) {
155 usesFallback = true;
156 return false;
157 }
158
159 JSObject* opts = options.toObject(globalObject);
160 RETURN_IF_EXCEPTION(scope, false);
161
162 JSValue value = opts->get(globalObject, property);
163 RETURN_IF_EXCEPTION(scope, false);
164
165 if (!value.isUndefined()) {
166 bool booleanValue = value.toBoolean(globalObject);
167 usesFallback = false;
168 return booleanValue;
169 }
170
171 // Because fallback can be undefined, we let the caller handle it instead.
172 usesFallback = true;
173 return false;
174}
175
176String intlStringOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
177{
178 // GetOption (options, property, type="string", values, fallback)
179 // https://tc39.github.io/ecma402/#sec-getoption
180
181 VM& vm = globalObject->vm();
182 auto scope = DECLARE_THROW_SCOPE(vm);
183
184 if (options.isUndefined())
185 return fallback;
186
187 JSObject* opts = options.toObject(globalObject);
188 RETURN_IF_EXCEPTION(scope, String());
189
190 JSValue value = opts->get(globalObject, property);
191 RETURN_IF_EXCEPTION(scope, String());
192
193 if (!value.isUndefined()) {
194 String stringValue = value.toWTFString(globalObject);
195 RETURN_IF_EXCEPTION(scope, String());
196
197 if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
198 throwException(globalObject, scope, createRangeError(globalObject, notFound));
199 return { };
200 }
201 return stringValue;
202 }
203
204 return fallback;
205}
206
207unsigned intlNumberOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
208{
209 // GetNumberOption (options, property, minimum, maximum, fallback)
210 // https://tc39.github.io/ecma402/#sec-getnumberoption
211
212 VM& vm = globalObject->vm();
213 auto scope = DECLARE_THROW_SCOPE(vm);
214
215 if (options.isUndefined())
216 return fallback;
217
218 JSObject* opts = options.toObject(globalObject);
219 RETURN_IF_EXCEPTION(scope, 0);
220
221 JSValue value = opts->get(globalObject, property);
222 RETURN_IF_EXCEPTION(scope, 0);
223
224 RELEASE_AND_RETURN(scope, intlDefaultNumberOption(globalObject, value, property, minimum, maximum, fallback));
225}
226
227unsigned intlDefaultNumberOption(JSGlobalObject* globalObject, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
228{
229 // DefaultNumberOption (value, minimum, maximum, fallback)
230 // https://tc39.github.io/ecma402/#sec-defaultnumberoption
231
232 VM& vm = globalObject->vm();
233 auto scope = DECLARE_THROW_SCOPE(vm);
234
235 if (!value.isUndefined()) {
236 double doubleValue = value.toNumber(globalObject);
237 RETURN_IF_EXCEPTION(scope, 0);
238
239 if (!(doubleValue >= minimum && doubleValue <= maximum)) {
240 throwException(globalObject, scope, createRangeError(globalObject, *property.publicName() + " is out of range"));
241 return 0;
242 }
243 return static_cast<unsigned>(doubleValue);
244 }
245 return fallback;
246}
247
248static String privateUseLangTag(const Vector<String>& parts, size_t startIndex)
249{
250 size_t numParts = parts.size();
251 size_t currentIndex = startIndex;
252
253 // Check for privateuse.
254 // privateuse = "x" 1*("-" (1*8alphanum))
255 StringBuilder privateuse;
256 while (currentIndex < numParts) {
257 const String& singleton = parts[currentIndex];
258 unsigned singletonLength = singleton.length();
259 bool isValid = (singletonLength == 1 && (singleton == "x" || singleton == "X"));
260 if (!isValid)
261 break;
262
263 if (currentIndex != startIndex)
264 privateuse.append('-');
265
266 ++currentIndex;
267 unsigned numExtParts = 0;
268 privateuse.append('x');
269 while (currentIndex < numParts) {
270 const String& extPart = parts[currentIndex];
271 unsigned extPartLength = extPart.length();
272
273 bool isValid = (extPartLength >= 1 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
274 if (!isValid)
275 break;
276
277 ++currentIndex;
278 ++numExtParts;
279 privateuse.append('-');
280 privateuse.append(extPart.convertToASCIILowercase());
281 }
282
283 // Requires at least one production.
284 if (!numExtParts)
285 return String();
286 }
287
288 // Leftovers makes it invalid.
289 if (currentIndex < numParts)
290 return String();
291
292 return privateuse.toString();
293}
294
295static String preferredLanguage(const String& language)
296{
297 auto preferred = intlPreferredLanguageTag(language);
298 if (!preferred.isNull())
299 return preferred;
300 return language;
301}
302
303static String preferredRegion(const String& region)
304{
305 auto preferred = intlPreferredRegionTag(region);
306 if (!preferred.isNull())
307 return preferred;
308 return region;
309
310}
311
// Validates and canonicalizes a BCP 47 `langtag` that has already been split
// on '-' into `parts` (must be non-empty).
//
// The subtags are consumed strictly in grammar order (language, extlang,
// script, region, variants, extensions, privateuse). Each accepted subtag is
// appended to `canonical` in canonical case, with preferred-value replacements
// applied to language, extlang and region.
//
// Returns the canonical tag, or a null String when the input is not a
// structurally valid langtag (bad language, duplicate variant or singleton,
// extension without subtags, or trailing parts that are not a valid
// private-use sequence).
static String canonicalLangTag(const Vector<String>& parts)
{
    ASSERT(!parts.isEmpty());

    // Follows the grammar at https://www.rfc-editor.org/rfc/bcp/bcp47.txt
    // langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse]

    size_t numParts = parts.size();
    // Check for language.
    // language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
    size_t currentIndex = 0;
    const String& language = parts[currentIndex];
    unsigned languageLength = language.length();
    // Only 2-3 letter languages may be followed by extlang subtags.
    bool canHaveExtlang = languageLength >= 2 && languageLength <= 3;
    bool isValidLanguage = languageLength >= 2 && languageLength <= 8 && language.isAllSpecialCharacters<isASCIIAlpha>();
    if (!isValidLanguage)
        return String();

    ++currentIndex;
    StringBuilder canonical;

    // Language is lowercased, then replaced by its preferred value if one exists.
    const String langtag = preferredLanguage(language.convertToASCIILowercase());
    canonical.append(langtag);

    // Check for extlang.
    // extlang = 3ALPHA *2("-" 3ALPHA)
    if (canHaveExtlang) {
        for (unsigned times = 0; times < 3 && currentIndex < numParts; ++times) {
            const String& extlang = parts[currentIndex];
            unsigned extlangLength = extlang.length();
            if (extlangLength == 3 && extlang.isAllSpecialCharacters<isASCIIAlpha>()) {
                ++currentIndex;
                auto extlangLower = extlang.convertToASCIILowercase();
                // If the first extlang's preferred-extlang lookup yields the language
                // already emitted, the extlang replaces the language subtag outright.
                if (!times && intlPreferredExtlangTag(extlangLower) == langtag) {
                    canonical.clear();
                    canonical.append(extlangLower);
                    continue;
                }
                canonical.append('-');
                canonical.append(extlangLower);
            } else
                break;
        }
    }

    // Check for script.
    // script = 4ALPHA
    if (currentIndex < numParts) {
        const String& script = parts[currentIndex];
        unsigned scriptLength = script.length();
        if (scriptLength == 4 && script.isAllSpecialCharacters<isASCIIAlpha>()) {
            ++currentIndex;
            // Canonical script case is title case: first letter upper, rest lower.
            canonical.append('-');
            canonical.append(toASCIIUpper(script[0]));
            canonical.append(script.substring(1, 3).convertToASCIILowercase());
        }
    }

    // Check for region.
    // region = 2ALPHA / 3DIGIT
    if (currentIndex < numParts) {
        const String& region = parts[currentIndex];
        unsigned regionLength = region.length();
        bool isValidRegion = (
            (regionLength == 2 && region.isAllSpecialCharacters<isASCIIAlpha>())
            || (regionLength == 3 && region.isAllSpecialCharacters<isASCIIDigit>())
        );
        if (isValidRegion) {
            ++currentIndex;
            // Region is uppercased, then replaced by its preferred value if one exists.
            canonical.append('-');
            canonical.append(preferredRegion(region.convertToASCIIUppercase()));
        }
    }

    // Check for variant.
    // variant = 5*8alphanum / (DIGIT 3alphanum)
    // `subtags` tracks the lowercased variants seen so far to reject duplicates.
    HashSet<String> subtags;
    while (currentIndex < numParts) {
        const String& variant = parts[currentIndex];
        unsigned variantLength = variant.length();
        bool isValidVariant = (
            (variantLength >= 5 && variantLength <= 8 && variant.isAllSpecialCharacters<isASCIIAlphanumeric>())
            || (variantLength == 4 && isASCIIDigit(variant[0]) && variant.substring(1, 3).isAllSpecialCharacters<isASCIIAlphanumeric>())
        );
        if (!isValidVariant)
            break;

        // Cannot include duplicate subtags (case insensitive).
        String lowerVariant = variant.convertToASCIILowercase();
        if (!subtags.add(lowerVariant).isNewEntry)
            return String();

        ++currentIndex;

        // Reordering variant subtags is not required in the spec.
        canonical.append('-');
        canonical.append(lowerVariant);
    }

    // Check for extension.
    // extension = singleton 1*("-" (2*8alphanum))
    // singleton = alphanum except x or X
    // `subtags` is reused from here on to track the singletons seen so far.
    subtags.clear();
    Vector<String> extensions;
    while (currentIndex < numParts) {
        const String& possibleSingleton = parts[currentIndex];
        unsigned singletonLength = possibleSingleton.length();
        bool isValidSingleton = (singletonLength == 1 && possibleSingleton != "x" && possibleSingleton != "X" && isASCIIAlphanumeric(possibleSingleton[0]));
        if (!isValidSingleton)
            break;

        // Cannot include duplicate singleton (case insensitive).
        String singleton = possibleSingleton.convertToASCIILowercase();
        if (!subtags.add(singleton).isNewEntry)
            return String();

        ++currentIndex;
        int numExtParts = 0;
        // Each extension is collected as "<singleton>-<part>-<part>..." in lowercase.
        StringBuilder extension;
        extension.append(singleton);
        while (currentIndex < numParts) {
            const String& extPart = parts[currentIndex];
            unsigned extPartLength = extPart.length();

            bool isValid = (extPartLength >= 2 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>());
            if (!isValid)
                break;

            ++currentIndex;
            ++numExtParts;
            extension.append('-');
            extension.append(extPart.convertToASCIILowercase());
        }

        // Requires at least one production.
        if (!numExtParts)
            return String();

        extensions.append(extension.toString());
    }

    // Add extensions to canonical sorted by singleton.
    // Singletons are unique (checked above), so comparing the first character
    // gives a total order.
    std::sort(
        extensions.begin(),
        extensions.end(),
        [] (const String& a, const String& b) -> bool {
            return a[0] < b[0];
        }
    );
    size_t numExtenstions = extensions.size();
    for (size_t i = 0; i < numExtenstions; ++i) {
        canonical.append('-');
        canonical.append(extensions[i]);
    }

    // Check for privateuse.
    // Anything left over must be one valid private-use sequence, else the tag is invalid.
    if (currentIndex < numParts) {
        String privateuse = privateUseLangTag(parts, currentIndex);
        if (privateuse.isNull())
            return String();
        canonical.append('-');
        canonical.append(privateuse);
    }

    // Finally, replace the whole tag if intlRedundantLanguageTag has a mapping for it.
    const String tag = canonical.toString();
    const String preferred = intlRedundantLanguageTag(tag);
    if (!preferred.isNull())
        return preferred;
    return tag;
}
482
483static String canonicalizeLanguageTag(const String& locale)
484{
485 // IsStructurallyValidLanguageTag (locale)
486 // CanonicalizeLanguageTag (locale)
487 // These are done one after another in CanonicalizeLocaleList, so they are combined here to reduce duplication.
488 // https://www.rfc-editor.org/rfc/bcp/bcp47.txt
489
490 // Language-Tag = langtag / privateuse / grandfathered
491 String grandfather = intlGrandfatheredLanguageTag(locale.convertToASCIILowercase());
492 if (!grandfather.isNull())
493 return grandfather;
494
495 Vector<String> parts = locale.splitAllowingEmptyEntries('-');
496 if (!parts.isEmpty()) {
497 String langtag = canonicalLangTag(parts);
498 if (!langtag.isNull())
499 return langtag;
500
501 String privateuse = privateUseLangTag(parts, 0);
502 if (!privateuse.isNull())
503 return privateuse;
504 }
505
506 return String();
507}
508
509Vector<String> canonicalizeLocaleList(JSGlobalObject* globalObject, JSValue locales)
510{
511 // CanonicalizeLocaleList (locales)
512 // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
513
514 VM& vm = globalObject->vm();
515 auto scope = DECLARE_THROW_SCOPE(vm);
516
517 Vector<String> seen;
518
519 if (locales.isUndefined())
520 return seen;
521
522 JSObject* localesObject;
523 if (locales.isString()) {
524 JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
525 if (!localesArray) {
526 throwOutOfMemoryError(globalObject, scope);
527 RETURN_IF_EXCEPTION(scope, Vector<String>());
528 }
529 localesArray->push(globalObject, locales);
530 RETURN_IF_EXCEPTION(scope, Vector<String>());
531
532 localesObject = localesArray;
533 } else {
534 localesObject = locales.toObject(globalObject);
535 RETURN_IF_EXCEPTION(scope, Vector<String>());
536 }
537
538 // 6. Let len be ToLength(Get(O, "length")).
539 JSValue lengthProperty = localesObject->get(globalObject, vm.propertyNames->length);
540 RETURN_IF_EXCEPTION(scope, Vector<String>());
541
542 double length = lengthProperty.toLength(globalObject);
543 RETURN_IF_EXCEPTION(scope, Vector<String>());
544
545 HashSet<String> seenSet;
546 for (double k = 0; k < length; ++k) {
547 bool kPresent = localesObject->hasProperty(globalObject, k);
548 RETURN_IF_EXCEPTION(scope, Vector<String>());
549
550 if (kPresent) {
551 JSValue kValue = localesObject->get(globalObject, k);
552 RETURN_IF_EXCEPTION(scope, Vector<String>());
553
554 if (!kValue.isString() && !kValue.isObject()) {
555 throwTypeError(globalObject, scope, "locale value must be a string or object"_s);
556 return Vector<String>();
557 }
558
559 JSString* tag = kValue.toString(globalObject);
560 RETURN_IF_EXCEPTION(scope, Vector<String>());
561
562 auto tagValue = tag->value(globalObject);
563 RETURN_IF_EXCEPTION(scope, Vector<String>());
564
565 String canonicalizedTag = canonicalizeLanguageTag(tagValue);
566 if (canonicalizedTag.isNull()) {
567 throwException(globalObject, scope, createRangeError(globalObject, "invalid language tag: " + tagValue));
568 return Vector<String>();
569 }
570
571 if (seenSet.add(canonicalizedTag).isNewEntry)
572 seen.append(canonicalizedTag);
573 }
574 }
575
576 return seen;
577}
578
579String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
580{
581 // BestAvailableLocale (availableLocales, locale)
582 // https://tc39.github.io/ecma402/#sec-bestavailablelocale
583
584 String candidate = locale;
585 while (!candidate.isEmpty()) {
586 if (availableLocales.contains(candidate))
587 return candidate;
588
589 size_t pos = candidate.reverseFind('-');
590 if (pos == notFound)
591 return String();
592
593 if (pos >= 2 && candidate[pos - 2] == '-')
594 pos -= 2;
595
596 candidate = candidate.substring(0, pos);
597 }
598
599 return String();
600}
601
602String defaultLocale(JSGlobalObject* globalObject)
603{
604 // DefaultLocale ()
605 // https://tc39.github.io/ecma402/#sec-defaultlocale
606
607 // WebCore's global objects will have their own ideas of how to determine the language. It may
608 // be determined by WebCore-specific logic like some WK settings. Usually this will return the
609 // same thing as userPreferredLanguages()[0].
610 if (auto defaultLanguage = globalObject->globalObjectMethodTable()->defaultLanguage) {
611 String locale = canonicalizeLanguageTag(defaultLanguage());
612 if (!locale.isEmpty())
613 return locale;
614 }
615
616 Vector<String> languages = userPreferredLanguages();
617 for (const auto& language : languages) {
618 String locale = canonicalizeLanguageTag(language);
619 if (!locale.isEmpty())
620 return locale;
621 }
622
623 // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
624 // has configured some other language, but being wrong is better than crashing.
625 String locale = convertICULocaleToBCP47LanguageTag(uloc_getDefault());
626 if (!locale.isEmpty())
627 return locale;
628
629 return "en"_s;
630}
631
632String removeUnicodeLocaleExtension(const String& locale)
633{
634 Vector<String> parts = locale.split('-');
635 StringBuilder builder;
636 size_t partsSize = parts.size();
637 bool atPrivate = false;
638 if (partsSize > 0)
639 builder.append(parts[0]);
640 for (size_t p = 1; p < partsSize; ++p) {
641 if (parts[p] == "x")
642 atPrivate = true;
643 if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) {
644 // Skip the u- and anything that follows until another singleton.
645 // While the next part is part of the unicode extension, skip it.
646 while (p + 1 < partsSize && parts[p + 1].length() > 1)
647 ++p;
648 } else {
649 builder.append('-');
650 builder.append(parts[p]);
651 }
652 }
653 return builder.toString();
654}
655
656static MatcherResult lookupMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
657{
658 // LookupMatcher (availableLocales, requestedLocales)
659 // https://tc39.github.io/ecma402/#sec-lookupmatcher
660
661 String locale;
662 String noExtensionsLocale;
663 String availableLocale;
664 for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
665 locale = requestedLocales[i];
666 noExtensionsLocale = removeUnicodeLocaleExtension(locale);
667 availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
668 }
669
670 MatcherResult result;
671 if (!availableLocale.isEmpty()) {
672 result.locale = availableLocale;
673 if (locale != noExtensionsLocale) {
674 size_t extensionIndex = locale.find("-u-");
675 RELEASE_ASSERT(extensionIndex != notFound);
676
677 size_t extensionLength = locale.length() - extensionIndex;
678 size_t end = extensionIndex + 3;
679 while (end < locale.length()) {
680 end = locale.find('-', end);
681 if (end == notFound)
682 break;
683 if (end + 2 < locale.length() && locale[end + 2] == '-') {
684 extensionLength = end - extensionIndex;
685 break;
686 }
687 end++;
688 }
689 result.extension = locale.substring(extensionIndex, extensionLength);
690 result.extensionIndex = extensionIndex;
691 }
692 } else
693 result.locale = defaultLocale(globalObject);
694 return result;
695}
696
// Matcher used by resolveLocale when the "localeMatcher" option is not
// "lookup". Currently it simply forwards to lookupMatcher, so both matchers
// produce identical results.
static MatcherResult bestFitMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // BestFitMatcher (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-bestfitmatcher

    // FIXME: Implement something better than lookup.
    return lookupMatcher(globalObject, availableLocales, requestedLocales);
}
705
// Splits a Unicode extension sequence (expected to start with "-u-") into
// attributes, keys and values, appending them to `subtags` in order.
//
// A subtag of exactly two characters is treated as a key. Leading subtags
// before the first key are each appended individually. Everything between two
// keys is appended as ONE entry (a multi-part value keeps its inner '-').
// Extensions shorter than 3 characters append nothing.
static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags)
{
    // UnicodeExtensionSubtags (extension)
    // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags

    auto extensionLength = extension.length();
    if (extensionLength < 3)
        return;

    // subtagStart: start of the subtag currently being scanned.
    // valueStart: start of the pending value (text since the last key or leading subtag).
    // isLeading: true until the first key has been seen.
    size_t subtagStart = 3; // Skip initial -u-.
    size_t valueStart = 3;
    bool isLeading = true;
    for (size_t index = subtagStart; index < extensionLength; ++index) {
        if (extension[index] == '-') {
            if (index - subtagStart == 2) {
                // Tag is a key, first append prior key's value if there is one.
                if (subtagStart - valueStart > 1)
                    subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
                isLeading = false;
            } else if (isLeading) {
                // Leading subtags before first key.
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
            }
            // Non-key subtags after a key are not appended here; they stay
            // part of the pending value that starts at valueStart.
            subtagStart = index + 1;
        }
    }
    // The loop only reacts to '-', so the final subtag is handled here.
    if (extensionLength - subtagStart == 2) {
        // Trailing an extension key, first append prior key's value if there is one.
        if (subtagStart - valueStart > 1)
            subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
        valueStart = subtagStart;
    }
    // Append final key's value.
    // (When the final subtag is itself a key, this appends that key.)
    subtags.append(extension.substring(valueStart, extensionLength - valueStart));
}
744
// Resolves the locale and per-key extension values for an Intl service.
//
// Parameters:
//   availableLocales / requestedLocales - handed to the selected matcher.
//   options - string map; "localeMatcher" selects the matcher ("lookup" or
//       anything else for best fit), and entries named after a relevant
//       extension key can override the value taken from the locale.
//   relevantExtensionKeys / relevantExtensionKeyCount - the extension keys
//       (e.g. two-letter Unicode keys) this service cares about.
//   localeData - callback invoked as localeData(foundLocale, keyIndex); it
//       must return a non-empty list of supported values for that key, whose
//       first element is used as the default.
//
// Returns a map containing "dataLocale" (the matched locale), one entry per
// relevant key with its resolved value, and "locale" (the matched locale with
// the supported "-u-..." extension re-inserted, when there is one).
HashMap<String, String> resolveLocale(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, const HashMap<String, String>& options, const char* const relevantExtensionKeys[], size_t relevantExtensionKeyCount, Vector<String> (*localeData)(const String&, size_t))
{
    // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
    // https://tc39.github.io/ecma402/#sec-resolvelocale

    const String& matcher = options.get("localeMatcher"_s);
    MatcherResult matcherResult = (matcher == "lookup")
        ? lookupMatcher(globalObject, availableLocales, requestedLocales)
        : bestFitMatcher(globalObject, availableLocales, requestedLocales);

    String foundLocale = matcherResult.locale;

    // Break the requested "-u-..." extension (if any) into keys and values.
    Vector<String> extensionSubtags;
    if (!matcherResult.extension.isNull())
        unicodeExtensionSubTags(matcherResult.extension, extensionSubtags);

    HashMap<String, String> result;
    result.add("dataLocale"_s, foundLocale);

    // Accumulates "-u" plus "-key-value" for every extension value that is honored.
    String supportedExtension = "-u"_s;
    for (size_t keyIndex = 0; keyIndex < relevantExtensionKeyCount; ++keyIndex) {
        const char* key = relevantExtensionKeys[keyIndex];
        Vector<String> keyLocaleData = localeData(foundLocale, keyIndex);
        ASSERT(!keyLocaleData.isEmpty());

        // Start from the locale's default value for this key.
        String value = keyLocaleData[0];
        String supportedExtensionAddition;

        if (!extensionSubtags.isEmpty()) {
            size_t keyPos = extensionSubtags.find(key);
            if (keyPos != notFound) {
                // A following subtag longer than two characters is the value requested for this key.
                if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
                    const String& requestedValue = extensionSubtags[keyPos + 1];
                    if (keyLocaleData.contains(requestedValue)) {
                        value = requestedValue;
                        supportedExtensionAddition = makeString('-', key, '-', value);
                    }
                } else if (keyLocaleData.contains(static_cast<String>("true"_s))) {
                    // Key present without a value: treated as "true" when the locale supports it.
                    value = "true"_s;
                }
            }
        }

        // An explicit option wins over the extension; when it does, the key is
        // dropped from the extension echoed back in "locale".
        HashMap<String, String>::const_iterator iterator = options.find(key);
        if (iterator != options.end()) {
            const String& optionsValue = iterator->value;
            // Undefined should not get added to the options, it won't displace the extension.
            // Null will remove the extension.
            if ((optionsValue.isNull() || keyLocaleData.contains(optionsValue)) && optionsValue != value) {
                value = optionsValue;
                supportedExtensionAddition = String();
            }
        }
        result.add(key, value);
        supportedExtension.append(supportedExtensionAddition);
    }

    // Longer than the bare "-u" means at least one key/value pair was kept:
    // splice the extension into foundLocale at the recorded offset.
    if (supportedExtension.length() > 2) {
        String preExtension = foundLocale.substring(0, matcherResult.extensionIndex);
        String postExtension = foundLocale.substring(matcherResult.extensionIndex);
        foundLocale = preExtension + supportedExtension + postExtension;
    }

    result.add("locale"_s, foundLocale);
    return result;
}
811
812static JSArray* lookupSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
813{
814 // LookupSupportedLocales (availableLocales, requestedLocales)
815 // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
816
817 VM& vm = globalObject->vm();
818 auto scope = DECLARE_THROW_SCOPE(vm);
819
820 size_t len = requestedLocales.size();
821 JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
822 if (!subset) {
823 throwOutOfMemoryError(globalObject, scope);
824 return nullptr;
825 }
826
827 unsigned index = 0;
828 for (size_t k = 0; k < len; ++k) {
829 const String& locale = requestedLocales[k];
830 String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
831 String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
832 if (!availableLocale.isNull()) {
833 subset->putDirectIndex(globalObject, index++, jsString(vm, locale));
834 RETURN_IF_EXCEPTION(scope, nullptr);
835 }
836 }
837
838 return subset;
839}
840
// Used by supportedLocales() when the matcher is "best fit". Currently it
// simply forwards to lookupSupportedLocales, so both matchers produce
// identical results.
static JSArray* bestFitSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
{
    // BestFitSupportedLocales (availableLocales, requestedLocales)
    // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales

    // FIXME: Implement something better than lookup.
    return lookupSupportedLocales(globalObject, availableLocales, requestedLocales);
}
849
850JSValue supportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
851{
852 // SupportedLocales (availableLocales, requestedLocales, options)
853 // https://tc39.github.io/ecma402/#sec-supportedlocales
854
855 VM& vm = globalObject->vm();
856 auto scope = DECLARE_THROW_SCOPE(vm);
857 String matcher;
858
859 if (!options.isUndefined()) {
860 matcher = intlStringOption(globalObject, options, vm.propertyNames->localeMatcher, { "lookup", "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"", "best fit");
861 RETURN_IF_EXCEPTION(scope, JSValue());
862 } else
863 matcher = "best fit"_s;
864
865 JSArray* supportedLocales = (matcher == "best fit")
866 ? bestFitSupportedLocales(globalObject, availableLocales, requestedLocales)
867 : lookupSupportedLocales(globalObject, availableLocales, requestedLocales);
868 RETURN_IF_EXCEPTION(scope, JSValue());
869
870 PropertyNameArray keys(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude);
871 supportedLocales->getOwnPropertyNames(supportedLocales, globalObject, keys, EnumerationMode());
872 RETURN_IF_EXCEPTION(scope, JSValue());
873
874 PropertyDescriptor desc;
875 desc.setConfigurable(false);
876 desc.setWritable(false);
877
878 size_t len = keys.size();
879 for (size_t i = 0; i < len; ++i) {
880 supportedLocales->defineOwnProperty(supportedLocales, globalObject, keys[i], desc, true);
881 RETURN_IF_EXCEPTION(scope, JSValue());
882 }
883 supportedLocales->defineOwnProperty(supportedLocales, globalObject, vm.propertyNames->length, desc, true);
884 RETURN_IF_EXCEPTION(scope, JSValue());
885
886 return supportedLocales;
887}
888
889Vector<String> numberingSystemsForLocale(const String& locale)
890{
891 static NeverDestroyed<Vector<String>> cachedNumberingSystems;
892 Vector<String>& availableNumberingSystems = cachedNumberingSystems.get();
893
894 if (UNLIKELY(availableNumberingSystems.isEmpty())) {
895 static Lock cachedNumberingSystemsMutex;
896 std::lock_guard<Lock> lock(cachedNumberingSystemsMutex);
897 if (availableNumberingSystems.isEmpty()) {
898 UErrorCode status = U_ZERO_ERROR;
899 UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
900 ASSERT(U_SUCCESS(status));
901
902 int32_t resultLength;
903 // Numbering system names are always ASCII, so use char[].
904 while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
905 ASSERT(U_SUCCESS(status));
906 auto numsys = unumsys_openByName(result, &status);
907 ASSERT(U_SUCCESS(status));
908 // Only support algorithmic if it is the default fot the locale, handled below.
909 if (!unumsys_isAlgorithmic(numsys))
910 availableNumberingSystems.append(String(result, resultLength));
911 unumsys_close(numsys);
912 }
913 uenum_close(numberingSystemNames);
914 }
915 }
916
917 UErrorCode status = U_ZERO_ERROR;
918 UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
919 ASSERT(U_SUCCESS(status));
920 String defaultSystemName(unumsys_getName(defaultSystem));
921 unumsys_close(defaultSystem);
922
923 Vector<String> numberingSystems({ defaultSystemName });
924 numberingSystems.appendVector(availableNumberingSystems);
925 return numberingSystems;
926}
927
928EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(JSGlobalObject* globalObject, CallFrame* callFrame)
929{
930 // Intl.getCanonicalLocales(locales)
931 // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales
932
933 VM& vm = globalObject->vm();
934 auto scope = DECLARE_THROW_SCOPE(vm);
935
936 Vector<String> localeList = canonicalizeLocaleList(globalObject, callFrame->argument(0));
937 RETURN_IF_EXCEPTION(scope, encodedJSValue());
938 auto length = localeList.size();
939
940 JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length);
941 if (!localeArray) {
942 throwOutOfMemoryError(globalObject, scope);
943 return encodedJSValue();
944 }
945
946 for (size_t i = 0; i < length; ++i) {
947 localeArray->putDirectIndex(globalObject, i, jsString(vm, localeList[i]));
948 RETURN_IF_EXCEPTION(scope, encodedJSValue());
949 }
950 return JSValue::encode(localeArray);
951}
952
953} // namespace JSC
954
955#endif // ENABLE(INTL)
956