1 | /* |
2 | * Copyright (C) 2015 Andy VanWagoner ([email protected]) |
3 | * Copyright (C) 2015 Sukolsak Sakshuwong ([email protected]) |
4 | * Copyright (C) 2016 Apple Inc. All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
17 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
19 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
25 | * THE POSSIBILITY OF SUCH DAMAGE. |
26 | */ |
27 | |
28 | #include "config.h" |
29 | #include "IntlObject.h" |
30 | |
31 | #if ENABLE(INTL) |
32 | |
33 | #include "Error.h" |
34 | #include "FunctionPrototype.h" |
35 | #include "IntlCanonicalizeLanguage.h" |
36 | #include "IntlCollatorConstructor.h" |
37 | #include "IntlCollatorPrototype.h" |
38 | #include "IntlDateTimeFormatConstructor.h" |
39 | #include "IntlDateTimeFormatPrototype.h" |
40 | #include "IntlNumberFormatConstructor.h" |
41 | #include "IntlNumberFormatPrototype.h" |
42 | #include "IntlPluralRulesConstructor.h" |
43 | #include "IntlPluralRulesPrototype.h" |
44 | #include "JSCInlines.h" |
45 | #include "JSCJSValueInlines.h" |
46 | #include "Lookup.h" |
47 | #include "ObjectPrototype.h" |
48 | #include "Options.h" |
49 | #include <unicode/uloc.h> |
50 | #include <unicode/unumsys.h> |
51 | #include <wtf/Assertions.h> |
52 | #include <wtf/Language.h> |
53 | #include <wtf/NeverDestroyed.h> |
54 | #include <wtf/text/StringBuilder.h> |
55 | |
56 | namespace JSC { |
57 | |
58 | STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject); |
59 | |
60 | static EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState*); |
61 | |
62 | static JSValue createCollatorConstructor(VM& vm, JSObject* object) |
63 | { |
64 | IntlObject* intlObject = jsCast<IntlObject*>(object); |
65 | JSGlobalObject* globalObject = intlObject->globalObject(vm); |
66 | return IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlCollatorPrototype*>(globalObject->collatorStructure()->storedPrototypeObject())); |
67 | } |
68 | |
69 | static JSValue createNumberFormatConstructor(VM& vm, JSObject* object) |
70 | { |
71 | IntlObject* intlObject = jsCast<IntlObject*>(object); |
72 | JSGlobalObject* globalObject = intlObject->globalObject(vm); |
73 | return IntlNumberFormatConstructor::create(vm, IntlNumberFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlNumberFormatPrototype*>(globalObject->numberFormatStructure()->storedPrototypeObject())); |
74 | } |
75 | |
76 | static JSValue createDateTimeFormatConstructor(VM& vm, JSObject* object) |
77 | { |
78 | IntlObject* intlObject = jsCast<IntlObject*>(object); |
79 | JSGlobalObject* globalObject = intlObject->globalObject(vm); |
80 | return IntlDateTimeFormatConstructor::create(vm, IntlDateTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDateTimeFormatPrototype*>(globalObject->dateTimeFormatStructure()->storedPrototypeObject())); |
81 | } |
82 | |
83 | static JSValue createPluralRulesConstructor(VM& vm, JSObject* object) |
84 | { |
85 | IntlObject* intlObject = jsCast<IntlObject*>(object); |
86 | JSGlobalObject* globalObject = intlObject->globalObject(vm); |
87 | return IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlPluralRulesPrototype*>(globalObject->pluralRulesStructure()->storedPrototypeObject())); |
88 | } |
89 | |
90 | } |
91 | |
92 | #include "IntlObject.lut.h" |
93 | |
94 | namespace JSC { |
95 | |
96 | /* Source for IntlObject.lut.h |
97 | @begin intlObjectTable |
98 | getCanonicalLocales intlObjectFuncGetCanonicalLocales DontEnum|Function 1 |
99 | Collator createCollatorConstructor DontEnum|PropertyCallback |
100 | DateTimeFormat createDateTimeFormatConstructor DontEnum|PropertyCallback |
101 | NumberFormat createNumberFormatConstructor DontEnum|PropertyCallback |
102 | @end |
103 | */ |
104 | |
105 | struct MatcherResult { |
106 | String locale; |
107 | String extension; |
108 | size_t extensionIndex { 0 }; |
109 | }; |
110 | |
111 | const ClassInfo IntlObject::s_info = { "Object" , &Base::s_info, &intlObjectTable, nullptr, CREATE_METHOD_TABLE(IntlObject) }; |
112 | |
113 | IntlObject::IntlObject(VM& vm, Structure* structure) |
114 | : JSNonFinalObject(vm, structure) |
115 | { |
116 | } |
117 | |
118 | IntlObject* IntlObject::create(VM& vm, Structure* structure) |
119 | { |
120 | IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure); |
121 | object->finishCreation(vm); |
122 | return object; |
123 | } |
124 | |
125 | void IntlObject::finishCreation(VM& vm) |
126 | { |
127 | Base::finishCreation(vm); |
128 | ASSERT(inherits(vm, info())); |
129 | |
130 | // Constructor Properties of the Intl Object |
131 | // https://tc39.github.io/ecma402/#sec-constructor-properties-of-the-intl-object |
132 | if (Options::useIntlPluralRules()) |
133 | putDirectWithoutTransition(vm, vm.propertyNames->PluralRules, createPluralRulesConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum)); |
134 | } |
135 | |
136 | Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) |
137 | { |
138 | return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); |
139 | } |
140 | |
141 | String convertICULocaleToBCP47LanguageTag(const char* localeID) |
142 | { |
143 | UErrorCode status = U_ZERO_ERROR; |
144 | Vector<char, 32> buffer(32); |
145 | auto length = uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status); |
146 | if (status == U_BUFFER_OVERFLOW_ERROR) { |
147 | buffer.grow(length); |
148 | status = U_ZERO_ERROR; |
149 | uloc_toLanguageTag(localeID, buffer.data(), buffer.size(), false, &status); |
150 | } |
151 | if (!U_FAILURE(status)) |
152 | return String(buffer.data(), length); |
153 | return String(); |
154 | } |
155 | |
156 | bool intlBooleanOption(ExecState& state, JSValue options, PropertyName property, bool& usesFallback) |
157 | { |
158 | // GetOption (options, property, type="boolean", values, fallback) |
159 | // https://tc39.github.io/ecma402/#sec-getoption |
160 | |
161 | VM& vm = state.vm(); |
162 | auto scope = DECLARE_THROW_SCOPE(vm); |
163 | |
164 | JSObject* opts = options.toObject(&state); |
165 | RETURN_IF_EXCEPTION(scope, false); |
166 | |
167 | JSValue value = opts->get(&state, property); |
168 | RETURN_IF_EXCEPTION(scope, false); |
169 | |
170 | if (!value.isUndefined()) { |
171 | bool booleanValue = value.toBoolean(&state); |
172 | usesFallback = false; |
173 | return booleanValue; |
174 | } |
175 | |
176 | // Because fallback can be undefined, we let the caller handle it instead. |
177 | usesFallback = true; |
178 | return false; |
179 | } |
180 | |
181 | String intlStringOption(ExecState& state, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback) |
182 | { |
183 | // GetOption (options, property, type="string", values, fallback) |
184 | // https://tc39.github.io/ecma402/#sec-getoption |
185 | |
186 | VM& vm = state.vm(); |
187 | auto scope = DECLARE_THROW_SCOPE(vm); |
188 | |
189 | JSObject* opts = options.toObject(&state); |
190 | RETURN_IF_EXCEPTION(scope, String()); |
191 | |
192 | JSValue value = opts->get(&state, property); |
193 | RETURN_IF_EXCEPTION(scope, String()); |
194 | |
195 | if (!value.isUndefined()) { |
196 | String stringValue = value.toWTFString(&state); |
197 | RETURN_IF_EXCEPTION(scope, String()); |
198 | |
199 | if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) { |
200 | throwException(&state, scope, createRangeError(&state, notFound)); |
201 | return { }; |
202 | } |
203 | return stringValue; |
204 | } |
205 | |
206 | return fallback; |
207 | } |
208 | |
209 | unsigned intlNumberOption(ExecState& state, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback) |
210 | { |
211 | // GetNumberOption (options, property, minimum, maximum, fallback) |
212 | // https://tc39.github.io/ecma402/#sec-getnumberoption |
213 | |
214 | VM& vm = state.vm(); |
215 | auto scope = DECLARE_THROW_SCOPE(vm); |
216 | |
217 | JSObject* opts = options.toObject(&state); |
218 | RETURN_IF_EXCEPTION(scope, 0); |
219 | |
220 | JSValue value = opts->get(&state, property); |
221 | RETURN_IF_EXCEPTION(scope, 0); |
222 | |
223 | RELEASE_AND_RETURN(scope, intlDefaultNumberOption(state, value, property, minimum, maximum, fallback)); |
224 | } |
225 | |
226 | unsigned intlDefaultNumberOption(ExecState& state, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback) |
227 | { |
228 | // DefaultNumberOption (value, minimum, maximum, fallback) |
229 | // https://tc39.github.io/ecma402/#sec-defaultnumberoption |
230 | |
231 | VM& vm = state.vm(); |
232 | auto scope = DECLARE_THROW_SCOPE(vm); |
233 | |
234 | if (!value.isUndefined()) { |
235 | double doubleValue = value.toNumber(&state); |
236 | RETURN_IF_EXCEPTION(scope, 0); |
237 | |
238 | if (!(doubleValue >= minimum && doubleValue <= maximum)) { |
239 | throwException(&state, scope, createRangeError(&state, *property.publicName() + " is out of range" )); |
240 | return 0; |
241 | } |
242 | return static_cast<unsigned>(doubleValue); |
243 | } |
244 | return fallback; |
245 | } |
246 | |
247 | static String privateUseLangTag(const Vector<String>& parts, size_t startIndex) |
248 | { |
249 | size_t numParts = parts.size(); |
250 | size_t currentIndex = startIndex; |
251 | |
252 | // Check for privateuse. |
253 | // privateuse = "x" 1*("-" (1*8alphanum)) |
254 | StringBuilder privateuse; |
255 | while (currentIndex < numParts) { |
256 | const String& singleton = parts[currentIndex]; |
257 | unsigned singletonLength = singleton.length(); |
258 | bool isValid = (singletonLength == 1 && (singleton == "x" || singleton == "X" )); |
259 | if (!isValid) |
260 | break; |
261 | |
262 | if (currentIndex != startIndex) |
263 | privateuse.append('-'); |
264 | |
265 | ++currentIndex; |
266 | unsigned numExtParts = 0; |
267 | privateuse.append('x'); |
268 | while (currentIndex < numParts) { |
269 | const String& extPart = parts[currentIndex]; |
270 | unsigned extPartLength = extPart.length(); |
271 | |
272 | bool isValid = (extPartLength >= 1 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>()); |
273 | if (!isValid) |
274 | break; |
275 | |
276 | ++currentIndex; |
277 | ++numExtParts; |
278 | privateuse.append('-'); |
279 | privateuse.append(extPart.convertToASCIILowercase()); |
280 | } |
281 | |
282 | // Requires at least one production. |
283 | if (!numExtParts) |
284 | return String(); |
285 | } |
286 | |
287 | // Leftovers makes it invalid. |
288 | if (currentIndex < numParts) |
289 | return String(); |
290 | |
291 | return privateuse.toString(); |
292 | } |
293 | |
294 | static String preferredLanguage(const String& language) |
295 | { |
296 | auto preferred = intlPreferredLanguageTag(language); |
297 | if (!preferred.isNull()) |
298 | return preferred; |
299 | return language; |
300 | } |
301 | |
302 | static String preferredRegion(const String& region) |
303 | { |
304 | auto preferred = intlPreferredRegionTag(region); |
305 | if (!preferred.isNull()) |
306 | return preferred; |
307 | return region; |
308 | |
309 | } |
310 | |
311 | static String canonicalLangTag(const Vector<String>& parts) |
312 | { |
313 | ASSERT(!parts.isEmpty()); |
314 | |
315 | // Follows the grammar at https://www.rfc-editor.org/rfc/bcp/bcp47.txt |
316 | // langtag = language ["-" script] ["-" region] *("-" variant) *("-" extension) ["-" privateuse] |
317 | |
318 | size_t numParts = parts.size(); |
319 | // Check for language. |
320 | // language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA |
321 | size_t currentIndex = 0; |
322 | const String& language = parts[currentIndex]; |
323 | unsigned languageLength = language.length(); |
324 | bool canHaveExtlang = languageLength >= 2 && languageLength <= 3; |
325 | bool isValidLanguage = languageLength >= 2 && languageLength <= 8 && language.isAllSpecialCharacters<isASCIIAlpha>(); |
326 | if (!isValidLanguage) |
327 | return String(); |
328 | |
329 | ++currentIndex; |
330 | StringBuilder canonical; |
331 | |
332 | const String langtag = preferredLanguage(language.convertToASCIILowercase()); |
333 | canonical.append(langtag); |
334 | |
335 | // Check for extlang. |
336 | // extlang = 3ALPHA *2("-" 3ALPHA) |
337 | if (canHaveExtlang) { |
338 | for (unsigned times = 0; times < 3 && currentIndex < numParts; ++times) { |
339 | const String& extlang = parts[currentIndex]; |
340 | unsigned extlangLength = extlang.length(); |
341 | if (extlangLength == 3 && extlang.isAllSpecialCharacters<isASCIIAlpha>()) { |
342 | ++currentIndex; |
343 | auto extlangLower = extlang.convertToASCIILowercase(); |
344 | if (!times && intlPreferredExtlangTag(extlangLower) == langtag) { |
345 | canonical.clear(); |
346 | canonical.append(extlangLower); |
347 | continue; |
348 | } |
349 | canonical.append('-'); |
350 | canonical.append(extlangLower); |
351 | } else |
352 | break; |
353 | } |
354 | } |
355 | |
356 | // Check for script. |
357 | // script = 4ALPHA |
358 | if (currentIndex < numParts) { |
359 | const String& script = parts[currentIndex]; |
360 | unsigned scriptLength = script.length(); |
361 | if (scriptLength == 4 && script.isAllSpecialCharacters<isASCIIAlpha>()) { |
362 | ++currentIndex; |
363 | canonical.append('-'); |
364 | canonical.append(toASCIIUpper(script[0])); |
365 | canonical.append(script.substring(1, 3).convertToASCIILowercase()); |
366 | } |
367 | } |
368 | |
369 | // Check for region. |
370 | // region = 2ALPHA / 3DIGIT |
371 | if (currentIndex < numParts) { |
372 | const String& region = parts[currentIndex]; |
373 | unsigned regionLength = region.length(); |
374 | bool isValidRegion = ( |
375 | (regionLength == 2 && region.isAllSpecialCharacters<isASCIIAlpha>()) |
376 | || (regionLength == 3 && region.isAllSpecialCharacters<isASCIIDigit>()) |
377 | ); |
378 | if (isValidRegion) { |
379 | ++currentIndex; |
380 | canonical.append('-'); |
381 | canonical.append(preferredRegion(region.convertToASCIIUppercase())); |
382 | } |
383 | } |
384 | |
385 | // Check for variant. |
386 | // variant = 5*8alphanum / (DIGIT 3alphanum) |
387 | HashSet<String> subtags; |
388 | while (currentIndex < numParts) { |
389 | const String& variant = parts[currentIndex]; |
390 | unsigned variantLength = variant.length(); |
391 | bool isValidVariant = ( |
392 | (variantLength >= 5 && variantLength <= 8 && variant.isAllSpecialCharacters<isASCIIAlphanumeric>()) |
393 | || (variantLength == 4 && isASCIIDigit(variant[0]) && variant.substring(1, 3).isAllSpecialCharacters<isASCIIAlphanumeric>()) |
394 | ); |
395 | if (!isValidVariant) |
396 | break; |
397 | |
398 | // Cannot include duplicate subtags (case insensitive). |
399 | String lowerVariant = variant.convertToASCIILowercase(); |
400 | if (!subtags.add(lowerVariant).isNewEntry) |
401 | return String(); |
402 | |
403 | ++currentIndex; |
404 | |
405 | // Reordering variant subtags is not required in the spec. |
406 | canonical.append('-'); |
407 | canonical.append(lowerVariant); |
408 | } |
409 | |
410 | // Check for extension. |
411 | // extension = singleton 1*("-" (2*8alphanum)) |
412 | // singleton = alphanum except x or X |
413 | subtags.clear(); |
414 | Vector<String> extensions; |
415 | while (currentIndex < numParts) { |
416 | const String& possibleSingleton = parts[currentIndex]; |
417 | unsigned singletonLength = possibleSingleton.length(); |
418 | bool isValidSingleton = (singletonLength == 1 && possibleSingleton != "x" && possibleSingleton != "X" && isASCIIAlphanumeric(possibleSingleton[0])); |
419 | if (!isValidSingleton) |
420 | break; |
421 | |
422 | // Cannot include duplicate singleton (case insensitive). |
423 | String singleton = possibleSingleton.convertToASCIILowercase(); |
424 | if (!subtags.add(singleton).isNewEntry) |
425 | return String(); |
426 | |
427 | ++currentIndex; |
428 | int numExtParts = 0; |
429 | StringBuilder extension; |
430 | extension.append(singleton); |
431 | while (currentIndex < numParts) { |
432 | const String& extPart = parts[currentIndex]; |
433 | unsigned extPartLength = extPart.length(); |
434 | |
435 | bool isValid = (extPartLength >= 2 && extPartLength <= 8 && extPart.isAllSpecialCharacters<isASCIIAlphanumeric>()); |
436 | if (!isValid) |
437 | break; |
438 | |
439 | ++currentIndex; |
440 | ++numExtParts; |
441 | extension.append('-'); |
442 | extension.append(extPart.convertToASCIILowercase()); |
443 | } |
444 | |
445 | // Requires at least one production. |
446 | if (!numExtParts) |
447 | return String(); |
448 | |
449 | extensions.append(extension.toString()); |
450 | } |
451 | |
452 | // Add extensions to canonical sorted by singleton. |
453 | std::sort( |
454 | extensions.begin(), |
455 | extensions.end(), |
456 | [] (const String& a, const String& b) -> bool { |
457 | return a[0] < b[0]; |
458 | } |
459 | ); |
460 | size_t numExtenstions = extensions.size(); |
461 | for (size_t i = 0; i < numExtenstions; ++i) { |
462 | canonical.append('-'); |
463 | canonical.append(extensions[i]); |
464 | } |
465 | |
466 | // Check for privateuse. |
467 | if (currentIndex < numParts) { |
468 | String privateuse = privateUseLangTag(parts, currentIndex); |
469 | if (privateuse.isNull()) |
470 | return String(); |
471 | canonical.append('-'); |
472 | canonical.append(privateuse); |
473 | } |
474 | |
475 | const String tag = canonical.toString(); |
476 | const String preferred = intlRedundantLanguageTag(tag); |
477 | if (!preferred.isNull()) |
478 | return preferred; |
479 | return tag; |
480 | } |
481 | |
482 | static String canonicalizeLanguageTag(const String& locale) |
483 | { |
484 | // IsStructurallyValidLanguageTag (locale) |
485 | // CanonicalizeLanguageTag (locale) |
486 | // These are done one after another in CanonicalizeLocaleList, so they are combined here to reduce duplication. |
487 | // https://www.rfc-editor.org/rfc/bcp/bcp47.txt |
488 | |
489 | // Language-Tag = langtag / privateuse / grandfathered |
490 | String grandfather = intlGrandfatheredLanguageTag(locale.convertToASCIILowercase()); |
491 | if (!grandfather.isNull()) |
492 | return grandfather; |
493 | |
494 | Vector<String> parts = locale.splitAllowingEmptyEntries('-'); |
495 | if (!parts.isEmpty()) { |
496 | String langtag = canonicalLangTag(parts); |
497 | if (!langtag.isNull()) |
498 | return langtag; |
499 | |
500 | String privateuse = privateUseLangTag(parts, 0); |
501 | if (!privateuse.isNull()) |
502 | return privateuse; |
503 | } |
504 | |
505 | return String(); |
506 | } |
507 | |
508 | Vector<String> canonicalizeLocaleList(ExecState& state, JSValue locales) |
509 | { |
510 | // CanonicalizeLocaleList (locales) |
511 | // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist |
512 | |
513 | VM& vm = state.vm(); |
514 | auto scope = DECLARE_THROW_SCOPE(vm); |
515 | |
516 | JSGlobalObject* globalObject = state.jsCallee()->globalObject(vm); |
517 | Vector<String> seen; |
518 | |
519 | if (locales.isUndefined()) |
520 | return seen; |
521 | |
522 | JSObject* localesObject; |
523 | if (locales.isString()) { |
524 | JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous)); |
525 | if (!localesArray) { |
526 | throwOutOfMemoryError(&state, scope); |
527 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
528 | } |
529 | localesArray->push(&state, locales); |
530 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
531 | |
532 | localesObject = localesArray; |
533 | } else { |
534 | localesObject = locales.toObject(&state); |
535 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
536 | } |
537 | |
538 | // 6. Let len be ToLength(Get(O, "length")). |
539 | JSValue lengthProperty = localesObject->get(&state, vm.propertyNames->length); |
540 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
541 | |
542 | double length = lengthProperty.toLength(&state); |
543 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
544 | |
545 | HashSet<String> seenSet; |
546 | for (double k = 0; k < length; ++k) { |
547 | bool kPresent = localesObject->hasProperty(&state, k); |
548 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
549 | |
550 | if (kPresent) { |
551 | JSValue kValue = localesObject->get(&state, k); |
552 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
553 | |
554 | if (!kValue.isString() && !kValue.isObject()) { |
555 | throwTypeError(&state, scope, "locale value must be a string or object"_s ); |
556 | return Vector<String>(); |
557 | } |
558 | |
559 | JSString* tag = kValue.toString(&state); |
560 | RETURN_IF_EXCEPTION(scope, Vector<String>()); |
561 | |
562 | String canonicalizedTag = canonicalizeLanguageTag(tag->value(&state)); |
563 | if (canonicalizedTag.isNull()) { |
564 | throwException(&state, scope, createRangeError(&state, "invalid language tag: " + tag->value(&state))); |
565 | return Vector<String>(); |
566 | } |
567 | |
568 | if (seenSet.add(canonicalizedTag).isNewEntry) |
569 | seen.append(canonicalizedTag); |
570 | } |
571 | } |
572 | |
573 | return seen; |
574 | } |
575 | |
576 | String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale) |
577 | { |
578 | // BestAvailableLocale (availableLocales, locale) |
579 | // https://tc39.github.io/ecma402/#sec-bestavailablelocale |
580 | |
581 | String candidate = locale; |
582 | while (!candidate.isEmpty()) { |
583 | if (availableLocales.contains(candidate)) |
584 | return candidate; |
585 | |
586 | size_t pos = candidate.reverseFind('-'); |
587 | if (pos == notFound) |
588 | return String(); |
589 | |
590 | if (pos >= 2 && candidate[pos - 2] == '-') |
591 | pos -= 2; |
592 | |
593 | candidate = candidate.substring(0, pos); |
594 | } |
595 | |
596 | return String(); |
597 | } |
598 | |
599 | String defaultLocale(ExecState& state) |
600 | { |
601 | // DefaultLocale () |
602 | // https://tc39.github.io/ecma402/#sec-defaultlocale |
603 | |
604 | // WebCore's global objects will have their own ideas of how to determine the language. It may |
605 | // be determined by WebCore-specific logic like some WK settings. Usually this will return the |
606 | // same thing as userPreferredLanguages()[0]. |
607 | VM& vm = state.vm(); |
608 | if (auto defaultLanguage = state.jsCallee()->globalObject(vm)->globalObjectMethodTable()->defaultLanguage) { |
609 | String locale = canonicalizeLanguageTag(defaultLanguage()); |
610 | if (!locale.isEmpty()) |
611 | return locale; |
612 | } |
613 | |
614 | Vector<String> languages = userPreferredLanguages(); |
615 | for (const auto& language : languages) { |
616 | String locale = canonicalizeLanguageTag(language); |
617 | if (!locale.isEmpty()) |
618 | return locale; |
619 | } |
620 | |
621 | // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user |
622 | // has configured some other language, but being wrong is better than crashing. |
623 | String locale = convertICULocaleToBCP47LanguageTag(uloc_getDefault()); |
624 | if (!locale.isEmpty()) |
625 | return locale; |
626 | |
627 | return "en"_s ; |
628 | } |
629 | |
630 | String removeUnicodeLocaleExtension(const String& locale) |
631 | { |
632 | Vector<String> parts = locale.split('-'); |
633 | StringBuilder builder; |
634 | size_t partsSize = parts.size(); |
635 | bool atPrivate = false; |
636 | if (partsSize > 0) |
637 | builder.append(parts[0]); |
638 | for (size_t p = 1; p < partsSize; ++p) { |
639 | if (parts[p] == "x" ) |
640 | atPrivate = true; |
641 | if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) { |
642 | // Skip the u- and anything that follows until another singleton. |
643 | // While the next part is part of the unicode extension, skip it. |
644 | while (p + 1 < partsSize && parts[p + 1].length() > 1) |
645 | ++p; |
646 | } else { |
647 | builder.append('-'); |
648 | builder.append(parts[p]); |
649 | } |
650 | } |
651 | return builder.toString(); |
652 | } |
653 | |
654 | static MatcherResult lookupMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) |
655 | { |
656 | // LookupMatcher (availableLocales, requestedLocales) |
657 | // https://tc39.github.io/ecma402/#sec-lookupmatcher |
658 | |
659 | String locale; |
660 | String noExtensionsLocale; |
661 | String availableLocale; |
662 | for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) { |
663 | locale = requestedLocales[i]; |
664 | noExtensionsLocale = removeUnicodeLocaleExtension(locale); |
665 | availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale); |
666 | } |
667 | |
668 | MatcherResult result; |
669 | if (!availableLocale.isEmpty()) { |
670 | result.locale = availableLocale; |
671 | if (locale != noExtensionsLocale) { |
672 | size_t extensionIndex = locale.find("-u-" ); |
673 | RELEASE_ASSERT(extensionIndex != notFound); |
674 | |
675 | size_t extensionLength = locale.length() - extensionIndex; |
676 | size_t end = extensionIndex + 3; |
677 | while (end < locale.length()) { |
678 | end = locale.find('-', end); |
679 | if (end == notFound) |
680 | break; |
681 | if (end + 2 < locale.length() && locale[end + 2] == '-') { |
682 | extensionLength = end - extensionIndex; |
683 | break; |
684 | } |
685 | end++; |
686 | } |
687 | result.extension = locale.substring(extensionIndex, extensionLength); |
688 | result.extensionIndex = extensionIndex; |
689 | } |
690 | } else |
691 | result.locale = defaultLocale(state); |
692 | return result; |
693 | } |
694 | |
695 | static MatcherResult bestFitMatcher(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) |
696 | { |
697 | // BestFitMatcher (availableLocales, requestedLocales) |
698 | // https://tc39.github.io/ecma402/#sec-bestfitmatcher |
699 | |
700 | // FIXME: Implement something better than lookup. |
701 | return lookupMatcher(state, availableLocales, requestedLocales); |
702 | } |
703 | |
704 | static void unicodeExtensionSubTags(const String& extension, Vector<String>& subtags) |
705 | { |
706 | // UnicodeExtensionSubtags (extension) |
707 | // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags |
708 | |
709 | auto extensionLength = extension.length(); |
710 | if (extensionLength < 3) |
711 | return; |
712 | |
713 | size_t subtagStart = 3; // Skip initial -u-. |
714 | size_t valueStart = 3; |
715 | bool isLeading = true; |
716 | for (size_t index = subtagStart; index < extensionLength; ++index) { |
717 | if (extension[index] == '-') { |
718 | if (index - subtagStart == 2) { |
719 | // Tag is a key, first append prior key's value if there is one. |
720 | if (subtagStart - valueStart > 1) |
721 | subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); |
722 | subtags.append(extension.substring(subtagStart, index - subtagStart)); |
723 | valueStart = index + 1; |
724 | isLeading = false; |
725 | } else if (isLeading) { |
726 | // Leading subtags before first key. |
727 | subtags.append(extension.substring(subtagStart, index - subtagStart)); |
728 | valueStart = index + 1; |
729 | } |
730 | subtagStart = index + 1; |
731 | } |
732 | } |
733 | if (extensionLength - subtagStart == 2) { |
734 | // Trailing an extension key, first append prior key's value if there is one. |
735 | if (subtagStart - valueStart > 1) |
736 | subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); |
737 | valueStart = subtagStart; |
738 | } |
739 | // Append final key's value. |
740 | subtags.append(extension.substring(valueStart, extensionLength - valueStart)); |
741 | } |
742 | |
743 | HashMap<String, String> resolveLocale(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, const HashMap<String, String>& options, const char* const relevantExtensionKeys[], size_t relevantExtensionKeyCount, Vector<String> (*localeData)(const String&, size_t)) |
744 | { |
745 | // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) |
746 | // https://tc39.github.io/ecma402/#sec-resolvelocale |
747 | |
748 | const String& matcher = options.get("localeMatcher"_s ); |
749 | MatcherResult matcherResult = (matcher == "lookup" ) |
750 | ? lookupMatcher(state, availableLocales, requestedLocales) |
751 | : bestFitMatcher(state, availableLocales, requestedLocales); |
752 | |
753 | String foundLocale = matcherResult.locale; |
754 | |
755 | Vector<String> extensionSubtags; |
756 | if (!matcherResult.extension.isNull()) |
757 | unicodeExtensionSubTags(matcherResult.extension, extensionSubtags); |
758 | |
759 | HashMap<String, String> result; |
760 | result.add("dataLocale"_s , foundLocale); |
761 | |
762 | String supportedExtension = "-u"_s ; |
763 | for (size_t keyIndex = 0; keyIndex < relevantExtensionKeyCount; ++keyIndex) { |
764 | const char* key = relevantExtensionKeys[keyIndex]; |
765 | Vector<String> keyLocaleData = localeData(foundLocale, keyIndex); |
766 | ASSERT(!keyLocaleData.isEmpty()); |
767 | |
768 | String value = keyLocaleData[0]; |
769 | String supportedExtensionAddition; |
770 | |
771 | if (!extensionSubtags.isEmpty()) { |
772 | size_t keyPos = extensionSubtags.find(key); |
773 | if (keyPos != notFound) { |
774 | if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) { |
775 | const String& requestedValue = extensionSubtags[keyPos + 1]; |
776 | if (keyLocaleData.contains(requestedValue)) { |
777 | value = requestedValue; |
778 | supportedExtensionAddition = makeString('-', key, '-', value); |
779 | } |
780 | } else if (keyLocaleData.contains(static_cast<String>("true"_s ))) { |
781 | value = "true"_s ; |
782 | } |
783 | } |
784 | } |
785 | |
786 | HashMap<String, String>::const_iterator iterator = options.find(key); |
787 | if (iterator != options.end()) { |
788 | const String& optionsValue = iterator->value; |
789 | // Undefined should not get added to the options, it won't displace the extension. |
790 | // Null will remove the extension. |
791 | if ((optionsValue.isNull() || keyLocaleData.contains(optionsValue)) && optionsValue != value) { |
792 | value = optionsValue; |
793 | supportedExtensionAddition = String(); |
794 | } |
795 | } |
796 | result.add(key, value); |
797 | supportedExtension.append(supportedExtensionAddition); |
798 | } |
799 | |
800 | if (supportedExtension.length() > 2) { |
801 | String preExtension = foundLocale.substring(0, matcherResult.extensionIndex); |
802 | String postExtension = foundLocale.substring(matcherResult.extensionIndex); |
803 | foundLocale = preExtension + supportedExtension + postExtension; |
804 | } |
805 | |
806 | result.add("locale"_s , foundLocale); |
807 | return result; |
808 | } |
809 | |
810 | static JSArray* lookupSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) |
811 | { |
812 | // LookupSupportedLocales (availableLocales, requestedLocales) |
813 | // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales |
814 | |
815 | VM& vm = state.vm(); |
816 | auto scope = DECLARE_THROW_SCOPE(vm); |
817 | |
818 | size_t len = requestedLocales.size(); |
819 | JSGlobalObject* globalObject = state.jsCallee()->globalObject(vm); |
820 | JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0); |
821 | if (!subset) { |
822 | throwOutOfMemoryError(&state, scope); |
823 | return nullptr; |
824 | } |
825 | |
826 | unsigned index = 0; |
827 | for (size_t k = 0; k < len; ++k) { |
828 | const String& locale = requestedLocales[k]; |
829 | String noExtensionsLocale = removeUnicodeLocaleExtension(locale); |
830 | String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale); |
831 | if (!availableLocale.isNull()) { |
832 | subset->putDirectIndex(&state, index++, jsString(&state, locale)); |
833 | RETURN_IF_EXCEPTION(scope, nullptr); |
834 | } |
835 | } |
836 | |
837 | return subset; |
838 | } |
839 | |
840 | static JSArray* bestFitSupportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) |
841 | { |
842 | // BestFitSupportedLocales (availableLocales, requestedLocales) |
843 | // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales |
844 | |
845 | // FIXME: Implement something better than lookup. |
846 | return lookupSupportedLocales(state, availableLocales, requestedLocales); |
847 | } |
848 | |
849 | JSValue supportedLocales(ExecState& state, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options) |
850 | { |
851 | // SupportedLocales (availableLocales, requestedLocales, options) |
852 | // https://tc39.github.io/ecma402/#sec-supportedlocales |
853 | |
854 | VM& vm = state.vm(); |
855 | auto scope = DECLARE_THROW_SCOPE(vm); |
856 | String matcher; |
857 | |
858 | if (!options.isUndefined()) { |
859 | matcher = intlStringOption(state, options, vm.propertyNames->localeMatcher, { "lookup" , "best fit" }, "localeMatcher must be either \"lookup\" or \"best fit\"" , "best fit" ); |
860 | RETURN_IF_EXCEPTION(scope, JSValue()); |
861 | } else |
862 | matcher = "best fit"_s ; |
863 | |
864 | JSArray* supportedLocales = (matcher == "best fit" ) |
865 | ? bestFitSupportedLocales(state, availableLocales, requestedLocales) |
866 | : lookupSupportedLocales(state, availableLocales, requestedLocales); |
867 | RETURN_IF_EXCEPTION(scope, JSValue()); |
868 | |
869 | PropertyNameArray keys(&vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); |
870 | supportedLocales->getOwnPropertyNames(supportedLocales, &state, keys, EnumerationMode()); |
871 | RETURN_IF_EXCEPTION(scope, JSValue()); |
872 | |
873 | PropertyDescriptor desc; |
874 | desc.setConfigurable(false); |
875 | desc.setWritable(false); |
876 | |
877 | size_t len = keys.size(); |
878 | for (size_t i = 0; i < len; ++i) { |
879 | supportedLocales->defineOwnProperty(supportedLocales, &state, keys[i], desc, true); |
880 | RETURN_IF_EXCEPTION(scope, JSValue()); |
881 | } |
882 | supportedLocales->defineOwnProperty(supportedLocales, &state, vm.propertyNames->length, desc, true); |
883 | RETURN_IF_EXCEPTION(scope, JSValue()); |
884 | |
885 | return supportedLocales; |
886 | } |
887 | |
888 | Vector<String> numberingSystemsForLocale(const String& locale) |
889 | { |
890 | static NeverDestroyed<Vector<String>> cachedNumberingSystems; |
891 | Vector<String>& availableNumberingSystems = cachedNumberingSystems.get(); |
892 | |
893 | if (UNLIKELY(availableNumberingSystems.isEmpty())) { |
894 | static Lock cachedNumberingSystemsMutex; |
895 | std::lock_guard<Lock> lock(cachedNumberingSystemsMutex); |
896 | if (availableNumberingSystems.isEmpty()) { |
897 | UErrorCode status = U_ZERO_ERROR; |
898 | UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status); |
899 | ASSERT(U_SUCCESS(status)); |
900 | |
901 | int32_t resultLength; |
902 | // Numbering system names are always ASCII, so use char[]. |
903 | while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) { |
904 | ASSERT(U_SUCCESS(status)); |
905 | auto numsys = unumsys_openByName(result, &status); |
906 | ASSERT(U_SUCCESS(status)); |
907 | // Only support algorithmic if it is the default fot the locale, handled below. |
908 | if (!unumsys_isAlgorithmic(numsys)) |
909 | availableNumberingSystems.append(String(result, resultLength)); |
910 | unumsys_close(numsys); |
911 | } |
912 | uenum_close(numberingSystemNames); |
913 | } |
914 | } |
915 | |
916 | UErrorCode status = U_ZERO_ERROR; |
917 | UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status); |
918 | ASSERT(U_SUCCESS(status)); |
919 | String defaultSystemName(unumsys_getName(defaultSystem)); |
920 | unumsys_close(defaultSystem); |
921 | |
922 | Vector<String> numberingSystems({ defaultSystemName }); |
923 | numberingSystems.appendVector(availableNumberingSystems); |
924 | return numberingSystems; |
925 | } |
926 | |
927 | EncodedJSValue JSC_HOST_CALL intlObjectFuncGetCanonicalLocales(ExecState* state) |
928 | { |
929 | // Intl.getCanonicalLocales(locales) |
930 | // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales |
931 | |
932 | VM& vm = state->vm(); |
933 | auto scope = DECLARE_THROW_SCOPE(vm); |
934 | |
935 | Vector<String> localeList = canonicalizeLocaleList(*state, state->argument(0)); |
936 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
937 | auto length = localeList.size(); |
938 | |
939 | JSGlobalObject* globalObject = state->jsCallee()->globalObject(vm); |
940 | JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length); |
941 | if (!localeArray) { |
942 | throwOutOfMemoryError(state, scope); |
943 | return encodedJSValue(); |
944 | } |
945 | |
946 | for (size_t i = 0; i < length; ++i) { |
947 | localeArray->putDirectIndex(state, i, jsString(state, localeList[i])); |
948 | RETURN_IF_EXCEPTION(scope, encodedJSValue()); |
949 | } |
950 | return JSValue::encode(localeArray); |
951 | } |
952 | |
953 | } // namespace JSC |
954 | |
955 | #endif // ENABLE(INTL) |
956 | |