// Copyright 2018 the V8 project authors. All rights reserved.
|
// Use of this source code is governed by a BSD-style license that can be
|
// found in the LICENSE file.
|
|
#ifndef V8_INTL_SUPPORT
|
#error Internationalization is expected to be enabled.
|
#endif // V8_INTL_SUPPORT
|
|
#include "src/objects/js-collator.h"
|
|
#include "src/isolate.h"
|
#include "src/objects-inl.h"
|
#include "src/objects/js-collator-inl.h"
|
#include "unicode/coll.h"
|
#include "unicode/locid.h"
|
#include "unicode/strenum.h"
|
#include "unicode/ucol.h"
|
#include "unicode/uloc.h"
|
|
namespace v8 {
|
namespace internal {
|
|
namespace {
|
|
// TODO(gsathya): Consider internalizing the value strings.
|
// Defines |key| on |options| as a data property holding |value| converted to
// a heap string. |value| must be non-null. Failure to define the property is
// fatal.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, const char* value) {
  CHECK_NOT_NULL(value);

  auto* factory = isolate->factory();
  Handle<String> string_value = factory->NewStringFromAsciiChecked(value);

  // |options| is expected to be a freshly created object without |key|, so
  // defining the property is not expected to fail.
  auto created = JSReceiver::CreateDataProperty(isolate, options, key,
                                                string_value, kDontThrow);
  CHECK(created.FromJust());
}
|
|
// Defines |key| on |options| as a data property holding the JS boolean that
// corresponds to |value|. Failure to define the property is fatal.
void CreateDataPropertyForOptions(Isolate* isolate, Handle<JSObject> options,
                                  Handle<String> key, bool value) {
  auto* factory = isolate->factory();
  Handle<Object> boolean_value = factory->ToBoolean(value);

  // |options| is expected to be a freshly created object without |key|, so
  // defining the property is not expected to fail.
  auto created = JSReceiver::CreateDataProperty(isolate, options, key,
                                                boolean_value, kDontThrow);
  CHECK(created.FromJust());
}
|
|
} // anonymous namespace
|
|
// static
|
// Builds the options object for |collator|.
//
// A fresh plain JSObject is populated, in this order, with the properties
// usage, numeric, caseFirst, sensitivity, ignorePunctuation, collation and
// locale. usage comes from the JSCollator itself; everything else is read
// back from the underlying icu::Collator. Unexpected ICU failures are fatal
// (CHECK).
Handle<JSObject> JSCollator::ResolvedOptions(Isolate* isolate,
                                             Handle<JSCollator> collator) {
  Handle<JSObject> options =
      isolate->factory()->NewJSObject(isolate->object_function());

  JSCollator::Usage usage = collator->usage();
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->usage_string(),
                               JSCollator::UsageToString(usage));

  icu::Collator* icu_collator = collator->icu_collator()->raw();
  CHECK_NOT_NULL(icu_collator);

  UErrorCode status = U_ZERO_ERROR;
  bool numeric =
      icu_collator->getAttribute(UCOL_NUMERIC_COLLATION, status) == UCOL_ON;
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->numeric_string(), numeric);

  const char* case_first = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_CASE_FIRST, status)) {
    case UCOL_LOWER_FIRST:
      case_first = "lower";
      break;
    case UCOL_UPPER_FIRST:
      case_first = "upper";
      break;
    default:
      case_first = "false";
      break;
  }
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->caseFirst_string(), case_first);

  const char* sensitivity = nullptr;
  status = U_ZERO_ERROR;
  switch (icu_collator->getAttribute(UCOL_STRENGTH, status)) {
    case UCOL_PRIMARY: {
      CHECK(U_SUCCESS(status));
      status = U_ZERO_ERROR;
      // case level: true + s1 -> case, s1 -> base.
      if (UCOL_ON == icu_collator->getAttribute(UCOL_CASE_LEVEL, status)) {
        sensitivity = "case";
      } else {
        sensitivity = "base";
      }
      CHECK(U_SUCCESS(status));
      break;
    }
    case UCOL_SECONDARY:
      sensitivity = "accent";
      break;
    case UCOL_TERTIARY:
      sensitivity = "variant";
      break;
    case UCOL_QUATERNARY:
    default:
      // We shouldn't get quaternary and identical from ICU, but if we do
      // put them into variant.
      sensitivity = "variant";
      break;
  }
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->sensitivity_string(), sensitivity);

  status = U_ZERO_ERROR;
  bool ignore_punctuation = icu_collator->getAttribute(UCOL_ALTERNATE_HANDLING,
                                                       status) == UCOL_SHIFTED;
  CHECK(U_SUCCESS(status));
  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->ignorePunctuation_string(),
                               ignore_punctuation);

  // The locale the collator actually resolved to. It is used both to look up
  // the collation type and to produce the "locale" property below.
  status = U_ZERO_ERROR;
  icu::Locale icu_locale = icu_collator->getLocale(ULOC_VALID_LOCALE, status);
  CHECK(U_SUCCESS(status));

  // icu::Collator::getKeywordValues() is a static lookup of all values
  // available for the "collation" keyword; it rejects "co" and says nothing
  // about this particular collator. Read the collation type from the
  // collator's locale instead and fall back to "default" when none is set.
  //
  // NOTE(review): this relies on ICU recording a non-default collation type
  // as a keyword on the valid locale -- confirm against the bundled ICU.
  const char* collation = "default";
  const char* legacy_collation_key = uloc_toLegacyKey("co");
  CHECK_NOT_NULL(legacy_collation_key);
  char legacy_collation_value[ULOC_FULLNAME_CAPACITY];
  status = U_ZERO_ERROR;
  int32_t collation_length =
      icu_locale.getKeywordValue(legacy_collation_key, legacy_collation_value,
                                 ULOC_FULLNAME_CAPACITY, status);
  if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING &&
      collation_length > 0) {
    const char* bcp47_collation =
        uloc_toUnicodeLocaleType("co", legacy_collation_value);
    // "search" and "standard" must not be reported as collation values, see
    // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
    if (bcp47_collation != nullptr &&
        strcmp(bcp47_collation, "search") != 0 &&
        strcmp(bcp47_collation, "standard") != 0) {
      collation = bcp47_collation;
    }
  }
  CreateDataPropertyForOptions(
      isolate, options, isolate->factory()->collation_string(), collation);

  char result[ULOC_FULLNAME_CAPACITY];
  status = U_ZERO_ERROR;
  uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  // U_STRING_NOT_TERMINATED_WARNING counts as success for U_SUCCESS() but
  // leaves |result| without a terminating NUL, so reject it explicitly before
  // |result| is read as a C string.
  CHECK(U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING);

  CreateDataPropertyForOptions(isolate, options,
                               isolate->factory()->locale_string(), result);

  return options;
}
|
|
namespace {
|
|
std::map<std::string, std::string> LookupUnicodeExtensions(
|
const icu::Locale& icu_locale, const std::set<std::string>& relevant_keys) {
|
std::map<std::string, std::string> extensions;
|
|
UErrorCode status = U_ZERO_ERROR;
|
std::unique_ptr<icu::StringEnumeration> keywords(
|
icu_locale.createKeywords(status));
|
if (U_FAILURE(status)) return extensions;
|
|
if (!keywords) return extensions;
|
char value[ULOC_FULLNAME_CAPACITY];
|
|
int32_t length;
|
status = U_ZERO_ERROR;
|
for (const char* keyword = keywords->next(&length, status);
|
keyword != nullptr; keyword = keywords->next(&length, status)) {
|
// Ignore failures in ICU and skip to the next keyword.
|
//
|
// This is fine.™
|
if (U_FAILURE(status)) {
|
status = U_ZERO_ERROR;
|
continue;
|
}
|
|
icu_locale.getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
|
|
// Ignore failures in ICU and skip to the next keyword.
|
//
|
// This is fine.™
|
if (U_FAILURE(status)) {
|
status = U_ZERO_ERROR;
|
continue;
|
}
|
|
const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
|
|
// Ignore keywords that we don't recognize - spec allows that.
|
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
|
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
|
extensions.insert(
|
std::pair<std::string, std::string>(bcp47_key, bcp47_value));
|
}
|
}
|
|
return extensions;
|
}
|
|
// Applies a caseFirst setting to |icu_collator|: "upper" and "lower" select
// the matching ICU case-first mode, any other string turns case-first off.
// Both arguments must be non-null; an ICU failure is fatal.
void SetCaseFirstOption(icu::Collator* icu_collator, const char* value) {
  CHECK_NOT_NULL(icu_collator);
  CHECK_NOT_NULL(value);

  const bool wants_upper = strcmp(value, "upper") == 0;
  const bool wants_lower = !wants_upper && strcmp(value, "lower") == 0;
  const auto case_first = wants_upper
                              ? UCOL_UPPER_FIRST
                              : (wants_lower ? UCOL_LOWER_FIRST : UCOL_OFF);

  UErrorCode status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_CASE_FIRST, case_first, status);
  CHECK(U_SUCCESS(status));
}
|
|
} // anonymous namespace
|
|
// static
|
// Initializes |collator| from |locales| and |options_obj|, following the
// numbered steps of the ECMA-402 InitializeCollator operation quoted below.
//
// Reads the usage, numeric, caseFirst, sensitivity and ignorePunctuation
// options, resolves the locale, creates and configures an icu::Collator and
// stores it (together with the usage) on |collator|.
//
// Returns |collator| on success. Returns an empty MaybeHandle when
// canonicalizing the locales, converting the options to an object, reading an
// option or resolving the locale fails. Being unable to create any ICU
// collator is fatal.
MaybeHandle<JSCollator> JSCollator::InitializeCollator(
    Isolate* isolate, Handle<JSCollator> collator, Handle<Object> locales,
    Handle<Object> options_obj) {
  // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
  Handle<JSObject> requested_locales;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, requested_locales,
                             Intl::CanonicalizeLocaleListJS(isolate, locales),
                             JSCollator);

  // 2. If options is undefined, then
  if (options_obj->IsUndefined(isolate)) {
    // 2. a. Let options be ObjectCreate(null).
    options_obj = isolate->factory()->NewJSObjectWithNullProto();
  } else {
    // 3. Else
    // 3. a. Let options be ? ToObject(options).
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, options_obj,
        Object::ToObject(isolate, options_obj, "Intl.Collator"), JSCollator);
  }

  // At this point, options_obj can either be a JSObject or a JSProxy only.
  Handle<JSReceiver> options = Handle<JSReceiver>::cast(options_obj);

  // 4. Let usage be ? GetOption(options, "usage", "string", « "sort",
  // "search" », "sort").
  std::vector<const char*> values = {"sort", "search"};
  std::unique_ptr<char[]> usage_str = nullptr;
  JSCollator::Usage usage = JSCollator::Usage::SORT;
  Maybe<bool> found_usage = Intl::GetStringOption(
      isolate, options, "usage", values, "Intl.Collator", &usage_str);
  MAYBE_RETURN(found_usage, MaybeHandle<JSCollator>());

  if (found_usage.FromJust()) {
    DCHECK_NOT_NULL(usage_str.get());
    if (strcmp(usage_str.get(), "search") == 0) {
      usage = JSCollator::Usage::SEARCH;
    }
  }

  // 5. Set collator.[[Usage]] to usage.
  collator->set_usage(usage);

  // 6. If usage is "sort", then
  //    a. Let localeData be %Collator%.[[SortLocaleData]].
  // 7. Else,
  //    a. Let localeData be %Collator%.[[SearchLocaleData]].
  //
  // The above two spec operations aren't required, see
  // https://github.com/tc39/ecma402/issues/256

  // TODO(gsathya): This is currently done as part of the
  // Intl::ResolveLocale call below. Fix this once resolveLocale is
  // changed to not do the lookup.
  //
  // 9. Let matcher be ? GetOption(options, "localeMatcher", "string",
  // « "lookup", "best fit" », "best fit").
  // 10. Set opt.[[localeMatcher]] to matcher.

  // 11. Let numeric be ? GetOption(options, "numeric", "boolean",
  // undefined, undefined).
  // 12. If numeric is not undefined, then
  //     a. Let numeric be ! ToString(numeric).
  //
  // Note: We omit the ToString(numeric) operation as it's not
  // observable. Intl::GetBoolOption returns a Boolean and
  // ToString(Boolean) is not side-effecting.
  //
  // 13. Set opt.[[kn]] to numeric.
  bool numeric = false;
  Maybe<bool> found_numeric = Intl::GetBoolOption(isolate, options, "numeric",
                                                  "Intl.Collator", &numeric);
  MAYBE_RETURN(found_numeric, MaybeHandle<JSCollator>());

  // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string",
  //     « "upper", "lower", "false" », undefined).
  // 15. Set opt.[[kf]] to caseFirst.
  values = {"upper", "lower", "false"};
  std::unique_ptr<char[]> case_first_str = nullptr;
  Maybe<bool> found_case_first = Intl::GetStringOption(
      isolate, options, "caseFirst", values, "Intl.Collator", &case_first_str);
  MAYBE_RETURN(found_case_first, MaybeHandle<JSCollator>());

  // The relevant unicode extensions accepted by Collator as specified here:
  // https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots
  //
  // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
  std::set<std::string> relevant_extension_keys{"co", "kn", "kf"};

  // We don't pass the relevant_extension_keys to ResolveLocale here
  // as per the spec.
  //
  // In ResolveLocale, the spec makes sure we only pick and use the
  // relevant extension keys and ignore any other keys. Also, in
  // ResolveLocale, the spec makes sure that if a given key has both a
  // value in the options object and an unicode extension value, then
  // we pick the value provided in the options object.
  // For example: in the case of `new Intl.Collator('en-u-kn-true', {
  // numeric: false })` the value `false` is used for the `numeric`
  // key.
  //
  // Instead of performing all this validation in ResolveLocale, we
  // just perform it inline below. In the future when we port
  // ResolveLocale to C++, we can make all these validations generic
  // and move it to ResolveLocale.
  //
  // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
  // requestedLocales, opt, %Collator%.[[RelevantExtensionKeys]],
  // localeData).
  // 18. Set collator.[[Locale]] to r.[[locale]].
  Handle<JSObject> r;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, r,
      Intl::ResolveLocale(isolate, "collator", requested_locales, options),
      JSCollator);

  Handle<String> locale_with_extension_str =
      isolate->factory()->NewStringFromStaticChars("localeWithExtension");
  Handle<Object> locale_with_extension_obj =
      JSObject::GetDataProperty(r, locale_with_extension_str);

  // The locale_with_extension has to be a string. Either a user
  // provided canonicalized string or the default locale.
  CHECK(locale_with_extension_obj->IsString());
  Handle<String> locale_with_extension =
      Handle<String>::cast(locale_with_extension_obj);

  icu::Locale icu_locale =
      Intl::CreateICULocale(isolate, locale_with_extension);
  DCHECK(!icu_locale.isBogus());

  std::map<std::string, std::string> extensions =
      LookupUnicodeExtensions(icu_locale, relevant_extension_keys);

  // 19. Let collation be r.[[co]].
  //
  // r.[[co]] is already set as part of the icu::Locale creation as
  // icu parses unicode extensions and sets the keywords.
  //
  // We need to sanitize the keywords based on certain ECMAScript rules.
  //
  // As per https://tc39.github.io/ecma402/#sec-intl-collator-internal-slots:
  // The values "standard" and "search" must not be used as elements
  // in any [[SortLocaleData]][locale].co and
  // [[SearchLocaleData]][locale].co list.
  auto co_extension_it = extensions.find("co");
  if (co_extension_it != extensions.end()) {
    const std::string& value = co_extension_it->second;
    if ((value == "search") || (value == "standard")) {
      // icu::Locale stores keywords under their legacy ICU names, not under
      // the BCP 47 key, so "co" has to be converted before it can be used
      // to address the keyword. Setting the value to nullptr removes it.
      const char* legacy_co_key = uloc_toLegacyKey("co");
      CHECK_NOT_NULL(legacy_co_key);
      UErrorCode status = U_ZERO_ERROR;
      icu_locale.setKeywordValue(legacy_co_key, nullptr, status);
      CHECK(U_SUCCESS(status));
    }
  }

  // 20. If collation is null, let collation be "default".
  // 21. Set collator.[[Collation]] to collation.
  //
  // We don't store the collation value as per the above two steps
  // here. The collation value can be looked up from icu::Collator on
  // demand, as part of Intl.Collator.prototype.resolvedOptions.

  UErrorCode status = U_ZERO_ERROR;
  std::unique_ptr<icu::Collator> icu_collator(
      icu::Collator::createInstance(icu_locale, status));
  if (U_FAILURE(status) || icu_collator.get() == nullptr) {
    status = U_ZERO_ERROR;
    // Remove extensions and try again.
    icu::Locale no_extension_locale(icu_locale.getBaseName());
    icu_collator.reset(
        icu::Collator::createInstance(no_extension_locale, status));

    if (U_FAILURE(status) || icu_collator.get() == nullptr) {
      FATAL("Failed to create ICU collator, are ICU data files missing?");
    }
  }
  DCHECK(U_SUCCESS(status));
  CHECK_NOT_NULL(icu_collator.get());

  // 22. If relevantExtensionKeys contains "kn", then
  //     a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
  //
  // If the numeric value is passed in through the options object,
  // then we use it. Otherwise, we check if the numeric value is
  // passed in through the unicode extensions.
  status = U_ZERO_ERROR;
  if (found_numeric.FromJust()) {
    icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
                               numeric ? UCOL_ON : UCOL_OFF, status);
    CHECK(U_SUCCESS(status));
  } else {
    auto kn_extension_it = extensions.find("kn");
    if (kn_extension_it != extensions.end()) {
      const std::string& value = kn_extension_it->second;

      numeric = (value == "true");

      icu_collator->setAttribute(UCOL_NUMERIC_COLLATION,
                                 numeric ? UCOL_ON : UCOL_OFF, status);
      CHECK(U_SUCCESS(status));
    }
  }

  // 23. If relevantExtensionKeys contains "kf", then
  //     a. Set collator.[[CaseFirst]] to r.[[kf]].
  //
  // If the caseFirst value is passed in through the options object,
  // then we use it. Otherwise, we check if the caseFirst value is
  // passed in through the unicode extensions.
  if (found_case_first.FromJust()) {
    const char* case_first_cstr = case_first_str.get();
    SetCaseFirstOption(icu_collator.get(), case_first_cstr);
  } else {
    auto kf_extension_it = extensions.find("kf");
    if (kf_extension_it != extensions.end()) {
      const std::string& value = kf_extension_it->second;
      SetCaseFirstOption(icu_collator.get(), value.c_str());
    }
  }

  // Normalization is always on, by the spec. We are free to optimize
  // if the strings are already normalized (but we don't have a way to tell
  // that right now).
  status = U_ZERO_ERROR;
  icu_collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
  CHECK(U_SUCCESS(status));

  // 24. Let sensitivity be ? GetOption(options, "sensitivity",
  // "string", « "base", "accent", "case", "variant" », undefined).
  values = {"base", "accent", "case", "variant"};
  std::unique_ptr<char[]> sensitivity_str = nullptr;
  Maybe<bool> found_sensitivity =
      Intl::GetStringOption(isolate, options, "sensitivity", values,
                            "Intl.Collator", &sensitivity_str);
  MAYBE_RETURN(found_sensitivity, MaybeHandle<JSCollator>());

  // 25. If sensitivity is undefined, then
  if (!found_sensitivity.FromJust()) {
    // 25. a. If usage is "sort", then
    if (usage == Usage::SORT) {
      // 25. a. i. Let sensitivity be "variant".
      // 26. Set collator.[[Sensitivity]] to sensitivity.
      icu_collator->setStrength(icu::Collator::TERTIARY);
    }
    // For usage "search" the strength of the ICU collator is left as
    // created.
  } else {
    const char* sensitivity_cstr = sensitivity_str.get();
    DCHECK_NOT_NULL(sensitivity_cstr);

    // 26. Set collator.[[Sensitivity]] to sensitivity.
    if (strcmp(sensitivity_cstr, "base") == 0) {
      icu_collator->setStrength(icu::Collator::PRIMARY);
    } else if (strcmp(sensitivity_cstr, "accent") == 0) {
      icu_collator->setStrength(icu::Collator::SECONDARY);
    } else if (strcmp(sensitivity_cstr, "case") == 0) {
      // "case" is expressed as primary strength plus the case level.
      icu_collator->setStrength(icu::Collator::PRIMARY);
      status = U_ZERO_ERROR;
      icu_collator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
      CHECK(U_SUCCESS(status));
    } else {
      DCHECK_EQ(0, strcmp(sensitivity_cstr, "variant"));
      icu_collator->setStrength(icu::Collator::TERTIARY);
    }
  }

  // 27.Let ignorePunctuation be ? GetOption(options,
  // "ignorePunctuation", "boolean", undefined, false).
  bool ignore_punctuation = false;
  Maybe<bool> found_ignore_punctuation =
      Intl::GetBoolOption(isolate, options, "ignorePunctuation",
                          "Intl.Collator", &ignore_punctuation);
  MAYBE_RETURN(found_ignore_punctuation, MaybeHandle<JSCollator>());

  // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
  if (found_ignore_punctuation.FromJust() && ignore_punctuation) {
    status = U_ZERO_ERROR;
    icu_collator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
    CHECK(U_SUCCESS(status));
  }

  // Hand ownership of the ICU collator over to a Managed<> wrapper that is
  // stored on the JSCollator.
  Handle<Managed<icu::Collator>> managed_collator =
      Managed<icu::Collator>::FromUniquePtr(isolate, 0,
                                            std::move(icu_collator));
  collator->set_icu_collator(*managed_collator);

  // 29. Return collator.
  return collator;
}
|
|
// static
|
// Maps a Usage value to the string used for the "usage" option: "sort" for
// Usage::SORT and "search" for Usage::SEARCH. Usage::COUNT is not a valid
// argument.
const char* JSCollator::UsageToString(Usage usage) {
  const char* usage_name = nullptr;
  switch (usage) {
    case Usage::SORT:
      usage_name = "sort";
      break;
    case Usage::SEARCH:
      usage_name = "search";
      break;
    case Usage::COUNT:
      UNREACHABLE();
  }
  return usage_name;
}
|
|
} // namespace internal
|
} // namespace v8
|