Skip to content
192 changes: 22 additions & 170 deletions fluent-langneg/src/locale.ts
Original file line number Diff line number Diff line change
@@ -1,177 +1,29 @@
const languageCodeRe = "([a-z]{2,3}|\\*)";
const scriptCodeRe = "(?:-([a-z]{4}|\\*))";
const regionCodeRe = "(?:-([a-z]{2}|\\*))";
const variantCodeRe = "(?:-(([0-9][a-z0-9]{3}|[a-z0-9]{5,8})|\\*))";
export class LocaleWrapper extends Intl.Locale {
variants?: string;

/**
* Regular expression splitting locale id into four pieces:
*
* Example: `en-Latn-US-macos`
*
* language: en
* script: Latn
* region: US
* variant: macos
*
* It can also accept a range `*` character on any position.
*/
const localeRe = new RegExp(
`^${languageCodeRe}${scriptCodeRe}?${regionCodeRe}?${variantCodeRe}?$`,
"i"
);

export class Locale {
isWellFormed: boolean;
language?: string;
script?: string;
region?: string;
variant?: string;

/**
* Parses a locale id using the localeRe into an array with four elements.
*
* If the second argument `range` is set to true, it places range `*` char
* in place of any missing piece.
*
* It also allows skipping the script section of the id, so `en-US` is
* properly parsed as `en-*-US-*`.
*/
constructor(locale: string) {
const result = localeRe.exec(locale.replace(/_/g, "-"));
if (!result) {
this.isWellFormed = false;
return;
}

let [, language, script, region, variant] = result;

if (language) {
this.language = language.toLowerCase();
}
if (script) {
this.script = script[0].toUpperCase() + script.slice(1);
}
if (region) {
this.region = region.toUpperCase();
}
this.variant = variant;
this.isWellFormed = true;
}

isEqual(other: Locale): boolean {
return (
this.language === other.language &&
this.script === other.script &&
this.region === other.region &&
this.variant === other.variant
);
}

matches(other: Locale, thisRange = false, otherRange = false): boolean {
return (
(this.language === other.language ||
(thisRange && this.language === undefined) ||
(otherRange && other.language === undefined)) &&
(this.script === other.script ||
(thisRange && this.script === undefined) ||
(otherRange && other.script === undefined)) &&
(this.region === other.region ||
(thisRange && this.region === undefined) ||
(otherRange && other.region === undefined)) &&
(this.variant === other.variant ||
(thisRange && this.variant === undefined) ||
(otherRange && other.variant === undefined))
);
}

toString(): string {
return [this.language, this.script, this.region, this.variant]
.filter(part => part !== undefined)
.join("-");
}

clearVariants(): void {
this.variant = undefined;
}

clearRegion(): void {
this.region = undefined;
}

addLikelySubtags(): boolean {
const newLocale = getLikelySubtagsMin(this.toString().toLowerCase());
if (newLocale) {
this.language = newLocale.language;
this.script = newLocale.script;
this.region = newLocale.region;
this.variant = newLocale.variant;
return true;
let tag = locale
.replace(/_/g, "-")
.replace(/^\*/, "und")
.replace(/-\*/g, "");

super(tag);

if (!("variants" in this)) {
// Available on Firefox 141 & later
let lsrTagLength = this.language.length;
if (this.script) lsrTagLength += this.script.length + 1;
if (this.region) lsrTagLength += this.region.length + 1;

if (tag.length > lsrTagLength) {
let unicodeExtStart: number | undefined = tag.search(/-[a-zA-Z]-/);
if (unicodeExtStart === -1) unicodeExtStart = undefined;
this.variants = tag.substring(lsrTagLength + 1, unicodeExtStart);
}
}
return false;
}
}

/**
* Below is a manually a list of likely subtags corresponding to Unicode
* CLDR likelySubtags list.
* This list is curated by the maintainers of Project Fluent and is
* intended to be used in place of the full likelySubtags list in use cases
* where full list cannot be (for example, due to the size).
*
* This version of the list is based on CLDR 30.0.3.
*/
const likelySubtagsMin: Record<string, string> = {
ar: "ar-arab-eg",
"az-arab": "az-arab-ir",
"az-ir": "az-arab-ir",
be: "be-cyrl-by",
da: "da-latn-dk",
el: "el-grek-gr",
en: "en-latn-us",
fa: "fa-arab-ir",
ja: "ja-jpan-jp",
ko: "ko-kore-kr",
pt: "pt-latn-br",
sr: "sr-cyrl-rs",
"sr-ru": "sr-latn-ru",
sv: "sv-latn-se",
ta: "ta-taml-in",
uk: "uk-cyrl-ua",
zh: "zh-hans-cn",
"zh-hant": "zh-hant-tw",
"zh-hk": "zh-hant-hk",
"zh-mo": "zh-hant-mo",
"zh-tw": "zh-hant-tw",
"zh-gb": "zh-hant-gb",
"zh-us": "zh-hant-us",
};

const regionMatchingLangs = [
"az",
"bg",
"cs",
"de",
"es",
"fi",
"fr",
"hu",
"it",
"lt",
"lv",
"nl",
"pl",
"ro",
"ru",
];

function getLikelySubtagsMin(loc: string): Locale | null {
if (Object.prototype.hasOwnProperty.call(likelySubtagsMin, loc)) {
return new Locale(likelySubtagsMin[loc]);
}
const locale = new Locale(loc);
if (locale.language && regionMatchingLangs.includes(locale.language)) {
locale.region = locale.language.toUpperCase();
return locale;
get language(): string {
return super.language ?? "und";
}
return null;
}
Loading