Skip to content

Commit

Permalink
ICU-22547 fix addLikelySubtags for 4 chars script code
Browse files Browse the repository at this point in the history
Also fix ICU-22546 to correct the comments in the API doc
and add additional unit tests
  • Loading branch information
FrankYFTang committed Oct 28, 2023
1 parent e04f442 commit 92eeb45
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 18 deletions.
9 changes: 8 additions & 1 deletion icu4c/source/common/loclikely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,14 @@ _uloc_addLikelySubtags(const char* localeID,
goto error;
}
if (langLength > 3) {
goto error;
if (langLength == 4 && scriptLength == 0) {
langLength = 0;
scriptLength = 4;
uprv_memcpy(script, lang, 4);
lang[0] = '\0';
} else {
goto error;
}
}

/* Find the length of the trailing portion. */
Expand Down
10 changes: 5 additions & 5 deletions icu4c/source/common/unicode/locid.h
Original file line number Diff line number Diff line change
Expand Up @@ -518,20 +518,20 @@ class U_COMMON_API Locale : public UObject {
* If this Locale is already in the maximal form, or not valid, or there is
* no data available for maximization, the Locale will be unchanged.
*
* For example, "und-Zzzz" cannot be maximized, since there is no
* For example, "sh" cannot be maximized, since there is no
* reasonable maximization.
*
* Examples:
*
* "und_Zzzz" maximizes to "en_Latn_US"
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_US"
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
* "zh_Hani" maximizes to "zh_Hani_CN"
*
* @param status error information if maximizing this Locale failed.
* If this Locale is not well-formed, the error code is
Expand Down
9 changes: 5 additions & 4 deletions icu4c/source/common/unicode/uloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -1158,19 +1158,20 @@ uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
*
* If localeID is already in the maximal form, or there is no data available
* for maximization, it will be copied to the output buffer. For example,
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
* "sh" cannot be maximized, since there is no reasonable maximization.
*
* Examples:
*
* "und_Zzzz" maximizes to "en_Latn_US"
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_US"
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
* "zh_Hani" maximizes to "zh_Hani_CN"
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
*
* @param localeID The locale to maximize
* @param maximizedLocaleID The maximized locale
Expand Down
38 changes: 35 additions & 3 deletions icu4c/source/test/cintltst/cloctst.c
Original file line number Diff line number Diff line change
Expand Up @@ -3782,6 +3782,38 @@ const char* const basic_maximize_data[][2] = {
}, {
"_DE@em=emoji",
"de_Latn_DE@em=emoji"
}, {
// ICU-22547
// unicode_language_id = "root" |
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
// so "aaaa" is a well-formed unicode_language_id
"aaaa",
"aaaa",
}, {
// ICU-22546
"und-Zzzz",
"en_Latn_US" // If change, please also update common/unicode/uloc.h
}, {
// ICU-22546
"en",
"en_Latn_US" // If change, please also update common/unicode/uloc.h
}, {
// ICU-22546
"de",
"de_Latn_DE" // If change, please also update common/unicode/uloc.h
}, {
// ICU-22546
"sr",
"sr_Cyrl_RS" // If change, please also update common/unicode/uloc.h
}, {
// ICU-22546
"sh",
"sh" // If change, please also update common/unicode/uloc.h
}, {
// ICU-22546
"zh_Hani",
"zh_Hani_CN" // If change, please also update common/unicode/uloc.h
}
};

Expand Down Expand Up @@ -6013,7 +6045,7 @@ static void TestLikelySubtags()
}
}
else if (uprv_stricmp(maximal, buffer) != 0) {
log_err(" maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %s\n", maximal, minimal, buffer);
log_err("1 maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %s\n", maximal, minimal, buffer);
}
}

Expand Down Expand Up @@ -6066,7 +6098,7 @@ static void TestLikelySubtags()
}
}
else if (uprv_stricmp(maximal, buffer) != 0) {
log_err(" maximal doesn't match expected \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
log_err("2 maximal doesn't match expected \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
}
}

Expand Down Expand Up @@ -6128,7 +6160,7 @@ static void TestLikelySubtags()
}
else if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
if (uprv_strnicmp(maximal, buffer, bufferSize) != 0) {
log_err(" maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %*s\n",
log_err("3 maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %*s\n",
maximal, minimal, (int)sizeof(buffer), buffer);
}
}
Expand Down
39 changes: 39 additions & 0 deletions icu4c/source/test/intltest/loctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3842,6 +3842,45 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
"und_US",
"en_Latn_US",
"en"
}, {
// ICU-22547
// unicode_language_id = "root" |
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
// so "aaaa" is a well-formed unicode_language_id
"aaaa",
"aaaa",
"aaaa",
}, {
// ICU-22546
"und-Zzzz",
"en_Latn_US", // If change, please also update common/unicode/locid.h
"en"
}, {
// ICU-22546
"en",
"en_Latn_US", // If change, please also update common/unicode/locid.h
"en"
}, {
// ICU-22546
"de",
"de_Latn_DE", // If change, please also update common/unicode/locid.h
"de"
}, {
// ICU-22546
"sr",
"sr_Cyrl_RS", // If change, please also update common/unicode/locid.h
"sr"
}, {
// ICU-22546
"sh",
"sh",// If change, please also update common/unicode/locid.h
"sh"
}, {
// ICU-22546
"zh_Hani",
"zh_Hani_CN", // If change, please also update common/unicode/locid.h
"zh_Hani"
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,38 @@ public void TestAddLikelySubtags() {
}, {
"zzz",
""
}, {
// ICU-22547
// unicode_language_id = "root" |
// (unicode_language_subtag (sep unicode_script_subtag)? | unicode_script_subtag)
// (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
// so "aaaa" is a well-formed unicode_language_id
"aaaa",
"aaaa",
}, {
// ICU-22546
"und-Zzzz",
"en_Latn_US" // If change, please also update ULocale.java
}, {
// ICU-22546
"en",
"en_Latn_US" // If change, please also update ULocale.java
}, {
// ICU-22546
"de",
"de_Latn_DE" // If change, please also update ULocale.java
}, {
// ICU-22546
"sr",
"sr_Cyrl_RS" // If change, please also update ULocale.java
}, {
// ICU-22546
"sh",
"sh" // If change, please also update ULocale.java
}, {
// ICU-22546
"zh_Hani",
"zh_Hani_CN" // If change, please also update ULocale.java
}
};

Expand Down
8 changes: 3 additions & 5 deletions icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -2694,20 +2694,18 @@ public static ULocale acceptLanguage(ULocale[] acceptLanguageList, boolean[] fal
*
* If the provided ULocale instance is already in the maximal form, or there is no
* data available for maximization, it will be returned. For example,
* "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
* "sh" cannot be maximized, since there is no reasonable maximization.
* Otherwise, a new ULocale instance with the maximal form is returned.
*
* Examples:
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_US"
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
* "zh_Hani" maximizes to "zh_Hani_CN"
*
* @param loc The ULocale to maximize
* @return The maximized ULocale instance.
Expand Down

0 comments on commit 92eeb45

Please sign in to comment.