Skip to content

Commit

Permalink
ICU-22493 Implement First Day Override in Calendar
Browse files Browse the repository at this point in the history
  • Loading branch information
FrankYFTang committed Nov 29, 2023
1 parent 5d3e84a commit 276d3dc
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 44 deletions.
66 changes: 38 additions & 28 deletions icu4c/source/common/loclikely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -789,55 +789,65 @@ U_NAMESPACE_END
// The following must at least allow for rg key value (6) plus terminator (1).
#define ULOC_RG_BUFLEN 8

U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
char *region, int32_t regionCapacity, UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
char rgBuf[ULOC_RG_BUFLEN];
UErrorCode rgStatus = U_ZERO_ERROR;
namespace {
int GetRegionFromKey(const char *localeID, const char* key, char* buf) {
UErrorCode status = U_ZERO_ERROR;

// First check for rg keyword value
icu::CharString rg;
{
icu::CharStringByteSink sink(&rg);
ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus);
ulocimp_getKeywordValue(localeID, key, sink, &status);
}
int32_t rgLen = rg.length();
if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
rgLen = 0;
int32_t len = rg.length();
if (U_FAILURE(status) || len < 3 || len > 7) {
len = 0;
} else {
// chop off the subdivision code (which will generally be "zzzz" anyway)
const char* const data = rg.data();
if (uprv_isASCIILetter(data[0])) {
rgLen = 2;
rgBuf[0] = uprv_toupper(data[0]);
rgBuf[1] = uprv_toupper(data[1]);
len = 2;
buf[0] = uprv_toupper(data[0]);
buf[1] = uprv_toupper(data[1]);
} else {
// assume three-digit region code
rgLen = 3;
uprv_memcpy(rgBuf, data, rgLen);
len = 3;
uprv_memcpy(buf, data, len);
}
}
return len;
}
} // namespace

U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
char *region, int32_t regionCapacity, UErrorCode* status) {
if (U_FAILURE(*status)) {
return 0;
}
char rgBuf[ULOC_RG_BUFLEN];
int32_t rgLen = GetRegionFromKey(localeID, "rg", rgBuf);
if (rgLen == 0) {
// No valid rg keyword value, try for unicode_region_subtag
rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
if (U_FAILURE(*status)) {
rgLen = 0;
} else if (rgLen == 0 && inferRegion) {
// no unicode_region_subtag but inferRegion true, try likely subtags
rgStatus = U_ZERO_ERROR;
icu::CharString locBuf;
{
icu::CharStringByteSink sink(&locBuf);
ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
}
if (U_SUCCESS(rgStatus)) {
rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
if (U_FAILURE(*status)) {
rgLen = 0;
// Second check for sd keyword value
rgLen = GetRegionFromKey(localeID, "sd", rgBuf);
if (rgLen == 0) {
// no unicode_region_subtag but inferRegion true, try likely subtags
UErrorCode rgStatus = U_ZERO_ERROR;
icu::CharString locBuf;
{
icu::CharStringByteSink sink(&locBuf);
ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
}
if (U_SUCCESS(rgStatus)) {
rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
if (U_FAILURE(*status)) {
rgLen = 0;
}
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions icu4c/source/i18n/iso8601cal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ISO8601Calendar)
ISO8601Calendar::ISO8601Calendar(const Locale& aLocale, UErrorCode& success)
: GregorianCalendar(aLocale, success)
{
UErrorCode fwStatus = U_ZERO_ERROR;
int32_t fwLength = aLocale.getKeywordValue("fw", nullptr, 0, fwStatus);
// Do not set first day of week for iso8601 to Monday if we have fw keyword
UErrorCode tempStatus = U_ZERO_ERROR;
int32_t length = aLocale.getKeywordValue("fw", nullptr, 0, tempStatus) +
aLocale.getKeywordValue("rg", nullptr, 0, tempStatus);
// Do not set first day of week for iso8601 to Monday if we have fw or rg keywords
// and let the value set by the Calendar constructor to take care of it.
if (U_SUCCESS(fwStatus) && fwLength == 0) {
if (U_SUCCESS(tempStatus) && length == 0) {
setFirstDayOfWeek(UCAL_MONDAY);
}
setMinimalDaysInFirstWeek(4);
Expand Down
76 changes: 76 additions & 0 deletions icu4c/source/test/intltest/caltest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ void CalendarTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
TESTCASE_AUTO(TestFWWithISO8601);
TESTCASE_AUTO(TestDangiOverflowIsLeapMonthBetween22507);
TESTCASE_AUTO(TestRollWeekOfYear);
TESTCASE_AUTO(TestFirstDayOfWeek);

TESTCASE_AUTO_END;
}
Expand Down Expand Up @@ -5552,6 +5553,81 @@ void CalendarTest::TestRollWeekOfYear() {
U_ASSERT(U_SUCCESS(status));
cal->roll(UCAL_WEEK_OF_YEAR, 1, status);
}

void CalendarTest::verifyFirstDayOfWeek(const char* locale, UCalendarDaysOfWeek expected) {
UErrorCode status = U_ZERO_ERROR;
Locale l = Locale::forLanguageTag(locale, status);
U_ASSERT(U_SUCCESS(status));
LocalPointer<Calendar> cal(Calendar::createInstance(l, status), status);
U_ASSERT(U_SUCCESS(status));
assertEquals(locale,
expected, cal->getFirstDayOfWeek(status));
U_ASSERT(U_SUCCESS(status));
}

/**
* Test "First Day Overrides" behavior
* https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides
* And data in <firstDay> of
* https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml
*
* Examples of region for First Day of a week
* Friday: MV
* Saturday: AE AF
* Sunday: US JP
* Monday: GB
*/
void CalendarTest::TestFirstDayOfWeek() {
// Test -u-fw- value
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sun-rg-mvzzzz-sd-usca", UCAL_SUNDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-mon-rg-mvzzzz-sd-usca", UCAL_MONDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-tue-rg-mvzzzz-sd-usca", UCAL_TUESDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-wed-rg-mvzzzz-sd-usca", UCAL_WEDNESDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-thu-rg-mvzzzz-sd-usca", UCAL_THURSDAY);
verifyFirstDayOfWeek("en-AE-u-ca-iso8601-fw-fri-rg-aezzzz-sd-usca", UCAL_FRIDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sat-rg-mvzzzz-sd-usca", UCAL_SATURDAY);

// Test -u-rg- value
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-mvzzzz-sd-usca", UCAL_FRIDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-aezzzz-sd-usca", UCAL_SATURDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-uszzzz-sd-usca", UCAL_SUNDAY);
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-gbzzzz-sd-usca", UCAL_MONDAY);

// Test -u-ca-iso8601
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-sd-mv00", UCAL_MONDAY);
verifyFirstDayOfWeek("en-AE-u-ca-iso8601-sd-aeaj", UCAL_MONDAY);
verifyFirstDayOfWeek("en-US-u-ca-iso8601-sd-usca", UCAL_MONDAY);

// Test Region Tags only
verifyFirstDayOfWeek("en-MV", UCAL_FRIDAY);
verifyFirstDayOfWeek("en-AE", UCAL_SATURDAY);
verifyFirstDayOfWeek("en-US", UCAL_SUNDAY);
verifyFirstDayOfWeek("dv-GB", UCAL_MONDAY);

// Test -u-sd-
verifyFirstDayOfWeek("en-u-sd-mv00", UCAL_FRIDAY);
verifyFirstDayOfWeek("en-u-sd-aeaj", UCAL_SATURDAY);
verifyFirstDayOfWeek("en-u-sd-usca", UCAL_SUNDAY);
verifyFirstDayOfWeek("dv-u-sd-gbsct", UCAL_MONDAY);

// Test Add Likely Subtags algorithm produces a region
// dv => dv_Thaa_MV => Friday
verifyFirstDayOfWeek("dv", UCAL_FRIDAY);
// und_Thaa => dv_Thaa_MV => Friday
verifyFirstDayOfWeek("und-Thaa", UCAL_FRIDAY);

// ssh => ssh_Arab_AE => Saturday
verifyFirstDayOfWeek("ssh", UCAL_SATURDAY);
// wbl_Arab => wbl_Arab_AF => Saturday
verifyFirstDayOfWeek("wbl-Arab", UCAL_SATURDAY);

// en => en_Latn_US => Sunday
verifyFirstDayOfWeek("en", UCAL_SUNDAY);
// und_Hira => ja_Hira_JP => Sunday
verifyFirstDayOfWeek("und-Hira", UCAL_SUNDAY);

verifyFirstDayOfWeek("zxx", UCAL_MONDAY);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

//eof
3 changes: 3 additions & 0 deletions icu4c/source/test/intltest/caltest.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ class CalendarTest: public CalendarTimeZoneTest {
void TestFWWithISO8601();
void TestRollWeekOfYear();

// Helper: builds a Calendar from the given BCP 47 language tag and asserts that
// its first day of the week equals `expected`.
void verifyFirstDayOfWeek(const char* locale, UCalendarDaysOfWeek expected);
// Test case for first-day-of-week resolution (UTS #35 "First Day Overrides").
void TestFirstDayOfWeek();

void RunChineseCalendarInTemporalLeapYearTest(Calendar* cal);
void RunIslamicCalendarInTemporalLeapYearTest(Calendar* cal);
void Run366DaysIsLeapYearCalendarInTemporalLeapYearTest(Calendar* cal);
Expand Down
11 changes: 7 additions & 4 deletions icu4j/main/core/src/main/java/com/ibm/icu/util/Calendar.java
Original file line number Diff line number Diff line change
Expand Up @@ -1856,10 +1856,13 @@ private static Calendar createInstance(ULocale locale) {
case ISO8601:
// Only differs week numbering rule from Gregorian
cal = new GregorianCalendar(zone, locale);
String type = locale.getUnicodeLocaleType("fw");
// Only set fw to Monday for ISO8601 if there aer no fw keyword.
// If there is a fw keyword, the Calendar constructor already set it to the fw value.
if (locale.getKeywordValue("fw") == null) {
// Based on UTS35 "First Day Overrides"
// https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides
// Only set fw to Monday for ISO8601 if there are no fw nor rg keywords.
// If there is a fw or rg keywords, the Calendar constructor already set it
// to the fw value or based on the rg value.
if (locale.getUnicodeLocaleType("fw") == null &&
locale.getUnicodeLocaleType("rg") == null) {
cal.setFirstDayOfWeek(MONDAY);
}
cal.setMinimalDaysInFirstWeek(4);
Expand Down
31 changes: 23 additions & 8 deletions icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,22 @@ public static String getCountry(String localeID) {
return new LocaleIDParser(localeID).getCountry();
}

/**
 * Derives a region code from the value of one keyword of a locale.
 *
 * @param locale the locale whose keyword value is inspected
 * @param key the keyword to read, e.g. "rg" or "sd"
 * @return the first two characters of the value, upper-cased, when the value
 *         starts with a letter; otherwise its first three characters (assumed
 *         to be a three-digit region code); or null when the keyword is absent
 *         or its value is not between 3 and 7 characters long
 */
private static String getRegionFromKey(ULocale locale, String key) {
    String value = locale.getKeywordValue(key);
    if (value == null) {
        return null;
    }
    int length = value.length();
    if (length < 3 || length > 7) {
        return null;
    }
    // Anything after the region part (the subdivision suffix) is dropped.
    boolean startsWithLetter = Character.isLetter(value.charAt(0));
    return startsWithLetter
            ? AsciiUtil.toUpperString(value.substring(0, 2))
            : value.substring(0, 3);
}

/**
* {@icu} Get the region to use for supplemental data lookup.
* Uses
Expand All @@ -981,17 +997,16 @@ public static String getCountry(String localeID) {
@Deprecated
public static String getRegionForSupplementalData(
ULocale locale, boolean inferRegion) {
String region = locale.getKeywordValue("rg");
if (region != null && region.length() >= 3 && region.length() <= 7) {
if (Character.isLetter(region.charAt(0))) {
return AsciiUtil.toUpperString(region.substring(0, 2));
} else {
// assume three-digit region code
return region.substring(0, 3);
}
String region = getRegionFromKey(locale, "rg");
if (region != null) {
return region;
}
region = locale.getCountry();
if (region.length() == 0 && inferRegion) {
region = getRegionFromKey(locale, "sd");
if (region != null) {
return region;
}
ULocale maximized = addLikelySubtags(locale);
region = maximized.getCountry();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,76 @@ void quasiGregorianTest(Calendar cal, int[] data) {
}
}

/**
 * Asserts that the Calendar obtained for the BCP 47 language tag {@code l}
 * reports {@code weekday} as its first day of the week. The tag doubles as the
 * assertion message.
 */
private void verifyFirstDayOfWeek(String l, int weekday) {
    Locale locale = Locale.forLanguageTag(l);
    Calendar calendar = Calendar.getInstance(locale);
    assertEquals(l, weekday, calendar.getFirstDayOfWeek());
}
/**
 * Test "First Day Overrides" behavior
 * https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides
 * with expectations taken from the <firstDay> data in
 * https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml
 *
 * Regions used below and their first day of the week:
 *   Friday:   MV
 *   Saturday: AE, AF
 *   Sunday:   US, JP
 *   Monday:   GB
 */
@Test
public void TestFirstDayOfWeek() {
    // Test -u-fw- value: expected to win even though rg and sd are also present
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sun-rg-mvzzzz-sd-usca", Calendar.SUNDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-mon-rg-mvzzzz-sd-usca", Calendar.MONDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-tue-rg-mvzzzz-sd-usca", Calendar.TUESDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-wed-rg-mvzzzz-sd-usca", Calendar.WEDNESDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-thu-rg-mvzzzz-sd-usca", Calendar.THURSDAY);
    verifyFirstDayOfWeek("en-AE-u-ca-iso8601-fw-fri-rg-aezzzz-sd-usca", Calendar.FRIDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sat-rg-mvzzzz-sd-usca", Calendar.SATURDAY);

    // Test -u-rg- value, no fw: the rg region is expected to decide
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-mvzzzz-sd-usca", Calendar.FRIDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-aezzzz-sd-usca", Calendar.SATURDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-uszzzz-sd-usca", Calendar.SUNDAY);
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-gbzzzz-sd-usca", Calendar.MONDAY);

    // Test -u-ca-iso8601 with neither fw nor rg: Monday is expected
    verifyFirstDayOfWeek("en-MV-u-ca-iso8601-sd-mv00", Calendar.MONDAY);
    verifyFirstDayOfWeek("en-AE-u-ca-iso8601-sd-aeaj", Calendar.MONDAY);
    verifyFirstDayOfWeek("en-US-u-ca-iso8601-sd-usca", Calendar.MONDAY);

    // Test Region Tags only
    verifyFirstDayOfWeek("en-MV", Calendar.FRIDAY);
    verifyFirstDayOfWeek("en-AE", Calendar.SATURDAY);
    verifyFirstDayOfWeek("en-US", Calendar.SUNDAY);
    verifyFirstDayOfWeek("dv-GB", Calendar.MONDAY);

    // Test -u-sd-
    // NOTE(review): these cases are disabled here although the equivalent
    // ICU4C test (caltest.cpp) runs them; confirm why and re-enable once
    // ICU4J is expected to pass them.
    // verifyFirstDayOfWeek("en-u-sd-mv00", Calendar.FRIDAY);
    // verifyFirstDayOfWeek("en-u-sd-aeaj", Calendar.SATURDAY);
    // verifyFirstDayOfWeek("en-u-sd-usca", Calendar.SUNDAY);
    // verifyFirstDayOfWeek("dv-u-sd-gbsct", Calendar.MONDAY);

    // Test Add Likely Subtags algorithm produces a region
    // dv => dv_Thaa_MV => Friday
    verifyFirstDayOfWeek("dv", Calendar.FRIDAY);
    // und_Thaa => dv_Thaa_MV => Friday
    verifyFirstDayOfWeek("und-Thaa", Calendar.FRIDAY);

    // ssh => ssh_Arab_AE => Saturday
    verifyFirstDayOfWeek("ssh", Calendar.SATURDAY);
    // wbl_Arab => wbl_Arab_AF => Saturday
    verifyFirstDayOfWeek("wbl-Arab", Calendar.SATURDAY);

    // en => en_Latn_US => Sunday
    verifyFirstDayOfWeek("en", Calendar.SUNDAY);
    // und_Hira => ja_Hira_JP => Sunday
    verifyFirstDayOfWeek("und-Hira", Calendar.SUNDAY);

    // zxx: Monday is expected
    verifyFirstDayOfWeek("zxx", Calendar.MONDAY);
}

/**
* Verify that BuddhistCalendar shifts years to Buddhist Era but otherwise
* behaves like GregorianCalendar.
Expand Down

0 comments on commit 276d3dc

Please sign in to comment.