From 3d1dee683743c4578ced479c10b1fbe25aeacc9a Mon Sep 17 00:00:00 2001 From: Mohamed Akram Date: Tue, 3 Oct 2023 00:02:53 +0400 Subject: [PATCH] ICU-22528 Improve date formatting performance --- icu4c/source/common/unistr.cpp | 75 +++++++++------ icu4c/source/i18n/calendar.cpp | 4 +- icu4c/source/i18n/datefmt.cpp | 48 +++++++--- icu4c/source/i18n/dtfmtsym.cpp | 34 ++++++- .../source/i18n/formatted_string_builder.cpp | 12 ++- icu4c/source/i18n/gregoimp.cpp | 15 ++- icu4c/source/i18n/gregoimp.h | 32 +++++-- icu4c/source/i18n/smpdtfmt.cpp | 92 ++++++++++--------- icu4c/source/i18n/unicode/calendar.h | 2 +- icu4c/source/i18n/unicode/formattednumber.h | 4 + .../i18n/unicode/simplenumberformatter.h | 5 + 11 files changed, 218 insertions(+), 105 deletions(-) diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 04f01cfa16cd..b722b45ff6e5 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1607,41 +1607,58 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL int32_t oldLength = length(); int32_t newLength; - if (uprv_add32_overflow(oldLength, srcLength, &newLength)) { - setToBogus(); - return *this; - } - // Check for append onto ourself - const char16_t* oldArray = getArrayStart(); - if (isBufferWritable() && - oldArray < srcChars + srcLength && - srcChars < oldArray + oldLength) { - // Copy into a new UnicodeString and start over - UnicodeString copy(srcChars, srcLength); - if (copy.isBogus()) { + if (srcLength <= getCapacity() - oldLength && isBufferWritable()) { + newLength = oldLength + srcLength; + // Faster than a memmove + if (srcLength <= 4) { + char16_t *arr = getArrayStart(); + arr[oldLength] = srcChars[0]; + if (srcLength > 1) arr[oldLength+1] = srcChars[1]; + if (srcLength > 2) arr[oldLength+2] = srcChars[2]; + if (srcLength > 3) arr[oldLength+3] = srcChars[3]; + setLength(newLength); + return *this; + } + } else { + if (uprv_add32_overflow(oldLength, srcLength, &newLength)) { setToBogus(); return *this; } - return doAppend(copy.getArrayStart(), 0, srcLength); - } - - // optimize append() onto a large-enough, owned string - if((newLength <= getCapacity() && isBufferWritable()) || - cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { - char16_t *newArray = getArrayStart(); - // Do not copy characters when - // char16_t *buffer=str.getAppendBuffer(...); - // is followed by - // str.append(buffer, length); - // or - // str.appendString(buffer, length) - // or similar. - if(srcChars != newArray + oldLength) { - us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); + + // Check for append onto ourself + const char16_t* oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doAppend(copy.getArrayStart(), 0, srcLength); } - setLength(newLength); + + // optimize append() onto a large-enough, owned string + if (!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { + return *this; + } + } + + char16_t *newArray = getArrayStart(); + // Do not copy characters when + // char16_t *buffer=str.getAppendBuffer(...); + // is followed by + // str.append(buffer, length); + // or + // str.appendString(buffer, length) + // or similar. + if(srcChars != newArray + oldLength) { + us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); } + setLength(newLength); + return *this; } diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index 4b6edc87c914..659c91b86a64 100644 --- a/icu4c/source/i18n/calendar.cpp +++ b/icu4c/source/i18n/calendar.cpp @@ -1532,11 +1532,11 @@ void Calendar::computeFields(UErrorCode &ec) U_ASSERT(fFields[UCAL_DST_OFFSET] <= getMaximum(UCAL_DST_OFFSET)); } -uint8_t Calendar::julianDayToDayOfWeek(double julian) +uint8_t Calendar::julianDayToDayOfWeek(int32_t julian) { // If julian is negative, then julian%7 will be negative, so we adjust // accordingly. We add 1 because Julian day 0 is Monday. - int8_t dayOfWeek = (int8_t) uprv_fmod(julian + 1, 7); + int8_t dayOfWeek = (int8_t) ((julian + 1) % 7); uint8_t result = (uint8_t)(dayOfWeek + ((dayOfWeek < 0) ? (7+UCAL_SUNDAY ) : UCAL_SUNDAY)); return result; diff --git a/icu4c/source/i18n/datefmt.cpp b/icu4c/source/i18n/datefmt.cpp index 029634e3dcbb..b25312463e01 100644 --- a/icu4c/source/i18n/datefmt.cpp +++ b/icu4c/source/i18n/datefmt.cpp @@ -28,6 +28,7 @@ #include "unicode/smpdtfmt.h" #include "unicode/dtptngen.h" #include "unicode/udisplaycontext.h" +#include "unicode/gregocal.h" #include "reldtfmt.h" #include "sharedobject.h" #include "unifiedcache.h" @@ -277,15 +278,25 @@ DateFormat::format(Calendar& /* unused cal */, UnicodeString& DateFormat::format(UDate date, UnicodeString& appendTo, FieldPosition& fieldPosition) const { if (fCalendar != nullptr) { - // Use a clone of our calendar instance - Calendar* calClone = fCalendar->clone(); - if (calClone != nullptr) { - UErrorCode ec = U_ZERO_ERROR; - calClone->setTime(date, ec); + UErrorCode ec = U_ZERO_ERROR; + auto calType = fCalendar->getType(); + // Avoid a heap allocation and corresponding free for the common case + if (uprv_strcmp(calType, "gregorian") == 0) { + GregorianCalendar cal(*static_cast(fCalendar)); + cal.setTime(date, ec); if (U_SUCCESS(ec)) { - format(*calClone, appendTo, fieldPosition); + format(cal, appendTo, fieldPosition); + } + } else { + // Use a clone of our calendar instance + Calendar *calClone = fCalendar->clone(); + if (calClone != nullptr) { + calClone->setTime(date, ec); + if (U_SUCCESS(ec)) { + format(*calClone, appendTo, fieldPosition); + } + delete calClone; } - delete calClone; } } return appendTo; @@ -297,13 +308,24 @@ UnicodeString& DateFormat::format(UDate date, UnicodeString& appendTo, FieldPositionIterator* posIter, UErrorCode& status) const { if (fCalendar != nullptr) { - Calendar* calClone = fCalendar->clone(); - if (calClone != nullptr) { - calClone->setTime(date, status); - if (U_SUCCESS(status)) { - format(*calClone, appendTo, posIter, status); + UErrorCode ec = U_ZERO_ERROR; + auto calType = fCalendar->getType(); + // Avoid a heap allocation and corresponding free for the common case + if (uprv_strcmp(calType, "gregorian") == 0) { + GregorianCalendar cal(*static_cast(fCalendar)); + cal.setTime(date, ec); + if (U_SUCCESS(ec)) { + format(cal, appendTo, posIter, status); + } + } else { + Calendar* calClone = fCalendar->clone(); + if (calClone != nullptr) { + calClone->setTime(date, status); + if (U_SUCCESS(status)) { + format(*calClone, appendTo, posIter, status); + } + delete calClone; } - delete calClone; } } return appendTo; diff --git a/icu4c/source/i18n/dtfmtsym.cpp b/icu4c/source/i18n/dtfmtsym.cpp index 943f6e21d2ed..13834b16540d 100644 --- a/icu4c/source/i18n/dtfmtsym.cpp +++ b/icu4c/source/i18n/dtfmtsym.cpp @@ -86,6 +86,33 @@ static const char16_t gPatternChars[] = { 0 }; +/** + * Map of each ASCII character to its corresponding index in the table above if + * it is a pattern character and -1 otherwise. + */ +static const int8_t gLookupPatternChars[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // ! " # $ % & ' ( ) * + , - . / + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 37, -1, -1, -1, -1, -1, +#else + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +#endif + // @ A B C D E F G H I J K L M N O + -1, 22, 36, -1, 10, 9, 11, 0, 5, -1, -1, 16, 26, 2, -1, 31, + // P Q R S T U V W X Y Z [ \ ] ^ _ + -1, 27, -1, 8, -1, 30, 29, 13, 32, 18, 23, -1, -1, -1, -1, -1, + // ` a b c d e f g h i j k l m n o + -1, 14, 35, 25, 3, 19, -1, 21, 15, -1, -1, 4, -1, 6, -1, -1, + // p q r s t u v w x y z { | } ~ + -1, 28, 34, 7, -1, 20, 24, 12, 33, 1, 17, -1, -1, -1, -1, -1 +}; + //------------------------------------------------------ // Strings of last resort. These are only used if we have no resource // files. They aren't designed for actual use, just for backup. @@ -1391,12 +1418,11 @@ DateFormatSymbols::getPatternUChars() UDateFormatField U_EXPORT2 DateFormatSymbols::getPatternCharIndex(char16_t c) { - const char16_t *p = u_strchr(gPatternChars, c); - if (p == nullptr) { + if (c >= UPRV_LENGTHOF(gLookupPatternChars)) { return UDAT_FIELD_COUNT; - } else { - return static_cast(p - gPatternChars); } + const auto idx = gLookupPatternChars[c]; + return idx == -1 ? UDAT_FIELD_COUNT : static_cast(idx); } static const uint64_t kNumericFieldsAlways = diff --git a/icu4c/source/i18n/formatted_string_builder.cpp b/icu4c/source/i18n/formatted_string_builder.cpp index 8dbf954af9ff..50eeb45202c3 100644 --- a/icu4c/source/i18n/formatted_string_builder.cpp +++ b/icu4c/source/i18n/formatted_string_builder.cpp @@ -153,13 +153,15 @@ FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field if (U_FAILURE(status)) { return count; } + auto charPtr = getCharPtr(); + auto fieldPtr = getFieldPtr(); if (count == 1) { - getCharPtr()[position] = (char16_t) codePoint; - getFieldPtr()[position] = field; + charPtr[position] = (char16_t) codePoint; + fieldPtr[position] = field; } else { - getCharPtr()[position] = U16_LEAD(codePoint); - getCharPtr()[position + 1] = U16_TRAIL(codePoint); - getFieldPtr()[position] = getFieldPtr()[position + 1] = field; + charPtr[position] = U16_LEAD(codePoint); + charPtr[position + 1] = U16_TRAIL(codePoint); + fieldPtr[position] = fieldPtr[position + 1] = field; } return count; } diff --git a/icu4c/source/i18n/gregoimp.cpp b/icu4c/source/i18n/gregoimp.cpp index 31b5aeed8354..1043113fdf17 100644 --- a/icu4c/source/i18n/gregoimp.cpp +++ b/icu4c/source/i18n/gregoimp.cpp @@ -32,6 +32,15 @@ int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) { numerator / denominator : ((numerator + 1) / denominator) - 1; } +int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator, + int32_t* remainder) { + auto quotient = floorDivide(numerator, denominator); + if (remainder != nullptr) { + *remainder = numerator - (quotient * denominator); + } + return quotient; +} + int32_t ClockMath::floorDivide(double numerator, int32_t denominator, int32_t* remainder) { // For an integer n and representable ⌊x/n⌋, ⌊RN(x/n)⌋=⌊x/n⌋, where RN is @@ -106,7 +115,7 @@ double Grego::fieldsToDay(int32_t year, int32_t month, int32_t dom) { return julian - JULIAN_1970_CE; // JD => epoch day } -void Grego::dayToFields(double day, int32_t& year, int32_t& month, +void Grego::dayToFields(int32_t day, int32_t& year, int32_t& month, int32_t& dom, int32_t& dow, int32_t& doy) { // Convert from 1970 CE epoch to 1 CE epoch (Gregorian calendar) @@ -130,7 +139,7 @@ void Grego::dayToFields(double day, int32_t& year, int32_t& month, UBool isLeap = isLeapYear(year); // Gregorian day zero is a Monday. - dow = (int32_t) uprv_fmod(day + 1, 7); + dow = (day + 1) % 7; dow += (dow < 0) ? (UCAL_SUNDAY + 7) : UCAL_SUNDAY; // Common Julian/Gregorian calculation @@ -152,7 +161,7 @@ void Grego::timeToFields(UDate time, int32_t& year, int32_t& month, dayToFields(day, year, month, dom, dow, doy); } -int32_t Grego::dayOfWeek(double day) { +int32_t Grego::dayOfWeek(int32_t day) { int32_t dow; ClockMath::floorDivide(day + int{UCAL_THURSDAY}, 7, &dow); return (dow == 0) ? UCAL_SATURDAY : dow; diff --git a/icu4c/source/i18n/gregoimp.h b/icu4c/source/i18n/gregoimp.h index d65d6a4f88e9..1a81b4c0d9e8 100644 --- a/icu4c/source/i18n/gregoimp.h +++ b/icu4c/source/i18n/gregoimp.h @@ -62,6 +62,24 @@ class ClockMath { */ static inline double floorDivide(double numerator, double denominator); + /** + * Divide two numbers, returning the floor of the quotient and + * the modulus remainder. Unlike the built-in division, this is + * mathematically well-behaved. E.g., -1/4 => 0 and + * -1%4 => -1, but floorDivide(-1,4) => + * -1 with remainder => 3. NOTE: If numerator is + * too large, the returned quotient may overflow. + * @param numerator the numerator + * @param denominator a divisor which must be != 0 + * @param remainder output parameter to receive the + * remainder. Unlike numerator % denominator, this + * will always be non-negative, in the half-open range [0, + * |denominator|). + * @return the floor of the quotient + */ + static int32_t floorDivide(int32_t numerator, int32_t denominator, + int32_t* remainder); + /** * Divide two numbers, returning the floor of the quotient and * the modulus remainder. Unlike the built-in division, this is @@ -183,26 +201,26 @@ class Grego { /** * Convert a 1970-epoch day number to proleptic Gregorian year, * month, day-of-month, and day-of-week. - * @param day 1970-epoch day (integral value) + * @param day 1970-epoch day * @param year output parameter to receive year * @param month output parameter to receive month (0-based, 0==Jan) * @param dom output parameter to receive day-of-month (1-based) * @param dow output parameter to receive day-of-week (1-based, 1==Sun) * @param doy output parameter to receive day-of-year (1-based) */ - static void dayToFields(double day, int32_t& year, int32_t& month, + static void dayToFields(int32_t day, int32_t& year, int32_t& month, int32_t& dom, int32_t& dow, int32_t& doy); /** * Convert a 1970-epoch day number to proleptic Gregorian year, * month, day-of-month, and day-of-week. - * @param day 1970-epoch day (integral value) + * @param day 1970-epoch day * @param year output parameter to receive year * @param month output parameter to receive month (0-based, 0==Jan) * @param dom output parameter to receive day-of-month (1-based) * @param dow output parameter to receive day-of-week (1-based, 1==Sun) */ - static inline void dayToFields(double day, int32_t& year, int32_t& month, + static inline void dayToFields(int32_t day, int32_t& year, int32_t& month, int32_t& dom, int32_t& dow); /** @@ -221,10 +239,10 @@ class Grego { /** * Return the day of week on the 1970-epoch day - * @param day the 1970-epoch day (integral value) + * @param day the 1970-epoch day * @return the day of week */ - static int32_t dayOfWeek(double day); + static int32_t dayOfWeek(int32_t day); /** * Returns the ordinal number for the specified day of week within the month. @@ -283,7 +301,7 @@ Grego::previousMonthLength(int y, int m) { return (m > 0) ? monthLength(y, m-1) : 31; } -inline void Grego::dayToFields(double day, int32_t& year, int32_t& month, +inline void Grego::dayToFields(int32_t day, int32_t& year, int32_t& month, int32_t& dom, int32_t& dow) { int32_t doy_unused; dayToFields(day,year,month,dom,dow,doy_unused); diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index 628f7febe04a..d767babe7fe9 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -1074,7 +1074,8 @@ SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo, UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); // loop through the pattern string character by character - for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) { + int32_t patternLength = fPattern.length(); + for (int32_t i = 0; i < patternLength && U_SUCCESS(status); ++i) { char16_t ch = fPattern[i]; // Use subFormat() to format a repeated pattern character @@ -1087,7 +1088,7 @@ SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo, if (ch == QUOTE) { // Consecutive single quotes are a single quote literal, // either outside of quotes or between quotes - if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) { + if ((i+1) < patternLength && fPattern[i+1] == QUOTE) { appendTo += (char16_t)QUOTE; ++i; } else { @@ -1480,6 +1481,9 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const { + static const int32_t maxIntCount = 10; + static const UnicodeString hebr(u"hebr"); + if (U_FAILURE(status)) { return; } @@ -1488,14 +1492,9 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // text for an individual pattern symbol (e.g., "HH" or "yyyy") UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch); - const int32_t maxIntCount = 10; int32_t beginOffset = appendTo.length(); - const NumberFormat *currentNumberFormat; DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther; - UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0); - UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0); - // if the pattern character is unrecognized, signal an error and dump out if (patternCharIndex == UDAT_FIELD_COUNT) { @@ -1510,35 +1509,37 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // Don't get value unless it is useful if (field < UCAL_FIELD_COUNT) { value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status); - } - if (U_FAILURE(status)) { - return; + if (U_FAILURE(status)) { + return; + } } - currentNumberFormat = getNumberFormatByIndex(patternCharIndex); + const NumberFormat *currentNumberFormat = getNumberFormatByIndex(patternCharIndex); if (currentNumberFormat == nullptr) { status = U_INTERNAL_PROGRAM_ERROR; return; } - UnicodeString hebr("hebr", 4, US_INV); switch (patternCharIndex) { // for any "G" symbol, write out the appropriate era string // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name case UDAT_ERA_FIELD: - if (isChineseCalendar) { - zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J - } else { - if (count == 5) { - _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); - capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow; - } else if (count == 4) { - _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); - capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide; + { + auto calType = cal.getType(); + if (uprv_strcmp(calType,"chinese") == 0 || uprv_strcmp(calType,"dangi") == 0) { + zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J } else { - _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); - capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev; + if (count == 5) { + _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow; + } else if (count == 4) { + _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide; + } else { + _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev; + } } } break; @@ -1577,7 +1578,7 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // for "MMMMM"/"LLLLL", use the narrow form case UDAT_MONTH_FIELD: case UDAT_STANDALONE_MONTH_FIELD: - if ( isHebrewCalendar ) { + if (uprv_strcmp(cal.getType(),"hebrew") == 0) { HebrewCalendar *hc = (HebrewCalendar*)&cal; if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 ) value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar. @@ -1765,7 +1766,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, UDate date = cal.getTime(status); const TimeZoneFormat *tzfmt = tzFormat(status); if (U_SUCCESS(status)) { - if (patternCharIndex == UDAT_TIMEZONE_FIELD) { + switch (patternCharIndex) { + case UDAT_TIMEZONE_FIELD: if (count < 4) { // "z", "zz", "zzz" tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString); @@ -1775,8 +1777,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString); capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; } - } - else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) { + break; + case UDAT_TIMEZONE_RFC_FIELD: if (count < 4) { // "Z" tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString); @@ -1787,8 +1789,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // "ZZ", "ZZZ", "ZZZZ" tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); } - } - else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) { + break; + case UDAT_TIMEZONE_GENERIC_FIELD: if (count == 1) { // "v" tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString); @@ -1798,8 +1800,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString); capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; } - } - else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) { + break; + case UDAT_TIMEZONE_SPECIAL_FIELD: if (count == 1) { // "V" tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString); @@ -1814,8 +1816,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString); capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong; } - } - else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) { + break; + case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: if (count == 1) { // "O" tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString); @@ -1823,8 +1825,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // "OOOO" tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); } - } - else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) { + break; + case UDAT_TIMEZONE_ISO_FIELD: if (count == 1) { // "X" tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString); @@ -1841,8 +1843,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // "XXXXX" tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString); } - } - else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) { + break; + case UDAT_TIMEZONE_ISO_LOCAL_FIELD: if (count == 1) { // "x" tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString); @@ -1859,8 +1861,8 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // "xxxxx" tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString); } - } - else { + break; + default: UPRV_UNREACHABLE_EXIT; } } @@ -2157,16 +2159,24 @@ SimpleDateFormat::zeroPaddingNumber( if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) { // Can use fast path + // We create UFormattedNumberData ourselves to avoid a heap allocation + // and corresponding free. Set the pointer to null afterwards to prevent + // the implementation from attempting to free it. UErrorCode localStatus = U_ZERO_ERROR; - number::SimpleNumber number = number::SimpleNumber::forInt64(value, localStatus); + number::impl::UFormattedNumberData data; + data.quantity.setToLong(value); + number::SimpleNumber number(&data, localStatus); number.setMinimumIntegerDigits(minDigits, localStatus); number.truncateStart(maxDigits, localStatus); number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus); if (U_FAILURE(localStatus)) { + result.fData = nullptr; return; } - appendTo.append(result.toTempString(localStatus)); + UnicodeStringAppendable appendable(appendTo); + result.appendTo(appendable, localStatus); + result.fData = nullptr; return; } diff --git a/icu4c/source/i18n/unicode/calendar.h b/icu4c/source/i18n/unicode/calendar.h index 26781f8a0568..31851d666b34 100644 --- a/icu4c/source/i18n/unicode/calendar.h +++ b/icu4c/source/i18n/unicode/calendar.h @@ -2340,7 +2340,7 @@ class U_I18N_API Calendar : public UObject { * @return Day number from 1..7 (SUN..SAT). * @internal */ - static uint8_t julianDayToDayOfWeek(double julian); + static uint8_t julianDayToDayOfWeek(int32_t julian); #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/icu4c/source/i18n/unicode/formattednumber.h b/icu4c/source/i18n/unicode/formattednumber.h index 83178ea40efe..6b212d7c8257 100644 --- a/icu4c/source/i18n/unicode/formattednumber.h +++ b/icu4c/source/i18n/unicode/formattednumber.h @@ -25,6 +25,7 @@ U_NAMESPACE_BEGIN class FieldPositionIteratorHandler; +class SimpleDateFormat; namespace number { // icu::number @@ -190,6 +191,9 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { // To give C API access to internals friend struct impl::UFormattedNumberImpl; + + // To give access to the data pointer for non-heap allocation + friend class icu::SimpleDateFormat; }; template diff --git a/icu4c/source/i18n/unicode/simplenumberformatter.h b/icu4c/source/i18n/unicode/simplenumberformatter.h index 32b79a94da40..2cafd360f145 100644 --- a/icu4c/source/i18n/unicode/simplenumberformatter.h +++ b/icu4c/source/i18n/unicode/simplenumberformatter.h @@ -30,6 +30,8 @@ U_NAMESPACE_BEGIN +/* forward declaration */ +class SimpleDateFormat; namespace number { // icu::number @@ -169,6 +171,9 @@ class U_I18N_API SimpleNumber : public UMemory { USimpleNumberSign fSign = UNUM_SIMPLE_NUMBER_NO_SIGN; friend class SimpleNumberFormatter; + + // Uses the private constructor to avoid a heap allocation + friend class icu::SimpleDateFormat; };