Skip to content

Commit

Permalink
ICU-22528 Improve date formatting performance
Browse files Browse the repository at this point in the history
  • Loading branch information
mohd-akram authored and richgillam committed Oct 14, 2023
1 parent cb7b1b6 commit 3d1dee6
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 105 deletions.
75 changes: 46 additions & 29 deletions icu4c/source/common/unistr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1607,41 +1607,58 @@ UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcL

int32_t oldLength = length();
int32_t newLength;
if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
setToBogus();
return *this;
}

// Check for append onto ourself
const char16_t* oldArray = getArrayStart();
if (isBufferWritable() &&
oldArray < srcChars + srcLength &&
srcChars < oldArray + oldLength) {
// Copy into a new UnicodeString and start over
UnicodeString copy(srcChars, srcLength);
if (copy.isBogus()) {
if (srcLength <= getCapacity() - oldLength && isBufferWritable()) {
newLength = oldLength + srcLength;
// Faster than a memmove
if (srcLength <= 4) {
char16_t *arr = getArrayStart();
arr[oldLength] = srcChars[0];
if (srcLength > 1) arr[oldLength+1] = srcChars[1];
if (srcLength > 2) arr[oldLength+2] = srcChars[2];
if (srcLength > 3) arr[oldLength+3] = srcChars[3];
setLength(newLength);
return *this;
}
} else {
if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
setToBogus();
return *this;
}
return doAppend(copy.getArrayStart(), 0, srcLength);
}

// optimize append() onto a large-enough, owned string
if((newLength <= getCapacity() && isBufferWritable()) ||
cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
char16_t *newArray = getArrayStart();
// Do not copy characters when
// char16_t *buffer=str.getAppendBuffer(...);
// is followed by
// str.append(buffer, length);
// or
// str.appendString(buffer, length)
// or similar.
if(srcChars != newArray + oldLength) {
us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);

// Check for append onto ourself
const char16_t* oldArray = getArrayStart();
if (isBufferWritable() &&
oldArray < srcChars + srcLength &&
srcChars < oldArray + oldLength) {
// Copy into a new UnicodeString and start over
UnicodeString copy(srcChars, srcLength);
if (copy.isBogus()) {
setToBogus();
return *this;
}
return doAppend(copy.getArrayStart(), 0, srcLength);
}
setLength(newLength);

// optimize append() onto a large-enough, owned string
if (!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
return *this;
}
}

char16_t *newArray = getArrayStart();
// Do not copy characters when
// char16_t *buffer=str.getAppendBuffer(...);
// is followed by
// str.append(buffer, length);
// or
// str.appendString(buffer, length)
// or similar.
if(srcChars != newArray + oldLength) {
us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
}
setLength(newLength);

return *this;
}

Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/i18n/calendar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1532,11 +1532,11 @@ void Calendar::computeFields(UErrorCode &ec)
U_ASSERT(fFields[UCAL_DST_OFFSET] <= getMaximum(UCAL_DST_OFFSET));
}

uint8_t Calendar::julianDayToDayOfWeek(double julian)
uint8_t Calendar::julianDayToDayOfWeek(int32_t julian)
{
// If julian is negative, then julian%7 will be negative, so we adjust
// accordingly. We add 1 because Julian day 0 is Monday.
int8_t dayOfWeek = (int8_t) uprv_fmod(julian + 1, 7);
int8_t dayOfWeek = (int8_t) ((julian + 1) % 7);

uint8_t result = (uint8_t)(dayOfWeek + ((dayOfWeek < 0) ? (7+UCAL_SUNDAY ) : UCAL_SUNDAY));
return result;
Expand Down
48 changes: 35 additions & 13 deletions icu4c/source/i18n/datefmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "unicode/smpdtfmt.h"
#include "unicode/dtptngen.h"
#include "unicode/udisplaycontext.h"
#include "unicode/gregocal.h"
#include "reldtfmt.h"
#include "sharedobject.h"
#include "unifiedcache.h"
Expand Down Expand Up @@ -277,15 +278,25 @@ DateFormat::format(Calendar& /* unused cal */,
UnicodeString&
DateFormat::format(UDate date, UnicodeString& appendTo, FieldPosition& fieldPosition) const {
if (fCalendar != nullptr) {
// Use a clone of our calendar instance
Calendar* calClone = fCalendar->clone();
if (calClone != nullptr) {
UErrorCode ec = U_ZERO_ERROR;
calClone->setTime(date, ec);
UErrorCode ec = U_ZERO_ERROR;
auto calType = fCalendar->getType();
// Avoid a heap allocation and corresponding free for the common case
if (uprv_strcmp(calType, "gregorian") == 0) {
GregorianCalendar cal(*static_cast<GregorianCalendar*>(fCalendar));
cal.setTime(date, ec);
if (U_SUCCESS(ec)) {
format(*calClone, appendTo, fieldPosition);
format(cal, appendTo, fieldPosition);
}
} else {
// Use a clone of our calendar instance
Calendar *calClone = fCalendar->clone();
if (calClone != nullptr) {
calClone->setTime(date, ec);
if (U_SUCCESS(ec)) {
format(*calClone, appendTo, fieldPosition);
}
delete calClone;
}
delete calClone;
}
}
return appendTo;
Expand All @@ -297,13 +308,24 @@ UnicodeString&
DateFormat::format(UDate date, UnicodeString& appendTo, FieldPositionIterator* posIter,
UErrorCode& status) const {
if (fCalendar != nullptr) {
Calendar* calClone = fCalendar->clone();
if (calClone != nullptr) {
calClone->setTime(date, status);
if (U_SUCCESS(status)) {
format(*calClone, appendTo, posIter, status);
UErrorCode ec = U_ZERO_ERROR;
auto calType = fCalendar->getType();
// Avoid a heap allocation and corresponding free for the common case
if (uprv_strcmp(calType, "gregorian") == 0) {
GregorianCalendar cal(*static_cast<GregorianCalendar*>(fCalendar));
cal.setTime(date, ec);
if (U_SUCCESS(ec)) {
format(cal, appendTo, posIter, status);
}
} else {
Calendar* calClone = fCalendar->clone();
if (calClone != nullptr) {
calClone->setTime(date, status);
if (U_SUCCESS(status)) {
format(*calClone, appendTo, posIter, status);
}
delete calClone;
}
delete calClone;
}
}
return appendTo;
Expand Down
34 changes: 30 additions & 4 deletions icu4c/source/i18n/dtfmtsym.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,33 @@ static const char16_t gPatternChars[] = {
0
};

/**
* Map of each ASCII character to its corresponding index in the table above if
* it is a pattern character and -1 otherwise.
*
* The table holds 128 entries (8 rows of 16) and is indexed directly by the
* code unit value; the comment above each row names the characters that the
* row covers. getPatternCharIndex() checks the code unit against the table
* length before indexing and reports UDAT_FIELD_COUNT for a -1 entry.
*
* The entry for ':' is only populated when
* UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR is non-zero.
*
* NOTE(review): the indices are written out by hand, so they presumably have
* to be updated together with gPatternChars whenever a pattern character is
* added or reordered -- confirm against that table.
*/
static const int8_t gLookupPatternChars[] = {
// 0x00-0x0F (control characters)
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
// 0x10-0x1F (control characters)
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
// ! " # $ % & ' ( ) * + , - . /
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 37, -1, -1, -1, -1, -1,
#else
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
#endif
// @ A B C D E F G H I J K L M N O
-1, 22, 36, -1, 10, 9, 11, 0, 5, -1, -1, 16, 26, 2, -1, 31,
// P Q R S T U V W X Y Z [ \ ] ^ _
-1, 27, -1, 8, -1, 30, 29, 13, 32, 18, 23, -1, -1, -1, -1, -1,
// ` a b c d e f g h i j k l m n o
-1, 14, 35, 25, 3, 19, -1, 21, 15, -1, -1, 4, -1, 6, -1, -1,
// p q r s t u v w x y z { | } ~
-1, 28, 34, 7, -1, 20, 24, 12, 33, 1, 17, -1, -1, -1, -1, -1
};

//------------------------------------------------------
// Strings of last resort. These are only used if we have no resource
// files. They aren't designed for actual use, just for backup.
Expand Down Expand Up @@ -1391,12 +1418,11 @@ DateFormatSymbols::getPatternUChars()

UDateFormatField U_EXPORT2
DateFormatSymbols::getPatternCharIndex(char16_t c) {
const char16_t *p = u_strchr(gPatternChars, c);
if (p == nullptr) {
if (c >= UPRV_LENGTHOF(gLookupPatternChars)) {
return UDAT_FIELD_COUNT;
} else {
return static_cast<UDateFormatField>(p - gPatternChars);
}
const auto idx = gLookupPatternChars[c];
return idx == -1 ? UDAT_FIELD_COUNT : static_cast<UDateFormatField>(idx);
}

static const uint64_t kNumericFieldsAlways =
Expand Down
12 changes: 7 additions & 5 deletions icu4c/source/i18n/formatted_string_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,15 @@ FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field
if (U_FAILURE(status)) {
return count;
}
auto charPtr = getCharPtr();
auto fieldPtr = getFieldPtr();
if (count == 1) {
getCharPtr()[position] = (char16_t) codePoint;
getFieldPtr()[position] = field;
charPtr[position] = (char16_t) codePoint;
fieldPtr[position] = field;
} else {
getCharPtr()[position] = U16_LEAD(codePoint);
getCharPtr()[position + 1] = U16_TRAIL(codePoint);
getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
charPtr[position] = U16_LEAD(codePoint);
charPtr[position + 1] = U16_TRAIL(codePoint);
fieldPtr[position] = fieldPtr[position + 1] = field;
}
return count;
}
Expand Down
15 changes: 12 additions & 3 deletions icu4c/source/i18n/gregoimp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) {
numerator / denominator : ((numerator + 1) / denominator) - 1;
}

/**
 * Integer floor division that also reports the remainder.
 *
 * The quotient is obtained from the two-argument floorDivide() overload
 * selected for these argument types; the remainder is then derived as
 * numerator - quotient * denominator.
 *
 * @param numerator   the value to divide
 * @param denominator the divisor; must be != 0
 * @param remainder   optional output parameter; when non-null it receives
 *                    numerator - quotient * denominator
 * @return the quotient computed by the two-argument floorDivide()
 */
int32_t ClockMath::floorDivide(int32_t numerator, int32_t denominator,
                               int32_t* remainder) {
    const auto quotient = floorDivide(numerator, denominator);
    if (remainder != nullptr) {
        // quotient * denominator can fall below INT32_MIN when numerator is
        // close to INT32_MIN (e.g. INT32_MIN / 7 floors to -306783379, and
        // -306783379 * 7 == -2147483653). Signed 32-bit overflow is undefined
        // behaviour, so form the product and the difference in 64 bits.
        const int64_t product = static_cast<int64_t>(quotient) * denominator;
        *remainder = static_cast<int32_t>(numerator - product);
    }
    return quotient;
}

int32_t ClockMath::floorDivide(double numerator, int32_t denominator,
int32_t* remainder) {
// For an integer n and representable ⌊x/n⌋, ⌊RN(x/n)⌋=⌊x/n⌋, where RN is
Expand Down Expand Up @@ -106,7 +115,7 @@ double Grego::fieldsToDay(int32_t year, int32_t month, int32_t dom) {
return julian - JULIAN_1970_CE; // JD => epoch day
}

void Grego::dayToFields(double day, int32_t& year, int32_t& month,
void Grego::dayToFields(int32_t day, int32_t& year, int32_t& month,
int32_t& dom, int32_t& dow, int32_t& doy) {

// Convert from 1970 CE epoch to 1 CE epoch (Gregorian calendar)
Expand All @@ -130,7 +139,7 @@ void Grego::dayToFields(double day, int32_t& year, int32_t& month,
UBool isLeap = isLeapYear(year);

// Gregorian day zero is a Monday.
dow = (int32_t) uprv_fmod(day + 1, 7);
dow = (day + 1) % 7;
dow += (dow < 0) ? (UCAL_SUNDAY + 7) : UCAL_SUNDAY;

// Common Julian/Gregorian calculation
Expand All @@ -152,7 +161,7 @@ void Grego::timeToFields(UDate time, int32_t& year, int32_t& month,
dayToFields(day, year, month, dom, dow, doy);
}

int32_t Grego::dayOfWeek(double day) {
int32_t Grego::dayOfWeek(int32_t day) {
int32_t dow;
ClockMath::floorDivide(day + int{UCAL_THURSDAY}, 7, &dow);
return (dow == 0) ? UCAL_SATURDAY : dow;
Expand Down
32 changes: 25 additions & 7 deletions icu4c/source/i18n/gregoimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,24 @@ class ClockMath {
*/
static inline double floorDivide(double numerator, double denominator);

/**
* Divide two numbers, returning the floor of the quotient and
* the modulus remainder. Unlike the built-in division, this is
* mathematically well-behaved. E.g., <code>-1/4</code> => 0 and
* <code>-1%4</code> => -1, but <code>floorDivide(-1,4)</code> =>
* -1 with <code>remainder</code> => 3. NOTE: If numerator is
* too large, the returned quotient may overflow.
* NOTE(review): the overflow remark reads as if it was carried over from
* the double overload; confirm whether it applies to this all-int32_t
* variant.
* @param numerator the numerator
* @param denominator a divisor which must be != 0
* @param remainder output parameter to receive the
* remainder; may be <code>nullptr</code>, in which case only the
* quotient is computed. Unlike <code>numerator % denominator</code>, this
* will always be non-negative, in the half-open range <code>[0,
* |denominator|)</code>.
* NOTE(review): the non-negative range presumably assumes a positive
* denominator -- confirm before relying on it for negative divisors.
* @return the floor of the quotient
*/
static int32_t floorDivide(int32_t numerator, int32_t denominator,
int32_t* remainder);

/**
* Divide two numbers, returning the floor of the quotient and
* the modulus remainder. Unlike the built-in division, this is
Expand Down Expand Up @@ -183,26 +201,26 @@ class Grego {
/**
* Convert a 1970-epoch day number to proleptic Gregorian year,
* month, day-of-month, and day-of-week.
* @param day 1970-epoch day (integral value)
* @param day 1970-epoch day
* @param year output parameter to receive year
* @param month output parameter to receive month (0-based, 0==Jan)
* @param dom output parameter to receive day-of-month (1-based)
* @param dow output parameter to receive day-of-week (1-based, 1==Sun)
* @param doy output parameter to receive day-of-year (1-based)
*/
static void dayToFields(double day, int32_t& year, int32_t& month,
static void dayToFields(int32_t day, int32_t& year, int32_t& month,
int32_t& dom, int32_t& dow, int32_t& doy);

/**
* Convert a 1970-epoch day number to proleptic Gregorian year,
* month, day-of-month, and day-of-week.
* @param day 1970-epoch day (integral value)
* @param day 1970-epoch day
* @param year output parameter to receive year
* @param month output parameter to receive month (0-based, 0==Jan)
* @param dom output parameter to receive day-of-month (1-based)
* @param dow output parameter to receive day-of-week (1-based, 1==Sun)
*/
static inline void dayToFields(double day, int32_t& year, int32_t& month,
static inline void dayToFields(int32_t day, int32_t& year, int32_t& month,
int32_t& dom, int32_t& dow);

/**
Expand All @@ -221,10 +239,10 @@ class Grego {

/**
* Return the day of week on the 1970-epoch day
* @param day the 1970-epoch day (integral value)
* @param day the 1970-epoch day
* @return the day of week
*/
static int32_t dayOfWeek(double day);
static int32_t dayOfWeek(int32_t day);

/**
* Returns the ordinal number for the specified day of week within the month.
Expand Down Expand Up @@ -283,7 +301,7 @@ Grego::previousMonthLength(int y, int m) {
return (m > 0) ? monthLength(y, m-1) : 31;
}

inline void Grego::dayToFields(double day, int32_t& year, int32_t& month,
inline void Grego::dayToFields(int32_t day, int32_t& year, int32_t& month,
int32_t& dom, int32_t& dow) {
int32_t doy_unused;
dayToFields(day,year,month,dom,dow,doy_unused);
Expand Down
Loading

0 comments on commit 3d1dee6

Please sign in to comment.