-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: peel off dominant switch case under PGO #52827
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,44 +85,7 @@ class typeInfo; | |
struct BasicBlockList; | ||
struct flowList; | ||
struct EHblkDsc; | ||
|
||
/***************************************************************************** | ||
* | ||
* The following describes a switch block. | ||
* | ||
* Things to know: | ||
* 1. If bbsHasDefault is true, the default case is the last one in the array of basic block addresses | ||
* namely bbsDstTab[bbsCount - 1]. | ||
* 2. bbsCount must be at least 1, for the default case. bbsCount cannot be zero. It appears that the ECMA spec | ||
* allows for a degenerate switch with zero cases. Normally, the optimizer will optimize degenerate | ||
* switches with just a default case to a BBJ_ALWAYS branch, and a switch with just two cases to a BBJ_COND. | ||
* However, in debuggable code, we might not do that, so bbsCount might be 1. | ||
*/ | ||
struct BBswtDesc | ||
{ | ||
BasicBlock** bbsDstTab; // case label table address | ||
unsigned bbsCount; // count of cases (includes 'default' if bbsHasDefault) | ||
bool bbsHasDefault; | ||
|
||
BBswtDesc() : bbsHasDefault(true) | ||
{ | ||
} | ||
|
||
void removeDefault() | ||
{ | ||
assert(bbsHasDefault); | ||
assert(bbsCount > 0); | ||
bbsHasDefault = false; | ||
bbsCount--; | ||
} | ||
|
||
BasicBlock* getDefault() | ||
{ | ||
assert(bbsHasDefault); | ||
assert(bbsCount > 0); | ||
return bbsDstTab[bbsCount - 1]; | ||
} | ||
}; | ||
struct BBswtDesc; | ||
|
||
struct StackEntry | ||
{ | ||
|
@@ -1291,6 +1254,47 @@ struct BasicBlockList | |
} | ||
}; | ||
|
||
// BBswtDesc -- descriptor for a switch block | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You inserted this in an odd location, because there is a huge comment above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved. |
||
// | ||
// Things to know: | ||
// 1. If bbsHasDefault is true, the default case is the last one in the array of basic block addresses | ||
// namely bbsDstTab[bbsCount - 1]. | ||
// 2. bbsCount must be at least 1, for the default case. bbsCount cannot be zero. It appears that the ECMA spec | ||
// allows for a degenerate switch with zero cases. Normally, the optimizer will optimize degenerate | ||
// switches with just a default case to a BBJ_ALWAYS branch, and a switch with just two cases to a BBJ_COND. | ||
// However, in debuggable code, we might not do that, so bbsCount might be 1. | ||
// | ||
struct BBswtDesc | ||
{ | ||
BasicBlock** bbsDstTab; // case label table address | ||
unsigned bbsCount; // count of cases (includes 'default' if bbsHasDefault) | ||
unsigned bbsDominantCase; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you should add comments for the new fields. E.g., (1) define what "dominant" means, (2) are the fields always valid, or only under some PGO condition? (3) clarify that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
BasicBlock::weight_t bbsDominantFraction; | ||
bool bbsHasDefault; | ||
bool bbsHasDominantCase; | ||
|
||
BBswtDesc() : bbsHasDefault(true), bbsHasDominantCase(false) | ||
{ | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to update There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should you dump the new fields in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed and added some output to both methods, eg
|
||
|
||
void removeDefault() | ||
{ | ||
assert(bbsHasDefault); | ||
assert(bbsCount > 0); | ||
bbsHasDefault = false; | ||
bbsCount--; | ||
} | ||
|
||
BasicBlock* getDefault() | ||
{ | ||
assert(bbsHasDefault); | ||
assert(bbsCount > 0); | ||
return bbsDstTab[bbsCount - 1]; | ||
} | ||
}; | ||
|
||
// flowList -- control flow edge | ||
// | ||
struct flowList | ||
{ | ||
public: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1803,25 +1803,26 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) | |
} | ||
} | ||
|
||
// Note we could update the local variable weights here by | ||
// calling lvaMarkLocalVars, with the block and weight adjustment. | ||
|
||
// If either block or bNext has a profile weight | ||
// or if both block and bNext have non-zero weights | ||
// then we select the highest weight block. | ||
// then we wil use the max weight for the block. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: wil |
||
// | ||
const bool hasProfileWeight = block->hasProfileWeight() || bNext->hasProfileWeight(); | ||
const bool hasNonZeroWeight = (block->bbWeight > BB_ZERO_WEIGHT) || (bNext->bbWeight > BB_ZERO_WEIGHT); | ||
|
||
if (block->hasProfileWeight() || bNext->hasProfileWeight() || (block->bbWeight && bNext->bbWeight)) | ||
if (hasProfileWeight || hasNonZeroWeight) | ||
{ | ||
// We are keeping block so update its fields | ||
// when bNext has a greater weight | ||
BasicBlock::weight_t const newWeight = max(block->bbWeight, bNext->bbWeight); | ||
|
||
if (block->bbWeight < bNext->bbWeight) | ||
if (hasProfileWeight) | ||
{ | ||
block->bbWeight = bNext->bbWeight; | ||
|
||
block->bbFlags |= (bNext->bbFlags & BBF_PROF_WEIGHT); // Set the profile weight flag (if necessary) | ||
assert(block->bbWeight != BB_ZERO_WEIGHT); | ||
block->bbFlags &= ~BBF_RUN_RARELY; // Clear any RarelyRun flag | ||
block->setBBProfileWeight(newWeight); | ||
} | ||
else | ||
{ | ||
assert(newWeight != BB_ZERO_WEIGHT); | ||
block->bbWeight = newWeight; | ||
block->bbFlags &= ~BBF_RUN_RARELY; | ||
} | ||
} | ||
// otherwise if either block has a zero weight we select the zero weight | ||
|
@@ -3726,47 +3727,163 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) | |
return true; | ||
} | ||
|
||
/***************************************************************************** | ||
* | ||
* Function called to optimize switch statements | ||
*/ | ||
|
||
//----------------------------------------------------------------------------- | ||
// fgOptimizeSwitchJump: see if a switch has a dominant case, and modify to | ||
// check for that case up front (aka switch peeling). | ||
// | ||
// Returns: | ||
// True if the switch now has an upstream check for the dominant case. | ||
// | ||
bool Compiler::fgOptimizeSwitchJumps() | ||
{ | ||
bool result = false; // Our return value | ||
|
||
#if 0 | ||
// TODO-CQ: Add switch jump optimizations? | ||
if (!fgHasSwitch) | ||
{ | ||
return false; | ||
} | ||
|
||
if (!fgHaveValidEdgeWeights) | ||
return false; | ||
bool modified = false; | ||
|
||
for (BasicBlock* bSrc = fgFirstBB; bSrc != NULL; bSrc = bSrc->bbNext) | ||
for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext) | ||
{ | ||
if (bSrc->bbJumpKind == BBJ_SWITCH) | ||
if (block->IsLIR()) | ||
{ | ||
break; | ||
} | ||
|
||
if (block->isRunRarely()) | ||
{ | ||
unsigned jumpCnt; jumpCnt = bSrc->bbJumpSwt->bbsCount; | ||
BasicBlock** jumpTab; jumpTab = bSrc->bbJumpSwt->bbsDstTab; | ||
continue; | ||
} | ||
|
||
do | ||
{ | ||
BasicBlock* bDst = *jumpTab; | ||
flowList* edgeToDst = fgGetPredForBlock(bDst, bSrc); | ||
double outRatio = (double) edgeToDst->edgeWeightMin() / (double) bSrc->bbWeight; | ||
if (block->bbJumpKind != BBJ_SWITCH) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this ever called for LIR? Check for SWITCH before RunRarely, as it's more likely to be false? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't make sense to call for LIR as switches are lowered there. So changed to an assert. |
||
{ | ||
continue; | ||
} | ||
|
||
if (outRatio >= 0.60) | ||
if (!block->bbJumpSwt->bbsHasDominantCase) | ||
{ | ||
continue; | ||
} | ||
|
||
// We currently will only see dominant cases with PGO. | ||
// | ||
assert(block->hasProfileWeight()); | ||
|
||
const unsigned dominantCase = block->bbJumpSwt->bbsDominantCase; | ||
|
||
JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase); | ||
|
||
// The dominant case should not be the default case, as we already peel that one. | ||
// | ||
assert(dominantCase < (block->bbJumpSwt->bbsCount - 1)); | ||
BasicBlock* const dominantTarget = block->bbJumpSwt->bbsDstTab[dominantCase]; | ||
Statement* const switchStmt = block->lastStmt(); | ||
GenTree* const switchTree = switchStmt->GetRootNode(); | ||
assert(switchTree->OperIs(GT_SWITCH)); | ||
GenTree* const switchValue = switchTree->AsOp()->gtGetOp1(); | ||
|
||
// Split the switch block just before at the switch. | ||
// | ||
// After this, newBlock is the switch block, and | ||
// block is the upstream block. | ||
// | ||
BasicBlock* newBlock = nullptr; | ||
|
||
if (block->firstStmt() == switchStmt) | ||
{ | ||
newBlock = fgSplitBlockAtBeginning(block); | ||
} | ||
else | ||
{ | ||
newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt()); | ||
} | ||
|
||
// Set up a compare in the upstream block, "stealing" the switch value tree. | ||
// | ||
GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase)); | ||
GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare); | ||
Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetILOffsetX()); | ||
fgInsertStmtAtEnd(block, jmpStmt); | ||
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED; | ||
|
||
// Reattach switch value to the switch. This may introduce a comma | ||
// in the upstream compare tree, if the switch value expression is complex. | ||
// | ||
switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1); | ||
|
||
// Update flags | ||
// | ||
dominantCaseCompare->gtFlags |= dominantCaseCompare->AsOp()->gtOp1->gtFlags; | ||
jmpTree->gtFlags |= dominantCaseCompare->gtFlags; | ||
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED; | ||
|
||
// Wire up the new control flow. | ||
// | ||
block->bbJumpKind = BBJ_COND; | ||
block->bbJumpDest = dominantTarget; | ||
flowList* const blockToTargetEdge = fgAddRefPred(dominantTarget, block); | ||
flowList* const blockToNewBlockEdge = newBlock->bbPreds; | ||
assert(blockToNewBlockEdge->getBlock() == block); | ||
assert(blockToTargetEdge->getBlock() == block); | ||
|
||
// Update profile data | ||
// | ||
const BasicBlock::weight_t fraction = newBlock->bbJumpSwt->bbsDominantFraction; | ||
const BasicBlock::weight_t blockToTargetWeight = block->bbWeight * fraction; | ||
const BasicBlock::weight_t blockToNewBlockWeight = block->bbWeight - blockToTargetWeight; | ||
|
||
newBlock->setBBProfileWeight(blockToNewBlockWeight); | ||
|
||
blockToTargetEdge->setEdgeWeights(blockToTargetWeight, blockToTargetWeight, dominantTarget); | ||
blockToNewBlockEdge->setEdgeWeights(blockToNewBlockWeight, blockToNewBlockWeight, block); | ||
|
||
// There may be other switch cases that lead to this same block, but there's just | ||
// one edge in the flowgraph. So we need to subtract off the profile data that now flows | ||
// along the peeled edge. | ||
// | ||
for (flowList* pred = dominantTarget->bbPreds; pred != nullptr; pred = pred->flNext) | ||
{ | ||
if (pred->getBlock() == newBlock) | ||
{ | ||
if (pred->flDupCount == 1) | ||
{ | ||
// The only switch case leading to the dominant target was the one we peeled. | ||
// So the edge from the switch now has zero weight. | ||
// | ||
pred->setEdgeWeights(BB_ZERO_WEIGHT, BB_ZERO_WEIGHT, dominantTarget); | ||
} | ||
else | ||
{ | ||
// straighten switch here... | ||
// Other switch cases also lead to the dominant target. | ||
// Subtract off the weight we transferred to the peel. | ||
// | ||
BasicBlock::weight_t newMinWeight = pred->edgeWeightMin() - blockToTargetWeight; | ||
BasicBlock::weight_t newMaxWeight = pred->edgeWeightMax() - blockToTargetWeight; | ||
|
||
if (newMinWeight < BB_ZERO_WEIGHT) | ||
{ | ||
newMinWeight = BB_ZERO_WEIGHT; | ||
} | ||
if (newMaxWeight < BB_ZERO_WEIGHT) | ||
{ | ||
newMaxWeight = BB_ZERO_WEIGHT; | ||
} | ||
pred->setEdgeWeights(newMinWeight, newMaxWeight, dominantTarget); | ||
} | ||
} | ||
while (++jumpTab, --jumpCnt); | ||
} | ||
|
||
// For now we leave the switch as is, since there's no way | ||
// to indicate that one of the cases is now unreachable. | ||
// | ||
// But it no longer has a dominant case. | ||
// | ||
newBlock->bbJumpSwt->bbsHasDominantCase = false; | ||
|
||
modified = true; | ||
} | ||
#endif | ||
|
||
return result; | ||
return modified; | ||
} | ||
|
||
//----------------------------------------------------------------------------- | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rearranged so that I could refer to
BasicBlock::weight_t
withinBBswtDesc
.