Skip to content

Commit

Permalink
[LV][VPlan] Change to implement VPlan based predication for
Browse files Browse the repository at this point in the history
VPlan-native path

Context: Patch Series #2 for outer loop vectorization support in LV
using VPlan. (RFC:
http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html).

Patch series #2 checks that inner loops are still trivially lock-step
among all vector elements. Non-loop branches are blindly assumed to be
divergent.

Changes here implement a VPlan-based predication algorithm to compute
predicates for blocks that need predication. Predicates are computed
for the VPLoop region in reverse post order. A block's predicate is
computed as the OR of the masks of all incoming edges. The mask for an
incoming edge is computed as the AND of the predecessor block's predicate
and either the predecessor's condition bit or NOT(condition bit), depending
on whether the edge from the predecessor block to the current block is the
true or the false edge.

Reviewers: fhahn, rengolin, hsaito, dcaballe

Reviewed By: fhahn

Patch by Satish Guggilla, thanks!

Differential Revision: https://reviews.llvm.org/D53349

llvm-svn: 351990
  • Loading branch information
hidekisaito committed Jan 23, 2019
1 parent 020ce3f commit 4e4ecae
Show file tree
Hide file tree
Showing 9 changed files with 650 additions and 6 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
VPlan.cpp
VPlanHCFGBuilder.cpp
VPlanHCFGTransforms.cpp
VPlanPredicator.cpp
VPlanSLP.cpp
VPlanVerifier.cpp

Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

// Boolean command-line option that is only declared here; its definition
// lives in LoopVectorize.cpp.
extern cl::opt<bool> EnableVPlanPredication;

// Hidden boolean option, enabled by default, registered under the name
// "enable-if-conversion".
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
Expand Down Expand Up @@ -487,7 +489,10 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// Check whether the BranchInst is a supported one. Only unconditional
// branches, conditional branches with an outer loop invariant condition or
// backedges are supported.
if (Br && Br->isConditional() &&
// FIXME: We skip these checks when VPlan predication is enabled as we
// want to allow divergent branches. This whole check will be removed
// once VPlan predication is on by default.
if (!EnableVPlanPredication && Br && Br->isConditional() &&
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
Expand Down
27 changes: 22 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "VPRecipeBuilder.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanHCFGTransforms.h"
#include "VPlanPredicator.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -255,6 +256,13 @@ cl::opt<bool> EnableVPlanNativePath(
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));

// FIXME: Remove this switch once we have divergence analysis. Currently we
// assume divergent non-backedge branches when this switch is true.
// Hidden boolean option, disabled by default. It is deliberately not static:
// LoopVectorizationLegality.cpp refers to it through an extern declaration.
cl::opt<bool> EnableVPlanPredication(
"enable-vplan-predication", cl::init(false), cl::Hidden,
cl::desc("Enable VPlan-native vectorization path predicator with "
"support for outer loop vectorization."));

// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
Expand Down Expand Up @@ -6896,13 +6904,22 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();

for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
Plan->addVF(VF);

if (EnableVPlanPredication) {
VPlanPredicator VPP(*Plan);
VPP.predicate();

// Avoid running transformation to recipes until masked code generation in
// VPlan-native path is in place.
return Plan;
}

SmallPtrSet<Instruction *, 1> DeadInstructions;
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
Plan, Legal->getInductionVars(), DeadInstructions);

for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
Plan->addVF(VF);

return Plan;
}

Expand Down Expand Up @@ -7119,8 +7136,8 @@ static bool processLoopInVPlanNativePath(
VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);

// If we are stress testing VPlan builds, do not attempt to generate vector
// code.
if (VPlanBuildStressTest)
// code. Masked vector code generation support will follow soon.
if (VPlanBuildStressTest || EnableVPlanPredication)
return false;

LVP.setBestPlan(VF.Width, 1);
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,19 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
bumpIndent(1);
OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
bumpIndent(1);

// Dump the block predicate, if the block has one, as an extra piece of the
// DOT label: a quoted string of the form "BlockPredicate: <operand>\l".
// When the predicate is a VPInstruction, the name of the block that defines
// it is appended in parentheses.
const VPValue *Pred = BasicBlock->getPredicate();
if (Pred) {
  OS << " +\n" << Indent << " \"BlockPredicate: ";
  if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
    PredI->printAsOperand(OS);
    OS << " (" << DOT::EscapeString(PredI->getParent()->getName()) << ")";
  } else
    Pred->printAsOperand(OS);
  // Terminate the label line and close the quoted string on every path.
  // Previously only the VPInstruction branch did so, which left the string
  // literal unterminated for any other kind of predicate value and produced
  // malformed DOT output.
  OS << "\\l\"";
}

for (const VPRecipeBase &Recipe : *BasicBlock)
Recipe.print(OS, Indent);

Expand Down
53 changes: 53 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@ class VPBlockBase {
/// Successor selector, null for zero or single successor blocks.
VPValue *CondBit = nullptr;

/// Current block predicate - null if the block does not need a predicate.
VPValue *Predicate = nullptr;

/// Add \p Successor as the last successor to this block.
void appendSuccessor(VPBlockBase *Successor) {
assert(Successor && "Cannot add nullptr successor!");
Expand Down Expand Up @@ -490,6 +493,12 @@ class VPBlockBase {

/// Set the condition bit of this block to \p CV.
void setCondBit(VPValue *CV) { CondBit = CV; }

/// Return the predicate currently attached to this block; null if no
/// predicate has been set.
VPValue *getPredicate() { return Predicate; }

/// Const overload of getPredicate().
const VPValue *getPredicate() const { return Predicate; }

/// Attach \p Pred as the predicate of this block.
void setPredicate(VPValue *Pred) { Predicate = Pred; }

/// Set a given VPBlockBase \p Successor as the single successor of this
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
/// This VPBlockBase must have no successors.
Expand Down Expand Up @@ -520,6 +529,15 @@ class VPBlockBase {
appendPredecessor(Pred);
}

/// Remove all the predecessors of this block. Only this block's own
/// predecessor list is emptied.
void clearPredecessors() { Predecessors.clear(); }

/// Drop every successor of this block and reset its condition bit to null,
/// since a block without successors has nothing left to select between.
void clearSuccessors() {
  CondBit = nullptr;
  Successors.clear();
}

/// The method which generates the output IR that correspond to this
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(struct VPTransformState *State) = 0;
Expand Down Expand Up @@ -1490,6 +1508,41 @@ class VPBlockUtils {
From->removeSuccessor(To);
To->removePredecessor(From);
}

/// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
static bool isBackEdge(const VPBlockBase *FromBlock,
const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
assert(FromBlock->getParent() == ToBlock->getParent() &&
FromBlock->getParent() && "Must be in same region");
const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
if (!FromLoop || !ToLoop || FromLoop != ToLoop)
return false;

// A back-edge is a branch from the loop latch to its header.
return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
}

/// Returns true if \p Block is a loop latch
static bool blockIsLoopLatch(const VPBlockBase *Block,
const VPLoopInfo *VPLInfo) {
if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
return ParentVPL->isLoopLatch(Block);

return false;
}

/// Return how many successors of \p PredBlock are reached through an edge
/// that is not a back-edge according to \p VPLI.
static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
                                    VPLoopInfo *VPLI) {
  unsigned NumNonBackEdgeSuccs = 0;
  for (VPBlockBase *Succ : PredBlock->getSuccessors()) {
    // Back-edges do not contribute to the count.
    if (VPBlockUtils::isBackEdge(PredBlock, Succ, VPLI))
      continue;
    ++NumNonBackEdgeSuccs;
  }
  return NumNonBackEdgeSuccs;
}
};

class VPInterleavedAccessInfo {
Expand Down
Loading

0 comments on commit 4e4ecae

Please sign in to comment.