50 #define DEBUG_TYPE "loop-peel" 52 STATISTIC(NumPeeled,
"Number of loops peeled");
56 cl::desc(
"Set the unroll peeling count, for testing purposes"));
60 cl::desc(
"Allows loops to be peeled when the dynamic " 61 "trip count is known to be low."));
66 cl::desc(
"Allows loop nests to be peeled."));
70 cl::desc(
"Max average trip count which will cause loop peeling."));
74 cl::desc(
"Force a peel count regardless of profiling information."));
78 cl::desc(
"Allow peeling of loops with multiple deopt exits."));
141 "Non-loop Phi should not be checked for turning into invariant.");
144 auto I = IterationsToInvariance.
find(Phi);
145 if (
I != IterationsToInvariance.
end())
157 else if (
PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
159 if (IncPhi->getParent() != L->
getHeader())
164 IncPhi, L, BackEdge, IterationsToInvariance);
166 ToInvariance = InputToInvariance + 1u;
171 IterationsToInvariance[Phi] = ToInvariance;
187 unsigned DesiredPeelCount = 0;
189 for (
auto *BB : L.
blocks()) {
190 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
191 if (!BI || BI->isUnconditional())
198 Value *Condition = BI->getCondition();
199 Value *LeftVal, *RightVal;
216 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
217 if (isa<SCEVAddRecExpr>(RightSCEV)) {
236 unsigned NewPeelCount = DesiredPeelCount;
249 auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
251 IterVal = NextIterVal;
252 NextIterVal = SE.getAddExpr(IterVal, Step);
256 auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
257 return NewPeelCount < MaxPeelCount;
260 while (CanPeelOneMoreIteration() &&
261 SE.isKnownPredicate(Pred, IterVal, RightSCEV))
262 PeelOneMoreIteration();
276 !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
277 SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
278 if (!CanPeelOneMoreIteration())
280 PeelOneMoreIteration();
283 DesiredPeelCount =
std::max(DesiredPeelCount, NewPeelCount);
286 return DesiredPeelCount;
294 assert(LoopSize > 0 &&
"Zero loop size is not allowed!");
312 <<
" iterations.\n");
322 unsigned AlreadyPeeled = 0;
324 AlreadyPeeled = *Peeled;
335 if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
342 unsigned DesiredPeelCount = TargetPeelCount;
344 assert(BackEdge &&
"Loop is not in simplified form?");
345 for (
auto BI = L->
getHeader()->
begin(); isa<PHINode>(&*BI); ++BI) {
346 PHINode *Phi = cast<PHINode>(&*BI);
348 Phi, L, BackEdge, IterationsToInvariance);
350 DesiredPeelCount =
std::max(DesiredPeelCount, ToInvariance);
357 DesiredPeelCount =
std::max(DesiredPeelCount,
360 if (DesiredPeelCount > 0) {
361 DesiredPeelCount =
std::min(DesiredPeelCount, MaxPeelCount);
363 assert(DesiredPeelCount > 0 &&
"Wrong loop size estimation?");
366 <<
" iteration(s) to turn" 367 <<
" some Phis into invariants.\n");
393 LLVM_DEBUG(
dbgs() <<
"Profile-based estimated trip count is " << *PeelCount
398 (LoopSize * (*PeelCount + 1) <=
Threshold)) {
400 <<
" iterations.\n");
404 LLVM_DEBUG(
dbgs() <<
"Requested peel count: " << *PeelCount <<
"\n");
405 LLVM_DEBUG(
dbgs() <<
"Already peel count: " << AlreadyPeeled <<
"\n");
436 uint64_t &FallThroughWeight) {
439 if (!FallThroughWeight)
442 unsigned HeaderIdx = (LatchBR->
getSuccessor(0) == Header ? 0 : 1);
445 HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
446 : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
447 LatchBR->
setMetadata(LLVMContext::MD_prof, WeightNode);
449 FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
459 uint64_t &ExitWeight,
460 uint64_t &FallThroughWeight) {
461 uint64_t TrueWeight, FalseWeight;
464 unsigned HeaderIdx = LatchBR->
getSuccessor(0) == Header ? 0 : 1;
465 ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
466 FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
477 uint64_t FallThroughWeight) {
480 if (!FallThroughWeight)
485 unsigned HeaderIdx = LatchBR->
getSuccessor(0) == Header ? 0 : 1;
487 HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
488 : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
489 LatchBR->
setMetadata(LLVMContext::MD_prof, WeightNode);
545 for (
Loop *ChildLoop : *L) {
546 cloneLoop(ChildLoop, ParentLoop, VMap, LI,
nullptr);
560 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
579 PHINode *NewPHI = cast<PHINode>(VMap[&*
I]);
580 if (IterNumber == 0) {
584 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
585 if (LatchInst && L->contains(LatchInst))
586 VMap[&*
I] = LVMap[LatchInst];
588 VMap[&*
I] = LatchVal;
590 cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
597 for (
auto Edge : ExitEdges)
598 for (
PHINode &PHI : Edge.second->phis()) {
599 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
600 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
601 if (LatchInst && L->contains(LatchInst))
602 LatchVal = VMap[LatchVal];
603 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
609 LVMap[KV.first] = KV.second;
615 Optional<bool> UserAllowProfileBasedPeeling,
bool UnrollingSpecficValues) {
628 if (UnrollingSpecficValues) {
657 bool PreserveLCSSA) {
658 assert(PeelCount > 0 &&
"Attempt to peel out zero iterations?");
659 assert(
canPeel(L) &&
"Attempt to peel a loop which is not peelable?");
691 for (
auto Edge : ExitEdges) {
692 if (ExitIDom.
count(Edge.second))
697 ExitIDom[Edge.second] = BB;
756 NewPreHeader->setName(PreHeader->
getName() +
".peel.newph");
763 cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
764 uint64_t ExitWeight = 0, FallThroughWeight = 0;
768 for (
unsigned Iter = 0; Iter < PeelCount; ++Iter) {
773 LoopBlocks, VMap, LVMap, DT, LI);
784 for (
auto Exit : ExitIDom)
786 cast<BasicBlock>(LVMap[Exit.second]));
787 #ifdef EXPENSIVE_CHECKS 792 auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
796 LatchBRCopy->setMetadata(LLVMContext::MD_loop,
nullptr);
798 InsertTop = InsertBot;
803 F->getBasicBlockList(),
804 NewBlocks[0]->getIterator(),
F->end());
812 Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
813 if (LatchInst && L->
contains(LatchInst))
814 NewVal = LVMap[LatchInst];
822 unsigned AlreadyPeeled = 0;
824 AlreadyPeeled = *Peeled;
837 simplifyLoop(L, DT, LI, SE, AC,
nullptr, PreserveLCSSA);
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
static const char * PeeledCountMetaData
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t FallThroughWeight)
Update the weights of original Latch block after peeling off all iterations.
LLVM_NODISCARD bool empty() const
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void push_back(const T &Elt)
The main scalar evolution driver.
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock * > &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
iterator begin()
Instruction iterator methods.
bool match(Val *V, const Pattern &P)
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const Loop * getLoop() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
unsigned getNumSuccessors() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
void setName(const Twine &Name)
Change the name of the value.
RPOIterator endRPO() const
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
BlockT * getHeader() const
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
iterator find(const_arg_type_t< KeyT > Val)
initializer< Ty > init(const Ty &Val)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
parseOptionalThreadLocal := /*empty
llvm::Optional< int > getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
LLVM Basic Block Representation.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
int getNumOccurrences() const
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Fast - This calling convention attempts to make calls as fast as possible (e.g.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
self_iterator getIterator()
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t &ExitWeight, uint64_t &FallThroughWeight)
Initialize the weights.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void getExitEdges(SmallVectorImpl< Edge > &ExitEdges) const
Return all pairs of (inside_block,outside_block).
Optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
Align max(MaybeAlign Lhs, Align Rhs)
bool hasNoSelfWrap() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr bool hasValue() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Store the result of a depth first search within basic blocks contained by a single loop.
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, uint64_t ExitWeight, uint64_t &FallThroughWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
bool isEquality() const
Return true if this predicate is either EQ or NE.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
static const unsigned InfiniteIterationsToInvariance
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing,...
iterator_range< block_iterator > blocks() const
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned &TripCount, ScalarEvolution &SE, unsigned Threshold=UINT_MAX)
static cl::opt< bool > UnrollPeelMultiDeoptExit("unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits."))
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * >> &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
const BasicBlock * getParent() const
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void forgetTopmostLoop(const Loop *L)