43 #define DEBUG_TYPE "loop-unroll"
46 STATISTIC(NumCompletelyUnrolled,
"Number of loops completely unrolled");
47 STATISTIC(NumUnrolled,
"Number of loops unrolled (completely or otherwise)");
51 cl::desc(
"Allow runtime unrolled loops to be unrolled "
52 "with epilog instead of prolog."));
65 if (
PHINode *PN = dyn_cast<PHINode>(I)) {
66 for (
unsigned i = 0, e = PN->getNumIncomingValues();
i != e; ++
i) {
69 PN->setIncomingBlock(
i, cast<BasicBlock>(It->
second));
90 if (!OnlyPred)
return nullptr;
95 DEBUG(
dbgs() <<
"Merging: " << *BB <<
"into: " << *OnlyPred);
122 for (
auto *DI : Children)
131 if (ForgottenLoops.
insert(
L).second)
161 for (
Use &U :
I.operands()) {
162 if (
auto Def = dyn_cast<Instruction>(U)) {
184 assert(OldLoop &&
"Should (at least) be in the loop being unrolled!");
186 Loop *&NewLoop = NewLoops[OldLoop];
190 "Header should be first in RPO");
192 NewLoop =
new Loop();
200 NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
246 bool AllowRuntime,
bool AllowExpensiveTripCount,
247 bool PreserveCondBr,
bool PreserveOnlyFirst,
248 unsigned TripMultiple,
unsigned PeelCount,
LoopInfo *LI,
251 bool PreserveLCSSA) {
255 DEBUG(
dbgs() <<
" Can't unroll; loop preheader-insertion failed.\n");
261 DEBUG(
dbgs() <<
" Can't unroll; loop exit-block-insertion failed.\n");
267 DEBUG(
dbgs() <<
" Can't unroll; Loop body cannot be cloned.\n");
277 " Can't unroll; loop not terminated by a conditional branch.\n");
284 " Won't unroll loop: address of header block is taken.\n");
289 DEBUG(
dbgs() <<
" Trip Count = " << TripCount <<
"\n");
290 if (TripMultiple != 1)
291 DEBUG(
dbgs() <<
" Trip Multiple = " << TripMultiple <<
"\n");
295 if (TripCount != 0 && Count > TripCount)
299 if (TripCount == 0 && Count < 2 && PeelCount == 0)
304 assert(TripCount == 0 || TripCount % TripMultiple == 0);
307 bool CompletelyUnroll = Count == TripCount;
310 std::vector<BasicBlock*> OriginalLoopBlocks = L->
getBlocks();
318 bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
320 return isa<PHINode>(BB->
begin());
326 bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
328 assert((!RuntimeTripCount || !PeelCount) &&
329 "Did not expect runtime trip-count unrolling "
330 "and peeling for the same loop");
333 peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
339 bool HasConvergent =
false;
340 for (
auto &BB : L->
blocks())
343 HasConvergent |= CS.isConvergent();
344 assert((!HasConvergent || TripMultiple % Count == 0) &&
345 "Unroll count must divide trip multiple if loop contains a "
346 "convergent operation.");
349 if (RuntimeTripCount && TripMultiple % Count != 0 &&
354 RuntimeTripCount =
false;
365 unsigned BreakoutTrip = 0;
366 if (TripCount != 0) {
367 BreakoutTrip = TripCount % Count;
371 BreakoutTrip = TripMultiple =
377 if (CompletelyUnroll) {
379 <<
" with trip count " << TripCount <<
"!\n");
382 <<
"completely unrolled loop with "
383 <<
NV(
"UnrollCount", TripCount) <<
" iterations");
384 }
else if (PeelCount) {
386 <<
" with iteration count " << PeelCount <<
"!\n");
389 <<
" peeled loop by " <<
NV(
"PeelCount", PeelCount)
394 Diag <<
"unrolled loop by a factor of " <<
NV(
"UnrollCount", Count);
398 if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
399 DEBUG(
dbgs() <<
" with a breakout at trip " << BreakoutTrip);
400 ORE->
emit(Diag <<
" with a breakout at trip "
401 <<
NV(
"BreakoutTrip", BreakoutTrip));
402 }
else if (TripMultiple != 1) {
403 DEBUG(
dbgs() <<
" with " << TripMultiple <<
" trips per branch");
404 ORE->
emit(Diag <<
" with " <<
NV(
"TripMultiple", TripMultiple)
405 <<
" trips per branch");
406 }
else if (RuntimeTripCount) {
407 DEBUG(
dbgs() <<
" with run-time trip count");
408 ORE->
emit(Diag <<
" with run-time trip count");
419 std::vector<PHINode*> OrigPHINode;
421 OrigPHINode.push_back(cast<PHINode>(
I));
424 std::vector<BasicBlock*> Headers;
425 std::vector<BasicBlock*> Latches;
426 Headers.push_back(Header);
427 Latches.push_back(LatchBlock);
439 std::vector<BasicBlock*> UnrolledLoopBlocks = L->
getBlocks();
446 for (
Loop *SubLoop : *L)
447 LoopsToSimplify.
insert(SubLoop);
449 for (
unsigned It = 1; It != Count; ++It) {
450 std::vector<BasicBlock*> NewBlocks;
462 L->addBasicBlockToLoop(New, *LI);
466 LoopsToSimplify.insert(NewLoops[OldLoop]);
477 for (
PHINode *OrigPHI : OrigPHINode) {
478 PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
480 if (
Instruction *InValI = dyn_cast<Instruction>(InVal))
481 if (It > 1 && L->contains(InValI))
482 InVal = LastValueMap[InValI];
483 VMap[OrigPHI] = InVal;
488 LastValueMap[*BB] = New;
491 LastValueMap[
VI->first] =
VI->second;
495 if (L->contains(Succ))
501 if (It != LastValueMap.end())
509 Headers.push_back(New);
510 if (*BB == LatchBlock)
511 Latches.push_back(New);
513 NewBlocks.push_back(New);
514 UnrolledLoopBlocks.push_back(New);
524 auto BBDomNode = DT->
getNode(*BB);
525 auto BBIDom = BBDomNode->getIDom();
526 BasicBlock *OriginalBBIDom = BBIDom->getBlock();
528 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
537 if (
auto *II = dyn_cast<IntrinsicInst>(&
I))
538 if (II->getIntrinsicID() == Intrinsic::assume)
545 for (
PHINode *PN : OrigPHINode) {
546 if (CompletelyUnroll) {
547 PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
550 else if (Count > 1) {
551 Value *InVal = PN->removeIncomingValue(LatchBlock,
false);
554 if (
Instruction *InValI = dyn_cast<Instruction>(InVal)) {
555 if (L->contains(InValI))
556 InVal = LastValueMap[InVal];
558 assert(Latches.back() == LastValueMap[LatchBlock] &&
"bad last latch");
559 PN->addIncoming(InVal, Latches.back());
565 for (
unsigned i = 0, e = Latches.size();
i != e; ++
i) {
567 BranchInst *Term = cast<BranchInst>(Latches[
i]->getTerminator());
570 unsigned j = (
i + 1) % e;
572 bool NeedConditional =
true;
574 if (RuntimeTripCount && j != 0) {
575 NeedConditional =
false;
580 if (CompletelyUnroll) {
587 "NeedCondition cannot be modified by both complete "
588 "unrolling and runtime unrolling");
589 NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst &&
i != 0));
590 }
else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
593 NeedConditional =
false;
596 if (NeedConditional) {
602 if (Dest != LoopExit) {
605 if (Succ == Headers[
i])
626 if (DT && Count > 1) {
627 for (
auto *BB : OriginalLoopBlocks) {
628 auto *BBDomNode = DT->
getNode(BB);
630 for (
auto *ChildDomNode : BBDomNode->getChildren()) {
631 auto *ChildBB = ChildDomNode->getBlock();
632 if (!L->contains(ChildBB))
636 for (
auto *ChildBB : ChildrenToUpdate)
644 BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
650 std::replace(Latches.begin(), Latches.end(), Dest, Fold);
651 UnrolledLoopBlocks.erase(
std::remove(UnrolledLoopBlocks.begin(),
652 UnrolledLoopBlocks.end(), Dest),
653 UnrolledLoopBlocks.end());
660 if (DT && !CompletelyUnroll)
666 if (SE && !CompletelyUnroll && Count > 1) {
672 while (!DeadInsts.
empty())
674 dyn_cast_or_null<Instruction>(&*DeadInsts.
pop_back_val()))
682 const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
700 NumCompletelyUnrolled += CompletelyUnroll;
705 if (CompletelyUnroll)
716 if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
724 if (!OuterL && !CompletelyUnroll)
738 if (!OuterL->contains(LatchLoop))
739 while (OuterL->getParentLoop() != LatchLoop)
740 OuterL = OuterL->getParentLoop();
745 assert(OuterL->isLCSSAForm(*DT) &&
746 "Loops should be in LCSSA form after loop-unroll.");
749 for (
Loop *SubLoop : LoopsToSimplify)
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void push_back(const T &Elt)
A parsed version of the target data layout string, and methods for querying it. ...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
DiagnosticInfoOptimizationBase::Argument NV
void removeBlock(BlockT *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B)
GreatestCommonDivisor64 - Return the greatest common divisor of the two values using Euclid's algorit...
STATISTIC(NumFunctions,"Total number of functions")
iterator erase(iterator where)
static void remapInstruction(Instruction *I, ValueToValueMapTy &VMap)
Convert the instruction operands from referencing the current values into those specified by VMap...
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getNumOperands() const
unsigned getNumOperands() const
Return number of MDNode operands.
static bool needToInsertPhisForLCSSA(Loop *L, std::vector< BasicBlock * > Blocks, LoopInfo *LI)
Check if unrolling created a situation where we need to insert phi nodes to preserve LCSSA form...
The main scalar evolution driver.
A cache of .assume calls within a function.
LoopT * getParentLoop() const
const Function * getParent() const
Return the enclosing method, or null if none.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const std::vector< BlockT * > & getBlocks() const
Get a list of the basic blocks which make up this loop.
BlockT * getHeader() const
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA)
Unroll the given loop by Count.
StringRef getName() const
Return a constant reference to the value's name.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
iterator begin()
Instruction iterator methods.
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
bool isUnconditional() const
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
A Use represents the edge between a Value definition and its users.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
void setName(const Twine &Name)
Change the name of the value.
LLVM_NODISCARD bool empty() const
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
bool insert(const value_type &X)
Insert a new element into the SetVector.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BasicBlock * getSuccessor(unsigned i) const
iterator find(const KeyT &Val)
Base class for the actual dominator tree node.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
iterator_range< block_iterator > blocks() const
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LLVM Basic Block Representation.
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
Conditional or Unconditional Branch instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void splice(iterator where, iplist_impl &L2)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
const InstListType & getInstList() const
Return the underlying instruction list container.
bool isSafeToClone() const
Return true if the loop body is safe to clone in practice.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
for(unsigned i=0, e=MI->getNumOperands();i!=e;++i)
Value * getOperand(unsigned i) const
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
void markAsRemoved(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
void verifyDomTree() const
Verify the correctness of the domtree by re-computing it.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
If the specified value is a trivially dead instruction, delete it.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
StringRef getString() const
const MDOperand & getOperand(unsigned I) const
A SetVector that performs no allocations if smaller than a certain size.
Iterator for intrusive lists based on ilist_node.
const BasicBlockListType & getBasicBlockList() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
LLVM_NODISCARD T pop_back_val()
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Store the result of a depth first search within basic blocks contained by a single loop...
void push_back(pointer val)
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl< WeakVH > &Dead)
SimplifyLoopIVs - Simplify users of induction variables within this loop.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
void FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void registerAssumption(CallInst *CI)
Add an .assume intrinsic to this function's cache.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect Scalar...
static cl::opt< bool > UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled ""with epilog instead of prolog."))
Represents a single loop in the control flow graph.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
LLVM Value Representation.
succ_range successors(BasicBlock *BB)
void recalculate(FT &F)
recalculate - compute a dominator tree for the given function
StringRef - Represent a constant reference to a string, i.e.
static BasicBlock * foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, SmallPtrSetImpl< Loop * > &ForgottenLoops, DominatorTree *DT)
Folds a basic block into its predecessor if it only has one predecessor, and that predecessor only ha...
Value * SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr)
See if we can compute a simplified version of this instruction.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form...
DomTreeNodeBase< NodeT > * getNode(NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
CloneBasicBlock - Return a copy of the specified basic block, but without embedding the block into a ...
RPOIterator endRPO() const