45 #define DEBUG_TYPE "loop-unroll" 48 "Number of loops unrolled with run-time trip counts");
51 cl::desc(
"Allow runtime unrolling for loops with multiple exits, when " 52 "epilog is generated"));
85 assert(Latch &&
"Loop must have a latch");
86 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
94 for (
PHINode &PN : Succ->phis()) {
108 NewPN->
addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
115 Value *V = PN.getIncomingValueForBlock(Latch);
129 PN.setIncomingValueForBlock(NewPreHeader, NewPN);
131 PN.addIncoming(NewPN, PrologExit);
144 nullptr, PreserveLCSSA);
152 assert(Count != 0 &&
"nonsensical Count!");
163 nullptr, PreserveLCSSA);
165 B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
189 assert(Latch &&
"Loop must have a latch");
190 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
219 assert(PN.hasOneUse() &&
"The phi should have 1 use");
220 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
221 assert(EpilogPN->getParent() == Exit &&
"EpilogPN should be in Exit block");
226 Value *V = PN.getIncomingValueForBlock(Latch);
233 EpilogPN->addIncoming(V, EpilogLatch);
235 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
236 "EpilogPN should have EpilogPreHeader incoming block");
238 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
254 for (
PHINode &PN : Succ->phis()) {
260 NewPN->
addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
262 NewPN->
addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
266 PHINode *VPN = cast<PHINode>(VMap[&PN]);
273 Value *BrLoopExit =
B.CreateIsNotNull(ModVal,
"lcmp.mod");
274 assert(Exit &&
"Loop must have a single exit block only");
280 B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
301 const bool UseEpilogRemainder,
const bool UnrollRemainder,
304 std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks,
306 StringRef suffix = UseEpilogRemainder ?
"epil" :
"prol";
314 NewLoops[ParentLoop] = ParentLoop;
315 if (!CreateRemainderLoop)
316 NewLoops[L] = ParentLoop;
322 NewBlocks.push_back(NewBB);
327 if (CreateRemainderLoop || LI->
getLoopFor(*BB) != L || ParentLoop)
344 DT->
addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
351 VMap.
erase((*BB)->getTerminator());
352 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
355 if (!CreateRemainderLoop) {
366 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
377 PHINode *NewPHI = cast<PHINode>(VMap[&*
I]);
378 if (!CreateRemainderLoop) {
379 if (UseEpilogRemainder) {
385 cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
390 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
398 if (CreateRemainderLoop) {
399 Loop *NewLoop = NewLoops[L];
400 assert(NewLoop &&
"L should have been cloned");
430 bool UseEpilogRemainder) {
444 dbgs() <<
"Bailout for multi-exit handling when latch exit has >1 " 464 bool PreserveLCSSA,
bool UseEpilogRemainder) {
468 UseEpilogRemainder) &&
469 "Should be safe to unroll before checking profitability!");
493 if (ExitingBlocks.
size() > 2)
500 return (OtherExits.
size() == 1 &&
501 OtherExits[0]->getTerminatingDeoptimizeCall());
512 uint64_t UnrollFactor) {
513 uint64_t TrueWeight, FalseWeight;
521 uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
524 auto *RemainderLatchBR = cast<BranchInst>(Latch->
getTerminator());
525 unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
526 MDBuilder MDB(RemainderLatchBR->getContext());
528 HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
529 : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
530 RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
573 Loop *L,
unsigned Count,
bool AllowExpensiveTripCount,
574 bool UseEpilogRemainder,
bool UnrollRemainder,
bool ForgetAllSCEV,
579 LLVM_DEBUG(UseEpilogRemainder ?
dbgs() <<
"Using epilog remainder.\n" 580 :
dbgs() <<
"Using prolog remainder.\n");
598 <<
"Loop latch not terminated by a conditional branch.\n");
602 unsigned ExitIndex = LatchBR->
getSuccessor(0) == Header ? 1 : 0;
610 <<
"One of the loop latch successors must be the exit block.\n");
617 bool isMultiExitUnrollingEnabled =
619 UseEpilogRemainder) &&
623 if (!isMultiExitUnrollingEnabled &&
627 <<
"Multiple exit/exiting blocks in loop and multi-exit unrolling not " 643 if (isa<SCEVCouldNotCompute>(BECountSC) ||
652 const SCEV *TripCountSC =
654 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
663 if (!AllowExpensiveTripCount &&
666 LLVM_DEBUG(
dbgs() <<
"High cost for expanding trip count scev!\n");
672 if (
Log2_32(Count) > BEWidth) {
675 <<
"Count failed constraint on overflow trip count calculation.\n");
693 if (UseEpilogRemainder) {
701 nullptr, PreserveLCSSA);
708 EpilogPreHeader =
SplitBlock(NewExit, NewExitTerminator, DT, LI);
713 PrologPreHeader =
SplitEdge(PreHeader, Header, DT, LI);
750 ModVal =
B.CreateAnd(TripCount, Count - 1,
"xtraiter");
762 Value *ModValTmp =
B.CreateURem(BECount,
765 Value *ModValAdd =
B.CreateAdd(ModValTmp,
769 ModVal =
B.CreateURem(ModValAdd,
774 UseEpilogRemainder ?
B.CreateICmpULT(BECount,
777 B.CreateIsNotNull(ModVal,
"lcmp.mod");
778 BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
779 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
781 B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
784 if (UseEpilogRemainder)
800 std::vector<BasicBlock *> NewBlocks;
805 bool CreateRemainderLoop = (Count != 2);
810 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
811 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
813 L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
814 InsertTop, InsertBot,
815 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
819 if (remainderLoop && !UnrollRemainder)
824 F->getBasicBlockList(),
825 NewBlocks[0]->getIterator(),
832 for (
auto *BB : OtherExits) {
833 for (
auto &II : *BB) {
838 if (!isa<PHINode>(II))
840 PHINode *Phi = cast<PHINode>(&II);
844 for (
unsigned i =0; i < oldNumOperands; i++){
858 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 861 [SuccBB](
BasicBlock *EB) {
return EB == SuccBB; }) ||
862 SuccBB == LatchExit) &&
863 "Breaks the definition of dedicated exits!");
880 for (
auto *BB : L->
blocks()) {
881 auto *DomNodeBB = DT->
getNode(BB);
882 for (
auto *DomChild : DomNodeBB->children()) {
883 auto *DomChildBB = DomChild->
getBlock();
888 for (
auto *BB : ChildrenToUpdate)
916 if (UseEpilogRemainder) {
920 EpilogPreHeader, NewPreHeader, VMap, DT, LI,
926 Value *TestVal = B2.CreateSub(TripCount, ModVal,
"unroll_iter");
928 B2.SetInsertPoint(LatchBR);
936 IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->
getName() +
".ncmp");
938 IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->
getName() +
".ncmp");
945 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
946 NewPreHeader, VMap, DT, LI, PreserveLCSSA);
954 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 962 if (OtherExits.size() > 0) {
973 if (remainderLoop && UnrollRemainder) {
977 { Count - 1, Count - 1,
981 0,
false, ForgetAllSCEV},
982 LI, SE, DT, AC,
TTI,
nullptr, PreserveLCSSA);
986 *ResultLoop = remainderLoop;
987 NumRuntimeUnrolled++;
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string in and methods for querying it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
void push_back(const T &Elt)
The main scalar evolution driver.
void getUniqueNonLatchExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop except successors from Latch block are not considered...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
A cache of @llvm.assume calls within a function.
static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can safely unroll a multi-exit/exiting loop.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
iterator begin()
Instruction iterator methods.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool isIntegerTy() const
True if this is an instance of IntegerType.
void setName(const Twine &Name)
Change the name of the value.
RPOIterator endRPO() const
BlockT * getHeader() const
Type * getType() const
All values are typed, get the type of this value.
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const char *const LLVMLoopUnrollFollowupRemainder
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
int getNumOccurrences() const
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
self_iterator getIterator()
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
const char *const LLVMLoopUnrollFollowupAll
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Iterator for intrusive lists based on ilist_node.
static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop, Loop *RemainderLoop, uint64_t UnrollFactor)
Type * getType() const
Return the LLVM type of this SCEV expression.
void setIncomingBlock(unsigned i, BasicBlock *BB)
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Module.h This file contains the declarations for the Module class.
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
constexpr bool hasValue() const
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Store the result of a depth first search within basic blocks contained by a single loop.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
If this flag is set, the remapper ignores missing function-local entries (Argument,...
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
cl::opt< unsigned > SCEVCheapExpansionBudget
StringRef getName() const
Return a constant reference to the value's name.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
bool isUnconditional() const
void setCondition(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM Value Representation.
succ_range successors(Instruction *I)
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
The loop was not modified.
StringRef - Represent a constant reference to a string, i.e.
bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
void setIncomingValue(unsigned i, Value *V)
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
iterator_range< block_iterator > blocks() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool erase(const KeyT &Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
void forgetTopmostLoop(const Loop *L)