40#define DEBUG_TYPE "loop-rotate"
43 "Number of loops not rotated due to the header size");
45 "Number of instructions hoisted into loop preheader");
47 "Number of instructions cloned into loop preheader");
52 cl::desc(
"Allow loop rotation multiple times in order to reach "
53 "a better latch exit"));
61 const unsigned MaxHeaderSize;
74 LoopRotate(
unsigned MaxHeaderSize,
LoopInfo *LI,
79 : MaxHeaderSize(MaxHeaderSize), LI(LI),
TTI(
TTI), AC(AC), DT(DT), SE(SE),
80 MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
81 IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
82 bool processLoop(
Loop *L);
85 bool rotateLoop(
Loop *L,
bool SimplifiedLatch);
86 bool simplifyLoopLatch(
Loop *L);
93 bool Inserted = VM.
insert({K, V}).second;
109 PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
114 for (
I = OrigHeader->
begin();
I !=
E; ++
I) {
115 Value *OrigHeaderVal = &*
I;
131 SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
132 SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
138 Instruction *UserInst = cast<Instruction>(U.getUser());
139 if (!isa<PHINode>(UserInst)) {
144 if (UserBB == OrigHeader)
149 if (UserBB == OrigPreheader) {
150 U = OrigPreHeaderVal;
167 if (UserBB == OrigHeader)
175 if (UserBB == OrigPreheader)
176 NewVal = OrigPreHeaderVal;
177 else if (
SSA.HasValueForBlock(UserBB))
178 NewVal =
SSA.GetValueInMiddleOfBlock(UserBB);
181 DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
191 BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator());
194 if (L->contains(HeaderExit))
197 for (
auto &Phi : Header->phis()) {
200 return cast<Instruction>(U)->getParent() != HeaderExit;
216 assert(Latch &&
"need latch");
223 if (L->contains(Exit))
231 L->getUniqueExitBlocks(Exits);
232 if (!Exits.
empty()) {
253 bool HasConditionalPreHeader,
256 if (WeightMD ==
nullptr)
267 if (Weights.
size() != 2)
269 uint32_t OrigLoopExitWeight = Weights[0];
270 uint32_t OrigLoopBackedgeWeight = Weights[1];
273 std::swap(OrigLoopExitWeight, OrigLoopBackedgeWeight);
299 if (HasConditionalPreHeader) {
301 if (OrigLoopBackedgeWeight > OrigLoopExitWeight) {
310 if ((OrigLoopBackedgeWeight & HighBit) != 0 ||
311 (OrigLoopExitWeight & HighBit) != 0)
313 OrigLoopBackedgeWeight <<= 1;
314 OrigLoopExitWeight <<= 1;
319 ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight;
322 uint32_t ExitWeight1 = OrigLoopExitWeight - ExitWeight0;
324 uint32_t LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight;
329 SuccsSwapped ? ExitWeight1 : LoopBackWeight);
330 LoopBI.
setMetadata(LLVMContext::MD_prof, LoopWeightMD);
331 if (HasConditionalPreHeader) {
332 MDNode *PreHeaderWeightMD =
334 SuccsSwapped ? ExitWeight0 : EnterWeight);
335 PreHeaderBI.
setMetadata(LLVMContext::MD_prof, PreHeaderWeightMD);
352bool LoopRotate::rotateLoop(
Loop *L,
bool SimplifiedLatch) {
354 if (
L->getBlocks().size() == 1)
357 bool Rotated =
false;
369 if (!
L->isLoopExiting(OrigHeader))
379 if (
L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode ==
false &&
391 Metrics.analyzeBasicBlock(OrigHeader, *
TTI, EphValues, PrepareForLTO);
394 dbgs() <<
"LoopRotation: NOT rotating - contains non-duplicatable"
395 <<
" instructions: ";
400 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains convergent "
405 if (!
Metrics.NumInsts.isValid()) {
406 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains instructions"
407 " with invalid cost: ";
411 if (
Metrics.NumInsts > MaxHeaderSize) {
414 <<
" instructions, which is more than the threshold ("
415 << MaxHeaderSize <<
" instructions): ";
417 ++NumNotRotatedDueToHeaderSize;
423 if (PrepareForLTO &&
Metrics.NumInlineCandidates > 0)
432 if (!OrigPreheader || !
L->hasDedicatedExits())
441 SE->forgetTopmostLoop(L);
446 SE->forgetBlockAndLoopDispositions();
451 MSSAU->getMemorySSA()->verifyMemorySSA();
458 bool BISuccsSwapped =
L->contains(Exit);
461 assert(NewHeader &&
"Unable to determine new loop header");
462 assert(
L->contains(NewHeader) && !
L->contains(Exit) &&
463 "Unable to determine loop header and exit blocks");
468 "New header doesn't have one pred!");
478 for (;
PHINode *PN = dyn_cast<PHINode>(
I); ++
I)
488 using DbgIntrinsicHash =
489 std::pair<std::pair<hash_code, DILocalVariable *>,
DIExpression *>;
491 auto VarLocOps =
D->location_ops();
498 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(&
I))
499 DbgIntrinsics.
insert(makeHash(DII));
509 if (
auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&
I))
523 !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
531 C->insertBefore(LoopEntryBranch);
533 ++NumInstrsDuplicated;
540 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(
C))
541 if (DbgIntrinsics.
count(makeHash(DII))) {
542 C->eraseFromParent();
550 if (V && LI->replacementPreservesLCSSAForm(
C, V)) {
554 if (!
C->mayHaveSideEffects()) {
555 C->eraseFromParent();
565 if (
auto *II = dyn_cast<AssumeInst>(
C))
566 AC->registerAssumption(II);
574 if (!NoAliasDeclInstructions.
empty()) {
599 LLVM_DEBUG(
dbgs() <<
" Cloning llvm.experimental.noalias.scope.decl:"
612 NoAliasDeclScopes.
push_back(NAD->getScopeList());
626 cast<Instruction>(
ValueMap[*NoAliasDeclInstructions.begin()]);
627 auto *LastInst = &OrigPreheader->
back();
642 PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
654 MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
667 if (!InsertedPHIs.
empty())
671 L->moveToHeader(NewHeader);
672 assert(
L->getHeader() == NewHeader &&
"Latch block is our new header");
679 Updates.
push_back({DominatorTree::Insert, OrigPreheader, Exit});
680 Updates.
push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
681 Updates.
push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
684 MSSAU->applyUpdates(Updates, *DT,
true);
686 MSSAU->getMemorySSA()->verifyMemorySSA();
688 DT->applyUpdates(Updates);
702 const bool HasConditionalPreHeader =
703 !isa<ConstantInt>(
Cond) ||
708 if (HasConditionalPreHeader) {
716 OrigPreheader, NewHeader,
725 bool SplitLatchEdge =
false;
728 Loop *PredLoop = LI->getLoopFor(ExitPred);
729 if (!PredLoop || PredLoop->
contains(Exit) ||
730 isa<IndirectBrInst>(ExitPred->getTerminator()))
732 SplitLatchEdge |=
L->getLoopLatch() == ExitPred;
739 "Despite splitting all preds, failed to split latch exit?");
740 (void)SplitLatchEdge;
750 if (DT) DT->deleteEdge(OrigPreheader, Exit);
754 MSSAU->removeEdge(OrigPreheader, Exit);
757 assert(
L->getLoopPreheader() &&
"Invalid loop preheader after loop rotation");
758 assert(
L->getLoopLatch() &&
"Invalid loop latch after loop rotation");
761 MSSAU->getMemorySSA()->verifyMemorySSA();
774 MSSAU->getMemorySSA()->verifyMemorySSA();
781 SimplifiedLatch =
false;
799 bool seenIncrement =
false;
800 bool MultiExitLoop =
false;
802 if (!L->getExitingBlock())
803 MultiExitLoop =
true;
810 if (isa<DbgInfoIntrinsic>(
I))
813 switch (
I->getOpcode()) {
816 case Instruction::GetElementPtr:
818 if (!cast<GEPOperator>(
I)->hasAllConstantIndices())
822 case Instruction::Add:
823 case Instruction::Sub:
824 case Instruction::And:
825 case Instruction::Or:
826 case Instruction::Xor:
827 case Instruction::Shl:
828 case Instruction::LShr:
829 case Instruction::AShr: {
831 !isa<Constant>(
I->getOperand(0))
833 : !isa<Constant>(
I->getOperand(1)) ?
I->getOperand(1) :
nullptr;
841 auto *UserInst = cast<Instruction>(UseI);
842 if (!L->contains(UserInst))
849 seenIncrement =
true;
852 case Instruction::Trunc:
853 case Instruction::ZExt:
854 case Instruction::SExt:
870bool LoopRotate::simplifyLoopLatch(
Loop *L) {
880 if (!LastExit || !
L->isLoopExiting(LastExit))
891 << LastExit->
getName() <<
"\n");
899 SE->forgetBlockAndLoopDispositions();
903 MSSAU->getMemorySSA()->verifyMemorySSA();
909bool LoopRotate::processLoop(
Loop *L) {
911 MDNode *LoopMD =
L->getLoopID();
913 bool SimplifiedLatch =
false;
919 SimplifiedLatch = simplifyLoopLatch(L);
921 bool MadeChange = rotateLoop(L, SimplifiedLatch);
922 assert((!MadeChange ||
L->isLoopExiting(
L->getLoopLatch())) &&
923 "Loop latch should be exiting after loop-rotate.");
927 if ((MadeChange || SimplifiedLatch) && LoopMD)
928 L->setLoopID(LoopMD);
930 return MadeChange || SimplifiedLatch;
939 unsigned Threshold =
unsigned(-1),
940 bool IsUtilMode =
true,
bool PrepareForLTO) {
941 LoopRotate LR(Threshold, LI,
TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
942 IsUtilMode, PrepareForLTO);
943 return LR.processLoop(L);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static constexpr uint32_t ZeroTripCountWeights[]
static bool canRotateDeoptimizingLatchExit(Loop *L)
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L)
Determine whether the instructions in this range may be safely and cheaply speculated.
static cl::opt< bool > MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, cl::desc("Allow loop rotation multiple times in order to reach " "a better latch exit"))
static bool profitableToRotateLoopExitingLatch(Loop *L)
static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, bool HasConditionalPreHeader, bool SuccsSwapped)
static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V)
Insert (K, V) pair into the ValueToValueMap, and verify the key did not previously exist in the map,...
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, BasicBlock *OrigPreheader, ValueToValueMapTy &ValueMap, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *InsertedPHIs)
RewriteUsesOfClonedInstructions - We just cloned the instructions from the old header into the prehea...
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Class recording the (high level) value of a variable.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
const Instruction & back() const
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
This is the common base class for debug info intrinsics for variables.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
bool isTerminator() const
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Represents a single loop in the control flow graph.
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Helper class for SSA formation on a set of values defined in multiple blocks.
The main scalar evolution driver.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may affect its v...
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void setName(const Twine &Name)
Change the name of the value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
StringRef getName() const
Return a constant reference to the value's name.
void dump() const
Support for debugging, callable in GDB: V->dump()
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
auto successors(const MachineBasicBlock *BB)
MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl< PHINode * > &InsertedPHIs)
Propagate dbg.value intrinsics through the newly inserted PHIs.
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto predecessors(const MachineBasicBlock *BB)
void extractFromBranchWeightMD(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold, bool IsUtilMode, bool PrepareForLTO=false)
Convert a loop into a loop with bottom test.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Option class for critical edge splitting.