Go to the documentation of this file.
38 #define DEBUG_TYPE "loop-rotate"
41 "Number of loops not rotated due to the header size");
43 "Number of instructions hoisted into loop preheader");
45 "Number of instructions cloned into loop preheader");
46 STATISTIC(NumRotated,
"Number of loops rotated");
50 cl::desc(
"Allow loop rotation multiple times in order to reach "
51 "a better latch exit"));
56 const unsigned MaxHeaderSize;
69 LoopRotate(
unsigned MaxHeaderSize,
LoopInfo *LI,
74 : MaxHeaderSize(MaxHeaderSize), LI(LI),
TTI(
TTI), AC(AC), DT(DT), SE(SE),
75 MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
76 IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
77 bool processLoop(
Loop *L);
80 bool rotateLoop(
Loop *L,
bool SimplifiedLatch);
81 bool simplifyLoopLatch(
Loop *L);
88 bool Inserted = VM.
insert({K, V}).second;
104 PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
109 for (
I = OrigHeader->
begin();
I !=
E; ++
I) {
110 Value *OrigHeaderVal = &*
I;
126 SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
127 SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
133 Instruction *UserInst = cast<Instruction>(U.getUser());
134 if (!isa<PHINode>(UserInst)) {
139 if (UserBB == OrigHeader)
144 if (UserBB == OrigPreheader) {
145 U = OrigPreHeaderVal;
162 if (UserBB == OrigHeader)
170 if (UserBB == OrigPreheader)
171 NewVal = OrigPreHeaderVal;
172 else if (
SSA.HasValueForBlock(UserBB))
173 NewVal =
SSA.GetValueInMiddleOfBlock(UserBB);
176 DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
192 for (
auto &Phi : Header->
phis()) {
195 return cast<Instruction>(U)->getParent() != HeaderExit;
211 assert(Latch &&
"need latch");
227 if (!Exits.empty()) {
241 return !
BB->getPostdominatingDeoptimizeCall();
260 bool LoopRotate::rotateLoop(
Loop *L,
bool SimplifiedLatch) {
265 bool Rotated =
false;
287 if (L->
isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode ==
false &&
299 Metrics.analyzeBasicBlock(OrigHeader, *
TTI, EphValues, PrepareForLTO);
302 dbgs() <<
"LoopRotation: NOT rotating - contains non-duplicatable"
303 <<
" instructions: ";
308 LLVM_DEBUG(
dbgs() <<
"LoopRotation: NOT rotating - contains convergent "
313 if (
Metrics.NumInsts > MaxHeaderSize) {
316 <<
" instructions, which is more than the threshold ("
317 << MaxHeaderSize <<
" instructions): ";
319 ++NumNotRotatedDueToHeaderSize;
325 if (PrepareForLTO &&
Metrics.NumInlineCandidates > 0)
343 SE->forgetTopmostLoop(L);
347 MSSAU->getMemorySSA()->verifyMemorySSA();
356 assert(NewHeader &&
"Unable to determine new loop header");
358 "Unable to determine loop header and exit blocks");
363 "New header doesn't have one pred!");
373 for (;
PHINode *PN = dyn_cast<PHINode>(
I); ++
I)
383 using DbgIntrinsicHash =
384 std::pair<std::pair<hash_code, DILocalVariable *>,
DIExpression *>;
386 auto VarLocOps =
D->location_ops();
393 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(&
I))
394 DbgIntrinsics.
insert(makeHash(DII));
404 if (
auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&
I))
405 NoAliasDeclInstructions.push_back(Decl);
418 !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
426 ++NumInstrsDuplicated;
433 if (
auto *DII = dyn_cast<DbgVariableIntrinsic>(
C))
434 if (DbgIntrinsics.
count(makeHash(DII))) {
443 if (V && LI->replacementPreservesLCSSAForm(
C, V)) {
447 if (!
C->mayHaveSideEffects()) {
457 C->insertBefore(LoopEntryBranch);
459 if (
auto *II = dyn_cast<AssumeInst>(
C))
460 AC->registerAssumption(II);
468 if (!NoAliasDeclInstructions.empty()) {
493 LLVM_DEBUG(
dbgs() <<
" Cloning llvm.experimental.noalias.scope.decl:"
506 NoAliasDeclScopes.push_back(NAD->getScopeList());
520 cast<Instruction>(
ValueMap[*NoAliasDeclInstructions.begin()]);
521 auto *LastInst = &OrigPreheader->
back();
536 PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
548 MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
561 if (!InsertedPHIs.empty())
566 assert(L->
getHeader() == NewHeader &&
"Latch block is our new header");
578 MSSAU->applyUpdates(Updates, *DT,
true);
580 MSSAU->getMemorySSA()->verifyMemorySSA();
582 DT->applyUpdates(Updates);
605 OrigPreheader, NewHeader,
614 bool SplitLatchEdge =
false;
617 Loop *PredLoop = LI->getLoopFor(ExitPred);
618 if (!PredLoop || PredLoop->
contains(Exit) ||
619 ExitPred->getTerminator()->isIndirectTerminator())
628 "Despite splitting all preds, failed to split latch exit?");
629 (void)SplitLatchEdge;
639 if (DT) DT->deleteEdge(OrigPreheader, Exit);
643 MSSAU->removeEdge(OrigPreheader, Exit);
650 MSSAU->getMemorySSA()->verifyMemorySSA();
663 MSSAU->getMemorySSA()->verifyMemorySSA();
670 SimplifiedLatch =
false;
688 bool seenIncrement =
false;
689 bool MultiExitLoop =
false;
692 MultiExitLoop =
true;
699 if (isa<DbgInfoIntrinsic>(
I))
702 switch (
I->getOpcode()) {
705 case Instruction::GetElementPtr:
707 if (!cast<GEPOperator>(
I)->hasAllConstantIndices())
712 case Instruction::Sub:
713 case Instruction::And:
714 case Instruction::Or:
715 case Instruction::Xor:
716 case Instruction::Shl:
717 case Instruction::LShr:
718 case Instruction::AShr: {
720 !isa<Constant>(
I->getOperand(0))
722 : !isa<Constant>(
I->getOperand(1)) ?
I->getOperand(1) :
nullptr;
730 auto *UserInst = cast<Instruction>(UseI);
738 seenIncrement =
true;
741 case Instruction::Trunc:
742 case Instruction::ZExt:
743 case Instruction::SExt:
759 bool LoopRotate::simplifyLoopLatch(
Loop *L) {
780 << LastExit->
getName() <<
"\n");
787 MSSAU->getMemorySSA()->verifyMemorySSA();
793 bool LoopRotate::processLoop(
Loop *L) {
797 bool SimplifiedLatch =
false;
803 SimplifiedLatch = simplifyLoopLatch(L);
805 bool MadeChange = rotateLoop(L, SimplifiedLatch);
807 "Loop latch should be exiting after loop-rotate.");
811 if ((MadeChange || SimplifiedLatch) && LoopMD)
814 return MadeChange || SimplifiedLatch;
823 unsigned Threshold =
unsigned(-1),
824 bool IsUtilMode =
true,
bool PrepareForLTO) {
825 LoopRotate LR(Threshold, LI,
TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
826 IsUtilMode, PrepareForLTO);
827 return LR.processLoop(L);
bool isTerminator() const
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
InstListType::iterator iterator
Instruction iterators...
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L)
Determine whether the instructions in this range may be safely and cheaply speculated.
Represents a single loop in the control flow graph.
void dump() const
Support for debugging, callable in GDB: V->dump()
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Implements a dense probed hash-table based set with some number of buckets stored inline.
The main scalar evolution driver.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static bool profitableToRotateLoopExitingLatch(Loop *L)
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
static constexpr UpdateKind Insert
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
auto successors(MachineBasicBlock *BB)
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
static cl::opt< bool > MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, cl::desc("Allow loop rotation multiple times in order to reach " "a better latch exit"))
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
iterator begin()
Instruction iterator methods.
Option class for critical edge splitting.
iterator_range< use_iterator > uses()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
STATISTIC(NumFunctions, "Total number of functions")
auto predecessors(MachineBasicBlock *BB)
void setName(const Twine &Name)
Change the name of the value.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
const CallInst * getPostdominatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize that is present either in current ...
Value * getCondition() const
void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
This is the common base class for debug info intrinsics for variables.
Class recording the (high level) value of a variable.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveToHeader(BlockT *BB)
This method is used to move BB (which must be part of this loop) to be the loop header of the loop (t...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, BasicBlock *OrigPreheader, ValueToValueMapTy &ValueMap, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *InsertedPHIs)
RewriteUsesOfClonedInstructions - We just cloned the instructions from the old header into the prehea...
void insertDebugValuesForPHIs(BasicBlock *BB, SmallVectorImpl< PHINode * > &InsertedPHIs)
Propagate dbg.value intrinsics through the newly inserted PHIs.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static bool canRotateDeoptimizingLatchExit(Loop *L)
bool isUnconditional() const
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
A cache of @llvm.assume calls within a function.
Type * getType() const
All values are typed, get the type of this value.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
self_iterator getIterator()
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
StringRef getName() const
Return a constant reference to the value's name.
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
LLVMContext & getContext() const
Get the context in which this basic block lives.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold, bool IsUtilMode, bool PrepareForLTO=false)
Convert a loop into a loop with bottom test.
BlockT * getHeader() const
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
const Instruction & back() const
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
const BasicBlock * getParent() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V)
Insert (K, V) pair into the ValueToValueMap, and verify the key did not previously exist in the map,...
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool VerifyMemorySSA
Enables verification of MemorySSA.
Conditional or Unconditional Branch instruction.
Helper class for SSA formation on a set of values defined in multiple blocks.
LLVM Value Representation.
iterator_range< user_iterator > users()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
static constexpr UpdateKind Delete
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A Use represents the edge between a Value definition and its users.