52 #define DEBUG_TYPE "loop-flatten" 59 cl::desc(
"Limit on the cost of instructions that can be repeated due to " 65 cl::desc(
"Assume that the product of the two iteration " 66 "limits will never overflow"));
71 cl::desc(
"Widen the loop induction variables, if possible, so " 72 "overflow checks won't reject flattening"));
120 IterationInstructions.
insert(BackBranch);
127 InductionPHI =
nullptr;
150 if (!
Compare || !IsValidPredicate(
Compare->getUnsignedPredicate()) ||
162 Increment = dyn_cast<BinaryOperator>(
Compare->getOperand(0));
163 Limit =
Compare->getOperand(1);
167 Increment = dyn_cast<BinaryOperator>(
Compare->getOperand(1));
168 Limit =
Compare->getOperand(0);
170 if (!Increment || Increment->hasNUsesOrMore(3)) {
174 IterationInstructions.
insert(Increment);
180 "PHI value is not increment inst");
182 auto *CI = dyn_cast<ConstantInt>(
184 if (!CI || !CI->isZero()) {
222 assert(InnerPHI.getNumIncomingValues() == 2);
223 Value *PreHeaderValue =
231 PHINode *OuterPHI = dyn_cast<PHINode>(PreHeaderValue);
241 PHINode *LCSSAPHI = dyn_cast<PHINode>(
252 dbgs() <<
"LCSSA PHI incoming value does not match latch value\n");
259 SafeOuterPHIs.
insert(OuterPHI);
264 if (!SafeOuterPHIs.
count(&OuterPHI)) {
265 LLVM_DEBUG(
dbgs() <<
"found unsafe PHI in outer loop: "; OuterPHI.dump());
283 unsigned RepeatedInstrCost = 0;
289 if (!isa<PHINode>(&
I) && !
I.isTerminator() &&
291 LLVM_DEBUG(
dbgs() <<
"Cannot flatten because instruction may have " 300 if (IterationInstructions.
count(&
I))
315 RepeatedInstrCost += Cost;
319 LLVM_DEBUG(
dbgs() <<
"Cost of instructions that will be repeated: " 320 << RepeatedInstrCost <<
"\n");
324 LLVM_DEBUG(
dbgs() <<
"checkOuterLoopInsts: not profitable, bailing.\n");
343 (isa<SExtInst>(InnerLimit) || isa<ZExtInst>(InnerLimit)))
344 InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
355 if (isa<TruncInst>(U)) {
358 U = *U->user_begin();
361 LLVM_DEBUG(
dbgs() <<
"Found use of inner induction variable: "; U->dump());
364 Value *MatchedItCount;
378 if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerLimit) {
380 ValidOuterPHIUses.
insert(MatchedMul);
383 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
394 auto IsValidOuterPHIUses = [&] (
User *U) ->
bool {
395 LLVM_DEBUG(
dbgs() <<
"Found use of outer induction variable: "; U->dump());
396 if (!ValidOuterPHIUses.
count(U)) {
397 LLVM_DEBUG(
dbgs() <<
"Did not match expected pattern, bailing\n");
404 if (
auto *V = dyn_cast<TruncInst>(U)) {
405 for (
auto *K : V->users()) {
406 if (!IsValidOuterPHIUses(K))
412 if (!IsValidOuterPHIUses(U))
418 <<
" value(s) that can be replaced:\n";
447 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(U)) {
452 if (
GEP->isInBounds() &&
454 DL.getPointerTypeSizeInBits(
GEP->getType())) {
456 dbgs() <<
"use of linear IV would be UB if overflow occurred: ";
516 LLVM_DEBUG(
dbgs() <<
"Checks all passed, doing the transformation\n");
522 Remark <<
"Flattened into outer loop";
526 Value *NewTripCount =
530 NewTripCount->
dump());
562 dbgs() <<
"with: "; OuterValue->
dump());
584 auto &
DL = M->getDataLayout();
587 unsigned MaxLegalSize =
DL.getLargestLegalIntTypeSizeInBits();
588 auto *MaxLegalType =
DL.getLargestLegalIntType(M->getContext());
593 if (InnerType != OuterType ||
594 InnerType->getScalarSizeInBits() >= MaxLegalSize ||
595 MaxLegalType->getScalarSizeInBits() < InnerType->getScalarSizeInBits() * 2) {
608 for (
unsigned i = 0; i < WideIVs.
size(); i++) {
610 ElimExt, Widened,
true ,
615 LLVM_DEBUG(
dbgs() <<
"Deleting old phi: "; WideIVs[i].NarrowIV->dump());
619 assert(Widened &&
"Widenend IV expected");
629 dbgs() <<
"Loop flattening running on outer loop " 648 LLVM_DEBUG(
dbgs() <<
"Multiply would always overflow, so not profitable\n");
651 LLVM_DEBUG(
dbgs() <<
"Multiply might overflow, not flattening\n");
655 LLVM_DEBUG(
dbgs() <<
"Multiply cannot overflow, modifying loop in-place\n");
661 bool Changed =
false;
663 auto *OuterLoop = InnerLoop->getParentLoop();
720 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
721 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
722 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
724 auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
725 auto *
TTI = &TTIP.getTTI(
F);
726 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string in and methods for querying it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
SmallPtrSet< PHINode *, 4 > InnerPHIsToTransform
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops", false, false) INITIALIZE_PASS_END(LoopFlattenLegacyPass
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
static bool CanWidenIV(struct FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
A Module instance is used to store all the information related to an LLVM module.
void push_back(const T &Elt)
The main scalar evolution driver.
FunctionPass * createLoopFlattenPass()
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
Always overflows in the direction of signed/unsigned min value.
BasicBlock * getSuccessor(unsigned i) const
Analysis pass which computes a DominatorTree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
PHINode * InnerInductionPHI
PHINode * OuterInductionPHI
static cl::opt< bool > AssumeNoOverflow("loop-flatten-assume-no-overflow", cl::Hidden, cl::init(false), cl::desc("Assume that the product of the two iteration " "limits will never overflow"))
void dump() const
Support for debugging, callable in GDB: V->dump()
bool match(Val *V, const Pattern &P)
AnalysisUsage & addRequired()
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
PHINode * createWideIV(const WideIVInfo &WI, LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, unsigned &NumElimExt, unsigned &NumWidened, bool HasGuards, bool UsePostIncrementRanges)
Widen Induction Variables - Extend the width of an IV to cover its widest uses.
static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
static bool CanFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
Analysis pass that exposes the LoopInfo for a function.
BlockT * getHeader() const
Type * getType() const
All values are typed, get the type of this value.
static bool checkPHIs(struct FlattenInfo &FI, const TargetTransformInfo *TTI)
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static bool checkOuterLoopInsts(struct FlattenInfo &FI, SmallPtrSetImpl< Instruction * > &IterationInstructions, const TargetTransformInfo *TTI)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
static cl::opt< unsigned > RepeatedInstructionThreshold("loop-flatten-cost-threshold", cl::Hidden, cl::init(2), cl::desc("Limit on the cost of instructions that can be repeated due to " "loop flattening"))
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
static bool findLoopComponents(Loop *L, SmallPtrSetImpl< Instruction * > &IterationInstructions, PHINode *&InductionPHI, Value *&Limit, BinaryOperator *&Increment, BranchInst *&BackBranch, ScalarEvolution *SE)
FunctionPass class - This class is used to implement most global optimizations.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
static bool checkIVUsers(struct FlattenInfo &FI)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
static cl::opt< bool > WidenIV("loop-flatten-widen-iv", cl::Hidden, cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening"))
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
BinaryOperator * InnerIncrement
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
A function analysis which provides an AssumptionCache.
Value * hasConstantValue() const
If the specified PHI node always merges together the same value, return the value,...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
A struct for saving information about induction variables.
void initializeLoopFlattenLegacyPassPass(PassRegistry &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Module.h This file contains the declarations for the Module class.
static OverflowResult checkOverflow(struct FlattenInfo &FI, DominatorTree *DT, AssumptionCache *AC)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
iterator_range< user_iterator > users()
This class uses information about analyze scalars to rewrite expressions in canonical form.
Represents analyses that only rely on functions' control flow.
Analysis pass that exposes the ScalarEvolution for a function.
Virtual Register Rewriter
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
Always overflows in the direction of signed/unsigned max value.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
unsigned getIntegerBitWidth() const
StringRef getName() const
Represents a single loop in the control flow graph.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void preserveSet()
Mark an analysis set as preserved.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
bool isUnconditional() const
SmallVector< LoopT *, 4 > getLoopsInPreorder()
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool Flatten(DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, TargetTransformInfo *TTI)
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
BinaryOperator * OuterIncrement
static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, const TargetTransformInfo *TTI)
A container for analyses that lazily runs them and caches their results.
OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo=true)
SmallPtrSet< Value *, 4 > LinearIVUses
FlattenInfo(Loop *OL, Loop *IL)
static BinaryOperator * CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
const BasicBlock * getParent() const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL