69 #define DEBUG_TYPE "loop-unroll" 73 cl::desc(
"The cost threshold for loop unrolling"));
77 cl::desc(
"The cost threshold for partial loop unrolling"));
81 cl::desc(
"The maximum 'boost' (represented as a percentage >= 100) applied " 82 "to the threshold when aggressively unrolling a loop due to the " 83 "dynamic cost savings. If completely unrolling a loop will reduce " 84 "the total runtime from X to Y, we boost the loop unroll " 85 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " 86 "X/Y). This limit avoids excessive code bloat."));
90 cl::desc(
"Don't allow loop unrolling to simulate more than this number of" 91 "iterations when checking full unroll profitability"));
95 cl::desc(
"Use this unroll count for all loops including those with " 96 "unroll_count pragma values, for testing purposes"));
100 cl::desc(
"Set the max unroll count for partial and runtime unrolling, for" 101 "testing purposes"));
106 "Set the max unroll count for full unrolling, for testing purposes"));
110 cl::desc(
"Set the unroll peeling count, for testing purposes"));
114 cl::desc(
"Allows loops to be partially unrolled until " 115 "-unroll-threshold loop size is reached."));
119 cl::desc(
"Allow generation of a loop remainder (extra iterations) " 120 "when unrolling a loop."));
124 cl::desc(
"Unroll loops with run-time trip counts"));
129 "The max of trip count upper bound that is considered in unrolling"));
133 cl::desc(
"Unrolled size limit for loops with an unroll(full) or " 134 "unroll_count pragma."));
138 cl::desc(
"If the runtime tripcount for the loop is lower than the " 139 "threshold, the loop is considered as flat and will be less " 140 "aggressively unrolled."));
144 cl::desc(
"Allows loops to be peeled when the dynamic " 145 "trip count is known to be low."));
149 cl::desc(
"Allow the loop remainder to be unrolled."));
156 cl::desc(
"Enqueue and re-visit child loops in the loop PM after unrolling. " 157 "This shouldn't typically be needed as child loops (or their " 158 "clones) were already visited."));
238 UP.
Count = *UserCount;
240 UP.
Partial = *UserAllowPartial;
259 struct UnrolledInstState {
263 unsigned IsCounted : 1;
267 struct UnrolledInstStateKeyInfo {
271 static inline UnrolledInstState getEmptyKey() {
272 return {PtrInfo::getEmptyKey(), 0, 0, 0};
275 static inline UnrolledInstState getTombstoneKey() {
276 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
279 static inline unsigned getHashValue(
const UnrolledInstState &S) {
280 return PairInfo::getHashValue({S.I, S.Iteration});
283 static inline bool isEqual(
const UnrolledInstState &LHS,
284 const UnrolledInstState &RHS) {
289 struct EstimatedUnrollCost {
291 unsigned UnrolledCost;
295 unsigned RolledDynamicCost;
322 "The unroll iterations max is too large!");
341 unsigned UnrolledCost = 0;
348 unsigned RolledDynamicCost = 0;
364 auto AddCostRecursively = [&](
Instruction &RootI,
int Iteration) {
365 assert(Iteration >= 0 &&
"Cannot have a negative iteration!");
366 assert(CostWorklist.
empty() &&
"Must start with an empty cost list");
367 assert(PHIUsedList.
empty() &&
"Must start with an empty phi used list");
369 for (;; --Iteration) {
375 auto CostIter = InstCostMap.
find({
I, Iteration, 0, 0});
376 if (CostIter == InstCostMap.
end())
381 auto &Cost = *CostIter;
387 Cost.IsCounted =
true;
390 if (
auto *PhiI = dyn_cast<PHINode>(I))
391 if (PhiI->getParent() == L->
getHeader()) {
392 assert(Cost.IsFree &&
"Loop PHIs shouldn't be evaluated as they " 393 "inherently simplify during unrolling.");
400 if (
auto *OpI = dyn_cast<Instruction>(
411 << Iteration <<
"): ");
428 }
while (!CostWorklist.
empty());
430 if (PHIUsedList.
empty())
435 "Cannot track PHI-used values past the first iteration!");
445 "Must have loops in LCSSA form to track live-out values.");
447 LLVM_DEBUG(
dbgs() <<
"Starting LoopUnroll profitability analysis...\n");
453 for (
unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
454 LLVM_DEBUG(
dbgs() <<
" Analyzing iteration " << Iteration <<
"\n");
466 PHI->getNumIncomingValues() == 2 &&
467 "Must have an incoming value only for the preheader and the latch.");
469 Value *V = PHI->getIncomingValueForBlock(
472 if (Iteration != 0 && !C)
473 C = SimplifiedValues.
lookup(V);
475 SimplifiedInputValues.
push_back({PHI, C});
479 SimplifiedValues.
clear();
480 while (!SimplifiedInputValues.
empty())
488 for (
unsigned Idx = 0; Idx != BBWorklist.
size(); ++Idx) {
497 if (isa<DbgInfoIntrinsic>(
I) || EphValues.
count(&
I))
507 bool IsFree = Analyzer.
visit(
I);
508 bool Inserted = InstCostMap.
insert({&
I, (int)Iteration,
512 assert(Inserted &&
"Cannot have a state for an unvisited instruction!");
519 if (
auto *CI = dyn_cast<CallInst>(&
I)) {
529 if (
I.mayHaveSideEffects())
530 AddCostRecursively(
I, Iteration);
533 if (UnrolledCost > MaxUnrolledLoopSize) {
535 <<
" UnrolledCost: " << UnrolledCost
536 <<
", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
547 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
548 if (BI->isConditional()) {
550 SimplifiedValues.
lookup(BI->getCondition())) {
552 if (isa<UndefValue>(SimpleCond))
553 KnownSucc = BI->getSuccessor(0);
555 dyn_cast<ConstantInt>(SimpleCond))
556 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
561 SimplifiedValues.
lookup(
SI->getCondition())) {
563 if (isa<UndefValue>(SimpleCond))
564 KnownSucc =
SI->getSuccessor(0);
566 dyn_cast<ConstantInt>(SimpleCond))
567 KnownSucc =
SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
572 BBWorklist.
insert(KnownSucc);
574 ExitWorklist.
insert({BB, KnownSucc});
583 ExitWorklist.
insert({BB, Succ});
584 AddCostRecursively(*TI, Iteration);
589 if (UnrolledCost == RolledDynamicCost) {
591 <<
" UnrolledCost: " << UnrolledCost <<
"\n");
596 while (!ExitWorklist.
empty()) {
598 std::tie(ExitingBB, ExitBB) = ExitWorklist.
pop_back_val();
605 Value *
Op = PN->getIncomingValueForBlock(ExitingBB);
606 if (
auto *OpI = dyn_cast<Instruction>(Op))
608 AddCostRecursively(*OpI, TripCount - 1);
613 <<
"UnrolledCost: " << UnrolledCost <<
", " 614 <<
"RolledDynamicCost: " << RolledDynamicCost <<
"\n");
615 return {{UnrolledCost, RolledDynamicCost}};
620 const Loop *L,
unsigned &NumCalls,
bool &NotDuplicatable,
bool &
Convergent,
630 unsigned LoopSize = Metrics.
NumInsts;
638 LoopSize =
std::max(LoopSize, BEInsns + 1);
674 "Unroll count hint metadata should have two operands.");
676 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
677 assert(Count >= 1 &&
"Unroll count must be positive.");
689 unsigned MaxPercentThresholdBoost) {
692 else if (Cost.UnrolledCost != 0)
694 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
695 MaxPercentThresholdBoost);
697 return MaxPercentThresholdBoost;
704 assert(LoopSize >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
720 unsigned &TripMultiple,
unsigned LoopSize,
725 bool UserUnrollCount =
UnrollCount.getNumOccurrences() > 0;
726 if (UserUnrollCount) {
736 if (PragmaCount > 0) {
737 UP.
Count = PragmaCount;
746 if (PragmaFullUnroll && TripCount != 0) {
747 UP.
Count = TripCount;
753 bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
754 PragmaEnableUnroll || UserUnrollCount;
756 if (ExplicitUnroll && TripCount != 0) {
773 unsigned ExactTripCount = TripCount;
774 assert((ExactTripCount == 0 || MaxTripCount == 0) &&
775 "ExtractTripCount and MaxTripCount cannot both be non zero.");
776 unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
777 UP.
Count = FullUnrollTripCount;
782 UseUpperBound = (MaxTripCount == FullUnrollTripCount);
783 TripCount = FullUnrollTripCount;
784 TripMultiple = UP.
UpperBound ? 1 : TripMultiple;
785 return ExplicitUnroll;
791 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
795 if (Cost->UnrolledCost < UP.
Threshold * Boost / 100) {
796 UseUpperBound = (MaxTripCount == FullUnrollTripCount);
797 TripCount = FullUnrollTripCount;
798 TripMultiple = UP.
UpperBound ? 1 : TripMultiple;
799 return ExplicitUnroll;
810 return ExplicitUnroll;
819 <<
"-unroll-allow-partial not given\n");
824 UP.
Count = TripCount;
833 while (UP.
Count != 0 && TripCount % UP.
Count != 0)
841 while (UP.
Count != 0 &&
846 if (PragmaEnableUnroll)
849 "UnrollAsDirectedTooLarge",
851 <<
"Unable to unroll loop as directed by unroll(enable) " 853 "because unrolled size is too large.";
858 UP.
Count = TripCount;
862 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
863 UP.
Count != TripCount)
865 return OptimizationRemarkMissed(DEBUG_TYPE,
866 "FullUnrollAsDirectedTooLarge",
867 L->getStartLoc(), L->getHeader())
868 <<
"Unable to fully unroll loop as directed by unroll pragma " 870 "unrolled size is too large.";
872 return ExplicitUnroll;
875 "All cases when TripCount is constant should be covered here.");
876 if (PragmaFullUnroll)
879 DEBUG_TYPE,
"CantFullUnrollAsDirectedRuntimeTripCount",
881 <<
"Unable to fully unroll loop as directed by unroll(full) " 883 "because loop has a runtime trip count.";
904 UP.
Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
907 dbgs() <<
" will not try to unroll loop with runtime trip count " 908 <<
"-unroll-runtime not given\n");
917 while (UP.
Count != 0 &&
922 unsigned OrigCount = UP.
Count;
926 while (UP.
Count != 0 && TripMultiple % UP.
Count != 0)
929 dbgs() <<
"Remainder loop is restricted (that could architecture " 930 "specific or because the loop contains a convergent " 931 "instruction), so unroll count must divide the trip " 933 << TripMultiple <<
". Reducing unroll count from " << OrigCount
934 <<
" to " << UP.
Count <<
".\n");
941 "DifferentUnrollCountFromDirected",
943 <<
"Unable to unroll loop the number of times directed by " 944 "unroll_count pragma because remainder loop is restricted " 945 "(that could architecture specific or because the loop " 946 "contains a convergent instruction) and so must have an " 948 "count that divides the loop trip multiple of " 949 <<
NV(
"TripMultiple", TripMultiple) <<
". Unrolling instead " 950 <<
NV(
"UnrollCount", UP.
Count) <<
" time(s).";
960 return ExplicitUnroll;
979 dbgs() <<
" Not unrolling loop which is not in loop-simplify form.\n");
988 unsigned NumInlineCandidates;
989 bool NotDuplicatable;
992 L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
993 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
994 ProvidedAllowPeeling);
1006 if (NotDuplicatable) {
1007 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains non-duplicatable" 1008 <<
" instructions.\n");
1011 if (NumInlineCandidates != 0) {
1012 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
1017 unsigned TripCount = 0;
1018 unsigned MaxTripCount = 0;
1019 unsigned TripMultiple = 1;
1048 bool MaxOrZero =
false;
1067 bool UseUpperBound =
false;
1069 L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1070 TripMultiple, LoopSize, UP, UseUpperBound);
1074 if (TripCount && UP.
Count > TripCount)
1075 UP.
Count = TripCount;
1081 Loop *RemainderLoop =
nullptr;
1085 LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1089 if (RemainderLoop) {
1106 return UnrollResult;
1118 return UnrollResult;
1123 class LoopUnroll :
public LoopPass {
1132 bool OnlyWhenForced;
1141 LoopUnroll(
int OptLevel = 2,
bool OnlyWhenForced =
false,
1147 :
LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1148 ProvidedCount(std::move(Count)), ProvidedThreshold(
Threshold),
1149 ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
1150 ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
1160 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1161 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1162 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1164 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1165 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1170 bool PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
1173 L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
1174 ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1175 ProvidedUpperBound, ProvidedAllowPeeling);
1205 int Threshold,
int Count,
int AllowPartial,
1206 int Runtime,
int UpperBound,
1211 return new LoopUnroll(
1212 OptLevel, OnlyWhenForced,
1230 Function *
F = L.getHeader()->getParent();
1236 "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not " 1237 "cached at a higher level");
1241 Loop *ParentL = L.getParentLoop();
1248 std::string LoopName = L.getName();
1252 true, OptLevel, OnlyWhenForced,
1282 bool IsCurrentLoopValid =
false;
1289 if (SibLoop == &L) {
1290 IsCurrentLoopValid =
true;
1295 return OldLoops.
count(SibLoop) != 0;
1299 if (!IsCurrentLoopValid) {
1313 template <
typename RangeT>
1321 assert(PreOrderLoops.
empty() &&
"Must start with an empty preorder walk.");
1323 "Must start with an empty preorder walk worklist.");
1329 }
while (!PreOrderWorklist.
empty());
1332 PreOrderLoops.
clear();
1348 LAM = &LAMProxy->getManager();
1355 bool Changed =
false;
1362 for (
auto &L : LI) {
1363 Changed |=
simplifyLoop(L, &DT, &LI, &SE, &AC,
false );
1369 while (!Worklist.
empty()) {
1384 LocalAllowPeeling =
false;
1385 std::string LoopName = L.
getName();
1389 &L, DT, &LI, SE, TTI, AC, ORE,
1390 true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1392 None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1393 UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1404 LAM->clear(L, LoopName);
Pass interface - Implemented by all 'passes'.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_NODISCARD T pop_back_val()
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Implements a dense probed hash-table based set.
void push_back(const T &Elt)
Analysis providing profile information.
The main scalar evolution driver.
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
const MDOperand & getOperand(unsigned I) const
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void dump() const
Support for debugging, callable in GDB: V->dump()
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
Analysis pass that exposes the LoopInfo for a function.
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
bool insert(const value_type &X)
Insert a new element into the SetVector.
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const T & getValue() const LLVM_LVALUE_FUNCTION
const char *const LLVMLoopUnrollFollowupRemainder
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
A set of analyses that are preserved following a run of a transformation pass.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
This is an important base class in LLVM.
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
std::pair< iterator, bool > insert(const ValueT &V)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop's estimated trip count based on branch weight metadata.
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
TargetTransformInfo & TTI
const char *const LLVMLoopUnrollFollowupAll
The transformation should not be applied.
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
A SetVector that performs no allocations if smaller than a certain size.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is the shared class of boolean and integer constants.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Utility to calculate the size and a few similar metrics for a set of basic blocks.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static unsigned UnrollCountPragmaValue(const Loop *L)
LLVM_NODISCARD T pop_back_val()
void markLoopAsDeleted(Loop &L)
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void clear()
Completely clear the SetVector.
amdgpu Simplify well known AMD library false FunctionCallee Callee
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
LLVM_NODISCARD bool empty() const
StringRef getName() const
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
bool empty() const
Determine if the SetVector is empty or not.
iterator find(const_arg_type_t< ValueT > V)
TransformationMode hasUnrollTransformation(Loop *L)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
TransformationMode
The mode sets how eager a transformation should be applied.
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
succ_range successors(Instruction *I)
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
void verifyLoop() const
Verify loop structure.
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
unsigned NumInsts
Number of instructions in the analyzed blocks.
iterator_range< block_iterator > blocks() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool hasProfileData() const
Return true if the function is annotated with profile data.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
LoopUnrollResult
Represents the result of a UnrollLoop invocation.