74#define DEBUG_TYPE "loop-unroll"
78 cl::desc(
"Forget everything in SCEV when doing LoopUnroll, instead of just"
79 " the current top-most loop. This is sometimes preferred to reduce"
84 cl::desc(
"The cost threshold for loop unrolling"));
89 cl::desc(
"The cost threshold for loop unrolling when optimizing for "
94 cl::desc(
"The cost threshold for partial loop unrolling"));
98 cl::desc(
"The maximum 'boost' (represented as a percentage >= 100) applied "
99 "to the threshold when aggressively unrolling a loop due to the "
100 "dynamic cost savings. If completely unrolling a loop will reduce "
101 "the total runtime from X to Y, we boost the loop unroll "
102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103 "X/Y). This limit avoids excessive code bloat."));
107 cl::desc(
"Don't allow loop unrolling to simulate more than this number of"
108 "iterations when checking full unroll profitability"));
112 cl::desc(
"Use this unroll count for all loops including those with "
113 "unroll_count pragma values, for testing purposes"));
117 cl::desc(
"Set the max unroll count for partial and runtime unrolling, for"
118 "testing purposes"));
123 "Set the max unroll count for full unrolling, for testing purposes"));
127 cl::desc(
"Allows loops to be partially unrolled until "
128 "-unroll-threshold loop size is reached."));
132 cl::desc(
"Allow generation of a loop remainder (extra iterations) "
133 "when unrolling a loop."));
137 cl::desc(
"Unroll loops with run-time trip counts"));
142 "The max of trip count upper bound that is considered in unrolling"));
146 cl::desc(
"Unrolled size limit for loops with an unroll(full) or "
147 "unroll_count pragma."));
151 cl::desc(
"If the runtime tripcount for the loop is lower than the "
152 "threshold, the loop is considered as flat and will be less "
153 "aggressively unrolled."));
157 cl::desc(
"Allow the loop remainder to be unrolled."));
164 cl::desc(
"Enqueue and re-visit child loops in the loop PM after unrolling. "
165 "This shouldn't typically be needed as child loops (or their "
166 "clones) were already visited."));
170 cl::desc(
"Threshold (max size of unrolled loop) to use in aggressive (O3) "
175 cl::desc(
"Default threshold (max size of unrolled "
176 "loop), used in all but O3 optimizations"));
180 cl::desc(
"Maximum allowed iterations to unroll under pragma unroll full."));
185static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
193 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195 std::optional<bool> UserUpperBound,
196 std::optional<unsigned> UserFullUnrollMaxCount) {
208 UP.
MaxCount = std::numeric_limits<unsigned>::max();
228 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
232 PGSOQueryType::IRPass));
271 UP.
Count = *UserCount;
272 if (UserAllowPartial)
273 UP.
Partial = *UserAllowPartial;
278 if (UserFullUnrollMaxCount)
292struct UnrolledInstState {
296 unsigned IsCounted : 1;
300struct UnrolledInstStateKeyInfo {
304 static inline UnrolledInstState getEmptyKey() {
305 return {PtrInfo::getEmptyKey(), 0, 0, 0};
308 static inline UnrolledInstState getTombstoneKey() {
309 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
312 static inline unsigned getHashValue(
const UnrolledInstState &S) {
313 return PairInfo::getHashValue({S.I, S.Iteration});
316 static inline bool isEqual(
const UnrolledInstState &LHS,
317 const UnrolledInstState &RHS) {
318 return PairInfo::isEqual({
LHS.I,
LHS.Iteration}, {
RHS.I,
RHS.Iteration});
322struct EstimatedUnrollCost {
324 unsigned UnrolledCost;
328 unsigned RolledDynamicCost;
332 PragmaInfo(
bool UUC,
bool PFU,
unsigned PC,
bool PEU)
333 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
334 PragmaEnableUnroll(PEU) {}
335 const bool UserUnrollCount;
336 const bool PragmaFullUnroll;
337 const unsigned PragmaCount;
338 const bool PragmaEnableUnroll;
360 unsigned MaxIterationsCountToAnalyze) {
364 assert(MaxIterationsCountToAnalyze <
365 (
unsigned)(std::numeric_limits<int>::max() / 2) &&
366 "The unroll iterations max is too large!");
370 if (!L->isInnermost())
374 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
407 auto AddCostRecursively = [&](
Instruction &RootI,
int Iteration) {
408 assert(Iteration >= 0 &&
"Cannot have a negative iteration!");
409 assert(CostWorklist.
empty() &&
"Must start with an empty cost list");
410 assert(PHIUsedList.
empty() &&
"Must start with an empty phi used list");
416 for (;; --Iteration) {
422 auto CostIter = InstCostMap.
find({
I, Iteration, 0, 0});
423 if (CostIter == InstCostMap.
end())
428 auto &
Cost = *CostIter;
434 Cost.IsCounted =
true;
437 if (
auto *PhiI = dyn_cast<PHINode>(
I))
438 if (PhiI->getParent() == L->getHeader()) {
439 assert(
Cost.IsFree &&
"Loop PHIs shouldn't be evaluated as they "
440 "inherently simplify during unrolling.");
447 if (
auto *OpI = dyn_cast<Instruction>(
448 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
449 if (L->contains(OpI))
460 if (auto Res = SimplifiedValues.lookup(Op))
466 << Iteration <<
"): ");
476 auto *OpI = dyn_cast<Instruction>(
Op);
477 if (!OpI || !L->contains(OpI))
483 }
while (!CostWorklist.
empty());
485 if (PHIUsedList.
empty())
490 "Cannot track PHI-used values past the first iteration!");
498 assert(L->isLoopSimplifyForm() &&
"Must put loop into normal form first.");
499 assert(L->isLCSSAForm(DT) &&
500 "Must have loops in LCSSA form to track live-out values.");
502 LLVM_DEBUG(
dbgs() <<
"Starting LoopUnroll profitability analysis...\n");
505 L->getHeader()->getParent()->hasMinSize() ?
511 for (
unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
512 LLVM_DEBUG(
dbgs() <<
" Analyzing iteration " << Iteration <<
"\n");
517 auto *
PHI = dyn_cast<PHINode>(&
I);
524 PHI->getNumIncomingValues() == 2 &&
525 "Must have an incoming value only for the preheader and the latch.");
527 Value *V =
PHI->getIncomingValueForBlock(
528 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
529 if (Iteration != 0 && SimplifiedValues.
count(V))
530 V = SimplifiedValues.
lookup(V);
535 SimplifiedValues.
clear();
536 while (!SimplifiedInputValues.
empty())
542 BBWorklist.
insert(L->getHeader());
553 if (isa<DbgInfoIntrinsic>(
I) || EphValues.
count(&
I))
563 bool IsFree = Analyzer.visit(
I);
564 bool Inserted = InstCostMap.
insert({&
I, (int)Iteration,
568 assert(Inserted &&
"Cannot have a state for an unvisited instruction!");
575 if (
auto *CI = dyn_cast<CallInst>(&
I)) {
576 const Function *Callee = CI->getCalledFunction();
585 if (
I.mayHaveSideEffects())
586 AddCostRecursively(
I, Iteration);
589 if (UnrolledCost > MaxUnrolledLoopSize) {
591 <<
" UnrolledCost: " << UnrolledCost
592 <<
", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
601 if (SimplifiedValues.
count(V))
602 V = SimplifiedValues.
lookup(V);
603 return dyn_cast<Constant>(V);
609 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
610 if (BI->isConditional()) {
611 if (
auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
613 if (isa<UndefValue>(SimpleCond))
614 KnownSucc = BI->getSuccessor(0);
616 dyn_cast<ConstantInt>(SimpleCond))
617 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
620 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
621 if (
auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
623 if (isa<UndefValue>(SimpleCond))
624 KnownSucc = SI->getSuccessor(0);
626 dyn_cast<ConstantInt>(SimpleCond))
627 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
631 if (L->contains(KnownSucc))
632 BBWorklist.
insert(KnownSucc);
634 ExitWorklist.
insert({BB, KnownSucc});
640 if (L->contains(Succ))
643 ExitWorklist.
insert({BB, Succ});
644 AddCostRecursively(*TI, Iteration);
649 if (UnrolledCost == RolledDynamicCost) {
651 <<
" UnrolledCost: " << UnrolledCost <<
"\n");
656 while (!ExitWorklist.
empty()) {
658 std::tie(ExitingBB, ExitBB) = ExitWorklist.
pop_back_val();
661 auto *PN = dyn_cast<PHINode>(&
I);
665 Value *
Op = PN->getIncomingValueForBlock(ExitingBB);
666 if (
auto *OpI = dyn_cast<Instruction>(
Op))
667 if (L->contains(OpI))
668 AddCostRecursively(*OpI, TripCount - 1);
673 "All instructions must have a valid cost, whether the "
674 "loop is rolled or unrolled.");
677 <<
"UnrolledCost: " << UnrolledCost <<
", "
678 <<
"RolledDynamicCost: " << RolledDynamicCost <<
"\n");
688 Metrics.analyzeBasicBlock(BB,
TTI, EphValues,
false,
691 NotDuplicatable =
Metrics.notDuplicatable;
704 if (LoopSize.
isValid() && LoopSize < BEInsns + 1)
706 LoopSize = BEInsns + 1;
721 if (NotDuplicatable) {
722 LLVM_DEBUG(
dbgs() <<
" Non-duplicatable blocks prevent unrolling.\n");
730 unsigned CountOverwrite)
const {
732 assert(LS >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
743 if (
MDNode *LoopID = L->getLoopID())
770 "Unroll count hint metadata should have two operands.");
772 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
773 assert(Count >= 1 &&
"Unroll count must be positive.");
785 unsigned MaxPercentThresholdBoost) {
786 if (
Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
788 else if (
Cost.UnrolledCost != 0)
790 return std::min(100 *
Cost.RolledDynamicCost /
Cost.UnrolledCost,
791 MaxPercentThresholdBoost);
793 return MaxPercentThresholdBoost;
796static std::optional<unsigned>
798 const unsigned TripMultiple,
const unsigned TripCount,
805 if (PInfo.UserUnrollCount) {
812 if (PInfo.PragmaCount > 0) {
813 if ((UP.
AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
814 return PInfo.PragmaCount;
817 if (PInfo.PragmaFullUnroll && TripCount != 0) {
829 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
842 assert(FullUnrollTripCount &&
"should be non-zero!");
850 return FullUnrollTripCount;
856 L, FullUnrollTripCount, DT, SE, EphValues,
TTI,
862 return FullUnrollTripCount;
867static std::optional<unsigned>
877 <<
"-unroll-allow-partial not given\n");
934 const bool UserUnrollCount =
UnrollCount.getNumOccurrences() > 0;
939 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
940 PragmaEnableUnroll || UserUnrollCount;
942 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
949 "explicit unroll count",
false);
959 MaxTripCount, UCE, UP)) {
960 UP.
Count = *UnrollFactor;
962 if (UserUnrollCount || (PragmaCount > 0)) {
966 UP.
Runtime |= (PragmaCount > 0);
967 return ExplicitUnroll;
969 if (ExplicitUnroll && TripCount != 0) {
983 UP.
Count = TripCount;
985 TripCount, UCE, UP)) {
986 UP.
Count = *UnrollFactor;
987 UseUpperBound =
false;
988 return ExplicitUnroll;
1004 if (!TripCount && MaxTripCount && (UP.
UpperBound || MaxOrZero) &&
1006 UP.
Count = MaxTripCount;
1008 MaxTripCount, UCE, UP)) {
1009 UP.
Count = *UnrollFactor;
1010 UseUpperBound =
true;
1011 return ExplicitUnroll;
1020 return ExplicitUnroll;
1031 UP.
Count = *UnrollFactor;
1033 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1034 UP.
Count != TripCount)
1037 "FullUnrollAsDirectedTooLarge",
1038 L->getStartLoc(), L->getHeader())
1039 <<
"Unable to fully unroll loop as directed by unroll pragma "
1041 "unrolled size is too large.";
1045 if (UP.
Count == 0) {
1046 if (PragmaEnableUnroll)
1049 "UnrollAsDirectedTooLarge",
1050 L->getStartLoc(), L->getHeader())
1051 <<
"Unable to unroll loop as directed by unroll(enable) "
1053 "because unrolled size is too large.";
1057 return ExplicitUnroll;
1060 "All cases when TripCount is constant should be covered here.");
1061 if (PragmaFullUnroll)
1064 DEBUG_TYPE,
"CantFullUnrollAsDirectedRuntimeTripCount",
1065 L->getStartLoc(), L->getHeader())
1066 <<
"Unable to fully unroll loop as directed by unroll(full) "
1068 "because loop has a runtime trip count.";
1085 if (L->getHeader()->getParent()->hasProfileData()) {
1093 UP.
Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1096 dbgs() <<
" will not try to unroll loop with runtime trip count "
1097 <<
"-unroll-runtime not given\n");
1106 while (UP.
Count != 0 &&
1111 unsigned OrigCount = UP.
Count;
1115 while (UP.
Count != 0 && TripMultiple % UP.
Count != 0)
1118 dbgs() <<
"Remainder loop is restricted (that could architecture "
1119 "specific or because the loop contains a convergent "
1120 "instruction), so unroll count must divide the trip "
1122 << TripMultiple <<
". Reducing unroll count from " << OrigCount
1123 <<
" to " << UP.
Count <<
".\n");
1125 using namespace ore;
1130 "DifferentUnrollCountFromDirected",
1131 L->getStartLoc(), L->getHeader())
1132 <<
"Unable to unroll loop the number of times directed by "
1133 "unroll_count pragma because remainder loop is restricted "
1134 "(that could architecture specific or because the loop "
1135 "contains a convergent instruction) and so must have an "
1137 "count that divides the loop trip multiple of "
1138 << NV(
"TripMultiple", TripMultiple) <<
". Unrolling instead "
1139 << NV(
"UnrollCount", UP.
Count) <<
" time(s).";
1146 if (MaxTripCount && UP.
Count > MaxTripCount)
1147 UP.
Count = MaxTripCount;
1153 return ExplicitUnroll;
1161 bool OnlyFullUnroll,
bool OnlyWhenForced,
bool ForgetAllSCEV,
1162 std::optional<unsigned> ProvidedCount,
1163 std::optional<unsigned> ProvidedThreshold,
1164 std::optional<bool> ProvidedAllowPartial,
1165 std::optional<bool> ProvidedRuntime,
1166 std::optional<bool> ProvidedUpperBound,
1167 std::optional<bool> ProvidedAllowPeeling,
1168 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1169 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1173 << L->getHeader()->getParent()->getName() <<
"] Loop %"
1174 << L->getHeader()->getName() <<
"\n");
1183 Loop *ParentL = L->getParentLoop();
1184 if (ParentL !=
nullptr &&
1188 <<
" llvm.loop.unroll_and_jam.\n");
1199 <<
" Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1203 if (!L->isLoopSimplifyForm()) {
1205 dbgs() <<
" Not unrolling loop which is not in loop-simplify form.\n");
1211 if (OnlyWhenForced && !(TM &
TM_Enable))
1214 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1216 L, SE,
TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1217 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1218 ProvidedFullUnrollMaxCount);
1220 L, SE,
TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
true);
1246 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
1255 unsigned TripCount = 0;
1256 unsigned TripMultiple = 1;
1258 L->getExitingBlocks(ExitingBlocks);
1259 for (
BasicBlock *ExitingBlock : ExitingBlocks)
1261 if (!TripCount || TC < TripCount)
1262 TripCount = TripMultiple = TC;
1268 BasicBlock *ExitingBlock = L->getLoopLatch();
1269 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1270 ExitingBlock = L->getExitingBlock();
1286 unsigned MaxTripCount = 0;
1287 bool MaxOrZero =
false;
1295 bool UseUpperBound =
false;
1297 L,
TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
1298 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1305 assert(UP.
Count == 1 &&
"Cannot perform peel and unroll in the same step");
1306 LLVM_DEBUG(
dbgs() <<
"PEELING loop %" << L->getHeader()->getName()
1307 <<
" with iteration count " << PP.
PeelCount <<
"!\n");
1321 L->setLoopAlreadyUnrolled();
1328 if (OnlyFullUnroll && (UP.
Count < TripCount || UP.
Count < MaxTripCount)) {
1330 dbgs() <<
"Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1339 UP.
Runtime &= TripCount == 0 && TripMultiple % UP.
Count != 0;
1342 MDNode *OrigLoopID = L->getLoopID();
1345 Loop *RemainderLoop =
nullptr;
1356 L, ULO, LI, &SE, &DT, &AC, &
TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1360 if (RemainderLoop) {
1361 std::optional<MDNode *> RemainderLoopID =
1364 if (RemainderLoopID)
1365 RemainderLoop->
setLoopID(*RemainderLoopID);
1369 std::optional<MDNode *> NewLoopID =
1373 L->setLoopID(*NewLoopID);
1377 return UnrollResult;
1384 L->setLoopAlreadyUnrolled();
1386 return UnrollResult;
1391class LoopUnroll :
public LoopPass {
1400 bool OnlyWhenForced;
1407 std::optional<unsigned> ProvidedCount;
1408 std::optional<unsigned> ProvidedThreshold;
1409 std::optional<bool> ProvidedAllowPartial;
1410 std::optional<bool> ProvidedRuntime;
1411 std::optional<bool> ProvidedUpperBound;
1412 std::optional<bool> ProvidedAllowPeeling;
1413 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1414 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1416 LoopUnroll(
int OptLevel = 2,
bool OnlyWhenForced =
false,
1417 bool ForgetAllSCEV =
false,
1418 std::optional<unsigned> Threshold = std::nullopt,
1419 std::optional<unsigned> Count = std::nullopt,
1420 std::optional<bool> AllowPartial = std::nullopt,
1421 std::optional<bool>
Runtime = std::nullopt,
1422 std::optional<bool> UpperBound = std::nullopt,
1423 std::optional<bool> AllowPeeling = std::nullopt,
1424 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1425 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1426 :
LoopPass(
ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1427 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(
std::
move(Count)),
1428 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1429 ProvidedRuntime(
Runtime), ProvidedUpperBound(UpperBound),
1430 ProvidedAllowPeeling(AllowPeeling),
1431 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1432 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1442 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1443 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1444 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1446 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
1447 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1452 bool PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
1455 L, DT, LI, SE,
TTI, AC, ORE,
nullptr,
nullptr, PreserveLCSSA, OptLevel,
1456 false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1457 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1458 ProvidedUpperBound, ProvidedAllowPeeling,
1459 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1461 if (Result == LoopUnrollResult::FullyUnrolled)
1464 return Result != LoopUnrollResult::Unmodified;
1480char LoopUnroll::ID = 0;
1489 bool ForgetAllSCEV,
int Threshold,
int Count,
1490 int AllowPartial,
int Runtime,
int UpperBound,
1495 return new LoopUnroll(
1496 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1497 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1498 Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1499 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1501 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1502 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1515 Loop *ParentL = L.getParentLoop();
1522 std::string LoopName = std::string(L.getName());
1527 true, OptLevel,
true,
1528 OnlyWhenForced, ForgetSCEV, std::nullopt,
1529 std::nullopt,
false,
1560 bool IsCurrentLoopValid =
false;
1567 if (SibLoop == &L) {
1568 IsCurrentLoopValid =
true;
1577 if (!IsCurrentLoopValid) {
1607 LAM = &LAMProxy->getManager();
1612 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1615 bool Changed =
false;
1622 for (
const auto &L : LI) {
1633 while (!Worklist.
empty()) {
1640 Loop *ParentL = L.getParentLoop();
1646 std::optional<bool> LocalAllowPeeling = UnrollOpts.
AllowPeeling;
1647 if (PSI && PSI->hasHugeWorkingSetSize())
1648 LocalAllowPeeling =
false;
1649 std::string LoopName = std::string(L.getName());
1653 &L, DT, &LI, SE,
TTI, AC, ORE, BFI, PSI,
1683 OS, MapClassName2PassName);
1695 <<
"profile-peeling;";
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
mir Rename Register Operands
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
This is the shared class of boolean and integer constants.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Function * getFunction() const
Return the function this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Analysis pass that exposes the LoopInfo for a function.
void verifyLoop() const
Verify loop structure.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Produce an estimate of the unrolled cost of the specified loop.
ConvergenceKind Convergence
bool ConvergenceAllowsRuntime
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
bool canUnroll() const
Whether it is legal to unroll this loop.
unsigned NumInlineCandidates
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
uint64_t getRolledLoopSize() const
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
void initializeLoopUnrollPass(PassRegistry &)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
cl::opt< bool > ForgetSCEVInLoopUnroll
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
const char *const LLVMLoopUnrollFollowupAll
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
@ TM_Disable
The transformation should not be applied.
@ TM_Enable
The transformation should be applied without considering a cost model.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
const char *const LLVMLoopUnrollFollowupRemainder
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
Implement std::hash so that hash_code can be used in STL containers.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
An information struct used to provide DenseMap with the various necessary components for a given valu...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
const bool ForgetSCEV
If true, forget all loops when unrolling.
std::optional< unsigned > FullUnrollMaxCount
std::optional< bool > AllowPartial
std::optional< bool > AllowRuntime
std::optional< bool > AllowProfileBasedPeeling
std::optional< bool > AllowPeeling
std::optional< bool > AllowUpperBound
A CRTP mix-in to automatically provide informational APIs needed for passes.
const Instruction * Heart
bool AllowExpensiveTripCount
unsigned SCEVExpansionBudget