74#define DEBUG_TYPE "loop-unroll"
78 cl::desc(
"Forget everything in SCEV when doing LoopUnroll, instead of just"
79 " the current top-most loop. This is sometimes preferred to reduce"
84 cl::desc(
"The cost threshold for loop unrolling"));
89 cl::desc(
"The cost threshold for loop unrolling when optimizing for "
94 cl::desc(
"The cost threshold for partial loop unrolling"));
98 cl::desc(
"The maximum 'boost' (represented as a percentage >= 100) applied "
99 "to the threshold when aggressively unrolling a loop due to the "
100 "dynamic cost savings. If completely unrolling a loop will reduce "
101 "the total runtime from X to Y, we boost the loop unroll "
102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103 "X/Y). This limit avoids excessive code bloat."));
107 cl::desc(
"Don't allow loop unrolling to simulate more than this number of "
108 "iterations when checking full unroll profitability"));
112 cl::desc(
"Use this unroll count for all loops including those with "
113 "unroll_count pragma values, for testing purposes"));
117 cl::desc(
// Adjacent string literals are concatenated verbatim, so the first piece
// needs a trailing space to keep "for" and "testing" apart in the help text.
"Set the max unroll count for partial and runtime unrolling, for "
118 "testing purposes"));
123 "Set the max unroll count for full unrolling, for testing purposes"));
127 cl::desc(
"Allows loops to be partially unrolled until "
128 "-unroll-threshold loop size is reached."));
132 cl::desc(
"Allow generation of a loop remainder (extra iterations) "
133 "when unrolling a loop."));
137 cl::desc(
"Unroll loops with run-time trip counts"));
142 "The max of trip count upper bound that is considered in unrolling"));
146 cl::desc(
"Unrolled size limit for loops with unroll metadata "
147 "(full, enable, or count)."));
151 cl::desc(
"If the runtime tripcount for the loop is lower than the "
152 "threshold, the loop is considered as flat and will be less "
153 "aggressively unrolled."));
157 cl::desc(
"Allow the loop remainder to be unrolled."));
164 cl::desc(
"Enqueue and re-visit child loops in the loop PM after unrolling. "
165 "This shouldn't typically be needed as child loops (or their "
166 "clones) were already visited."));
170 cl::desc(
"Threshold (max size of unrolled loop) to use in aggressive (O3) "
175 cl::desc(
"Default threshold (max size of unrolled "
176 "loop), used in all but O3 optimizations"));
180 cl::desc(
"Maximum allowed iterations to unroll under pragma unroll full."));
/// Magic value for use with the Threshold parameter: the largest value an
/// `unsigned` can hold, so no `unsigned` size estimate can ever exceed it.
185static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
193 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195 std::optional<bool> UserUpperBound,
196 std::optional<unsigned> UserFullUnrollMaxCount) {
208 UP.
MaxCount = std::numeric_limits<unsigned>::max();
227 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
230 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
273 UP.
Count = *UserCount;
274 if (UserAllowPartial)
275 UP.
Partial = *UserAllowPartial;
280 if (UserFullUnrollMaxCount)
294struct UnrolledInstState {
298 unsigned IsCounted : 1;
302struct UnrolledInstStateKeyInfo {
303 using PtrInfo = DenseMapInfo<Instruction *>;
304 using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
306 static inline UnrolledInstState getEmptyKey() {
307 return {PtrInfo::getEmptyKey(), 0, 0, 0};
310 static inline UnrolledInstState getTombstoneKey() {
311 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
314 static inline unsigned getHashValue(
const UnrolledInstState &S) {
315 return PairInfo::getHashValue({S.I, S.Iteration});
318 static inline bool isEqual(
const UnrolledInstState &
LHS,
319 const UnrolledInstState &
RHS) {
320 return PairInfo::isEqual({
LHS.I,
LHS.Iteration}, {
RHS.I,
RHS.Iteration});
324struct EstimatedUnrollCost {
326 unsigned UnrolledCost;
330 unsigned RolledDynamicCost;
352 unsigned MaxIterationsCountToAnalyze) {
356 assert(MaxIterationsCountToAnalyze <
357 (
unsigned)(std::numeric_limits<int>::max() / 2) &&
358 "The unroll iterations max is too large!");
362 if (!L->isInnermost()) {
364 <<
"Not analyzing loop cost: not an innermost loop.\n");
369 if (!TripCount || TripCount > MaxIterationsCountToAnalyze) {
371 <<
"Not analyzing loop cost: trip count "
372 << (TripCount ?
"too large" :
"unknown") <<
".\n");
406 auto AddCostRecursively = [&](
Instruction &RootI,
int Iteration) {
407 assert(Iteration >= 0 &&
"Cannot have a negative iteration!");
408 assert(CostWorklist.
empty() &&
"Must start with an empty cost list");
409 assert(PHIUsedList.
empty() &&
"Must start with an empty phi used list");
415 for (;; --Iteration) {
421 auto CostIter = InstCostMap.
find({
I, Iteration, 0, 0});
422 if (CostIter == InstCostMap.
end())
427 auto &Cost = *CostIter;
433 Cost.IsCounted =
true;
437 if (PhiI->getParent() == L->getHeader()) {
438 assert(Cost.IsFree &&
"Loop PHIs shouldn't be evaluated as they "
439 "inherently simplify during unrolling.");
447 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
448 if (L->contains(OpI))
457 transform(
I->operands(), std::back_inserter(Operands),
459 if (auto Res = SimplifiedValues.lookup(Op))
463 UnrolledCost +=
TTI.getInstructionCost(
I, Operands,
CostKind);
465 <<
"Adding cost of instruction (iteration " << Iteration
477 if (!OpI || !L->contains(OpI))
483 }
while (!CostWorklist.
empty());
485 if (PHIUsedList.
empty())
490 "Cannot track PHI-used values past the first iteration!");
498 assert(L->isLoopSimplifyForm() &&
"Must put loop into normal form first.");
499 assert(L->isLCSSAForm(DT) &&
500 "Must have loops in LCSSA form to track live-out values.");
503 <<
"Starting LoopUnroll profitability analysis...\n");
506 L->getHeader()->getParent()->hasMinSize() ?
512 for (
unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
525 PHI->getNumIncomingValues() == 2 &&
526 "Must have an incoming value only for the preheader and the latch.");
528 Value *V =
PHI->getIncomingValueForBlock(
529 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
530 if (Iteration != 0 && SimplifiedValues.
count(V))
531 V = SimplifiedValues.
lookup(V);
536 SimplifiedValues.
clear();
537 while (!SimplifiedInputValues.
empty())
543 BBWorklist.
insert(L->getHeader());
545 for (
unsigned Idx = 0; Idx != BBWorklist.
size(); ++Idx) {
559 RolledDynamicCost +=
TTI.getInstructionCost(&
I,
CostKind);
564 bool IsFree = Analyzer.
visit(
I);
565 bool Inserted = InstCostMap.
insert({&
I, (int)Iteration,
569 assert(Inserted &&
"Cannot have a state for an unvisited instruction!");
577 const Function *Callee = CI->getCalledFunction();
578 if (!Callee ||
TTI.isLoweredToCall(Callee)) {
580 <<
"Can't analyze cost of loop with call\n");
587 if (
I.mayHaveSideEffects())
588 AddCostRecursively(
I, Iteration);
591 if (UnrolledCost > MaxUnrolledLoopSize) {
593 dbgs().
indent(3) <<
"Exceeded threshold.. exiting.\n";
595 <<
"UnrolledCost: " << UnrolledCost
596 <<
", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize <<
"\n";
605 if (SimplifiedValues.
count(V))
606 V = SimplifiedValues.
lookup(V);
614 if (
auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
617 KnownSucc = BI->getSuccessor(0);
620 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
623 if (
auto *SimpleCond = getSimplifiedConstant(
SI->getCondition())) {
626 KnownSucc =
SI->getSuccessor(0);
629 KnownSucc =
SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
633 if (L->contains(KnownSucc))
634 BBWorklist.
insert(KnownSucc);
636 ExitWorklist.
insert({BB, KnownSucc});
642 if (L->contains(Succ))
645 ExitWorklist.
insert({BB, Succ});
646 AddCostRecursively(*TI, Iteration);
651 if (UnrolledCost == RolledDynamicCost) {
653 dbgs().
indent(3) <<
"No opportunities found.. exiting.\n";
654 dbgs().
indent(3) <<
"UnrolledCost: " << UnrolledCost <<
"\n";
660 while (!ExitWorklist.
empty()) {
662 std::tie(ExitingBB, ExitBB) = ExitWorklist.
pop_back_val();
669 Value *
Op = PN->getIncomingValueForBlock(ExitingBB);
671 if (L->contains(OpI))
672 AddCostRecursively(*OpI, TripCount - 1);
677 "All instructions must have a valid cost, whether the "
678 "loop is rolled or unrolled.");
682 dbgs().
indent(3) <<
"UnrolledCost: " << UnrolledCost
683 <<
", RolledDynamicCost: " << RolledDynamicCost <<
"\n";
692 bool TripCountIsUniform) {
695 Metrics.analyzeBasicBlock(BB,
TTI, EphValues,
false,
698 NotDuplicatable =
Metrics.notDuplicatable;
715 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
717 LoopSize = BEInsns + 1;
721 const Loop *L)
const {
722 auto ReportCannotUnroll = [&](
StringRef Reason) {
727 L->getStartLoc(), L->getHeader())
728 <<
"unable to unroll loop: " << Reason;
733 ReportCannotUnroll(
"contains convergent operations");
736 if (!LoopSize.isValid()) {
737 ReportCannotUnroll(
"loop size could not be computed");
740 if (NotDuplicatable) {
741 ReportCannotUnroll(
"contains non-duplicatable instructions");
749 unsigned CountOverwrite)
const {
750 unsigned LS = LoopSize.getValue();
751 assert(LS >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
797 "Unroll count hint metadata should have two operands.");
800 assert(
Count >= 1 &&
"Unroll count must be positive.");
821 unsigned MaxPercentThresholdBoost) {
822 if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
824 else if (Cost.UnrolledCost != 0)
826 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
827 MaxPercentThresholdBoost);
829 return MaxPercentThresholdBoost;
832static std::optional<unsigned>
834 const unsigned TripMultiple,
const unsigned TripCount,
850 <<
"Not unrolling with user count " <<
UnrollCount <<
": "
852 :
"remainder not allowed")
864 <<
"Not unrolling with pragma count " << PInfo.
PragmaCount
865 <<
": remainder not allowed, count does not divide trip "
866 <<
"multiple " << TripMultiple <<
".\n");
869 L->getStartLoc(), L->getHeader())
870 <<
"may be unable to unroll loop with count "
872 <<
": remainder loop is not allowed and count does not divide "
874 <<
ore::NV(
"TripMultiple", TripMultiple);
879 if (TripCount != 0) {
885 <<
"Won't unroll; trip count is too large.\n");
888 "PragmaFullUnrollTripCountTooLarge",
889 L->getStartLoc(), L->getHeader())
890 <<
"may be unable to fully unroll loop: trip count "
891 <<
ore::NV(
"TripCount", TripCount) <<
" exceeds limit "
898 <<
"Fully unrolling with trip count: " << TripCount <<
".\n");
902 <<
"Not fully unrolling: unknown trip count.\n");
905 "PragmaFullUnrollUnknownTripCount",
906 L->getStartLoc(), L->getHeader())
907 <<
"may be unable to fully unroll loop: trip count is unknown";
914 <<
"Unrolling with max trip count: " << MaxTripCount <<
".\n");
926 assert(FullUnrollTripCount &&
"should be non-zero!");
930 <<
"Not unrolling: trip count " << FullUnrollTripCount
940 <<
" < threshold " << UP.
Threshold <<
".\n");
941 return FullUnrollTripCount;
945 <<
"Unrolled size " << UnrolledSize <<
" exceeds threshold "
946 << UP.
Threshold <<
"; checking for cost benefit.\n");
952 L, FullUnrollTripCount, DT, SE, EphValues,
TTI,
957 unsigned BoostedThreshold = UP.
Threshold * Boost / 100;
958 if (Cost->UnrolledCost < BoostedThreshold) {
960 return FullUnrollTripCount;
963 <<
"Not unrolling: cost " << Cost->UnrolledCost
964 <<
" >= boosted threshold " << BoostedThreshold <<
".\n");
970static std::optional<unsigned>
980 <<
"-unroll-allow-partial not given\n");
993 <<
"Unrolled size exceeds threshold; reducing count "
994 <<
"from " <<
count <<
" to " << NewCount <<
".\n");
1009 while (
count != 0 &&
1015 <<
"Will not partially unroll: no profitable count.\n");
1025 <<
"Partially unrolling with count: " <<
count <<
"\n");
1041 const unsigned TripCount,
1042 const unsigned MaxTripCount,
const bool MaxOrZero,
1043 const unsigned TripMultiple,
1051 << TripCount <<
", MaxTripCount=" << MaxTripCount
1052 << (MaxOrZero ?
" (MaxOrZero)" :
"")
1053 <<
", TripMultiple=" << TripMultiple <<
"\n");
1058 dbgs().
indent(1) <<
"Explicit unroll requested:";
1060 dbgs() <<
" user-count";
1062 dbgs() <<
" pragma-full";
1066 dbgs() <<
" pragma-enable";
1076 "explicit unroll count");
1079 <<
"Using explicit peel count: " << PP.
PeelCount <<
".\n");
1097 MaxTripCount, UCE, UP, ORE)) {
1098 UP.
Count = *UnrollFactor;
1122 TripCount, UCE, UP)) {
1123 UP.
Count = *UnrollFactor;
1141 if (!TripCount && MaxTripCount && (UP.
UpperBound || MaxOrZero) &&
1144 MaxTripCount, UCE, UP)) {
1145 UP.
Count = *UnrollFactor;
1155 <<
"Peeling with count: " << PP.
PeelCount <<
".\n");
1170 UP.
Count = *UnrollFactor;
1174 "All cases when TripCount is constant should be covered here.");
1181 <<
"Not runtime unrolling: disabled by pragma.\n");
1188 << MaxTripCount <<
" is small (<= "
1194 if (L->getHeader()->getParent()->hasProfileData()) {
1204 <<
"Will not try to unroll loop with runtime trip count "
1205 <<
"because -unroll-runtime not given\n");
1214 while (UP.
Count != 0 &&
1219 unsigned OrigCount = UP.
Count;
1223 while (UP.
Count != 0 && TripMultiple % UP.
Count != 0)
1226 <<
"Remainder loop is restricted (that could be architecture "
1227 "specific or because the loop contains a convergent "
1228 "instruction), so unroll count must divide the trip "
1230 << TripMultiple <<
". Reducing unroll count from " << OrigCount
1231 <<
" to " << UP.
Count <<
".\n");
1237 if (MaxTripCount && UP.
Count > MaxTripCount)
1238 UP.
Count = MaxTripCount;
1244 <<
"Runtime unrolling with count: " << UP.
Count <<
"\n");
1253 bool OnlyFullUnroll,
bool OnlyWhenForced,
bool ForgetAllSCEV,
1254 std::optional<unsigned> ProvidedCount,
1255 std::optional<unsigned> ProvidedThreshold,
1256 std::optional<bool> ProvidedAllowPartial,
1257 std::optional<bool> ProvidedRuntime,
1258 std::optional<bool> ProvidedUpperBound,
1259 std::optional<bool> ProvidedAllowPeeling,
1260 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1261 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1265 << L->getHeader()->getParent()->getName() <<
"] Loop %"
1266 << L->getHeader()->getName()
1267 <<
" (depth=" << L->getLoopDepth() <<
")\n");
1279 Loop *ParentL = L->getParentLoop();
1280 if (ParentL !=
nullptr &&
1284 <<
" llvm.loop.unroll_and_jam.\n");
1295 <<
"Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1299 if (!L->isLoopSimplifyForm()) {
1301 <<
"Not unrolling loop which is not in loop-simplify form.\n");
1305 L->getStartLoc(), L->getHeader())
1306 <<
"unable to unroll loop: not in loop-simplify form";
1314 if (OnlyWhenForced && !(TM &
TM_Enable)) {
1316 <<
"disabled and loop not explicitly "
1321 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1323 L, SE,
TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1324 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1325 ProvidedFullUnrollMaxCount);
1327 L, SE,
TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
true);
1337 L->getStartLoc(), L->getHeader())
1338 <<
"unable to unroll loop: unroll threshold is zero";
1366 <<
"Not unrolling loop with inlinable calls.\n");
1370 "InlineCandidatesPreventUnroll",
1371 L->getStartLoc(), L->getHeader())
1372 <<
"unable to unroll loop: contains inlinable calls";
1383 unsigned TripCount = 0;
1384 unsigned TripMultiple = 1;
1386 L->getExitingBlocks(ExitingBlocks);
1387 for (
BasicBlock *ExitingBlock : ExitingBlocks)
1389 if (!TripCount || TC < TripCount)
1390 TripCount = TripMultiple = TC;
1396 BasicBlock *ExitingBlock = L->getLoopLatch();
1397 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1398 ExitingBlock = L->getExitingBlock();
1411 unsigned MaxTripCount = 0;
1412 bool MaxOrZero =
false;
1421 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
1424 <<
"Not unrolling: no viable strategy found.\n");
1428 L->getStartLoc(), L->getHeader())
1429 <<
"unable to unroll loop: no viable unroll count found";
1438 assert(UP.
Count == 1 &&
"Cannot perform peel and unroll in the same step");
1439 LLVM_DEBUG(
dbgs() <<
"PEELING loop %" << L->getHeader()->getName()
1440 <<
" with iteration count " << PP.
PeelCount <<
"!\n");
1456 L->setLoopAlreadyUnrolled();
1461 if (OnlyFullUnroll && ((!TripCount && !MaxTripCount) ||
1462 UP.
Count < TripCount || UP.
Count < MaxTripCount)) {
1464 <<
"Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1473 UP.
Runtime &= TripCount == 0 && TripMultiple % UP.
Count != 0;
1476 MDNode *OrigLoopID = L->getLoopID();
1478 DebugLoc LoopStartLoc = L->getStartLoc();
1482 Loop *RemainderLoop =
nullptr;
1495 L, ULO, LI, &SE, &DT, &AC, &
TTI, &ORE, PreserveLCSSA, &RemainderLoop,
AA);
1499 <<
"Failed to unroll loop as explicitly requested.\n");
1502 LoopStartLoc, LoopHeader)
1503 <<
"failed to unroll loop as explicitly requested";
1512 LoopStartLoc, LoopHeader)
1513 <<
"unable to fully unroll loop as directed; "
1514 <<
"unrolled by factor " <<
ore::NV(
"UnrollCount", ULO.
Count);
1520 LoopStartLoc, LoopHeader)
1521 <<
"unable to unroll loop with requested count "
1523 <<
"; unrolled by factor " <<
ore::NV(
"UnrollCount", ULO.
Count);
1527 if (RemainderLoop) {
1528 std::optional<MDNode *> RemainderLoopID =
1531 if (RemainderLoopID)
1532 RemainderLoop->
setLoopID(*RemainderLoopID);
1536 std::optional<MDNode *> NewLoopID =
1540 L->setLoopID(*NewLoopID);
1544 return UnrollResult;
1551 L->setLoopAlreadyUnrolled();
1553 return UnrollResult;
1558class LoopUnroll :
public LoopPass {
1567 bool OnlyWhenForced;
1574 std::optional<unsigned> ProvidedCount;
1575 std::optional<unsigned> ProvidedThreshold;
1576 std::optional<bool> ProvidedAllowPartial;
1577 std::optional<bool> ProvidedRuntime;
1578 std::optional<bool> ProvidedUpperBound;
1579 std::optional<bool> ProvidedAllowPeeling;
1580 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1581 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1583 LoopUnroll(
int OptLevel = 2,
bool OnlyWhenForced =
false,
1584 bool ForgetAllSCEV =
false,
1585 std::optional<unsigned> Threshold = std::nullopt,
1586 std::optional<unsigned>
Count = std::nullopt,
1587 std::optional<bool> AllowPartial = std::nullopt,
1588 std::optional<bool>
Runtime = std::nullopt,
1589 std::optional<bool> UpperBound = std::nullopt,
1590 std::optional<bool> AllowPeeling = std::nullopt,
1591 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1592 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1593 : LoopPass(
ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1594 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::
move(
Count)),
1595 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1596 ProvidedRuntime(
Runtime), ProvidedUpperBound(UpperBound),
1597 ProvidedAllowPeeling(AllowPeeling),
1598 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1599 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1603 bool runOnLoop(Loop *L, LPPassManager &LPM)
override {
1609 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1610 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1611 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1612 const TargetTransformInfo &
TTI =
1613 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
1614 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1617 ? &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo()
1622 OptimizationRemarkEmitter ORE(&
F);
1623 bool PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
1626 L, DT, LI, SE,
TTI, AC, ORE,
nullptr,
nullptr, PreserveLCSSA, OptLevel,
1627 false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1628 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1629 ProvidedUpperBound, ProvidedAllowPeeling,
1630 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount, UI);
1632 if (Result == LoopUnrollResult::FullyUnrolled)
1635 return Result != LoopUnrollResult::Unmodified;
1640 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1652char LoopUnroll::ID = 0;
1662 bool ForgetAllSCEV,
int Threshold,
int Count,
1663 int AllowPartial,
int Runtime,
int UpperBound,
1668 return new LoopUnroll(
1669 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1670 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1671 Count == -1 ? std::nullopt : std::optional<unsigned>(
Count),
1672 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1674 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1675 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1688 Loop *ParentL = L.getParentLoop();
1695 std::string LoopName = std::string(L.getName());
1700 true, OptLevel,
true,
1701 OnlyWhenForced, ForgetSCEV, std::nullopt,
1702 std::nullopt,
false,
1733 bool IsCurrentLoopValid =
false;
1740 if (SibLoop == &L) {
1741 IsCurrentLoopValid =
true;
1750 if (!IsCurrentLoopValid) {
1783 if (
auto *LAMProxy = AM.
getCachedResult<LoopAnalysisManagerFunctionProxy>(
F))
1784 LAM = &LAMProxy->getManager();
1789 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1799 for (
const auto &L : LI) {
1810 while (!Worklist.
empty()) {
1817 Loop *ParentL = L.getParentLoop();
1823 std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1824 if (PSI && PSI->hasHugeWorkingSetSize())
1825 LocalAllowPeeling =
false;
1826 std::string LoopName = std::string(L.getName());
1830 &L, DT, &LI, SE,
TTI, AC, ORE, BFI, PSI,
1831 true, UnrollOpts.OptLevel,
false,
1832 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1834 std::nullopt, UnrollOpts.AllowPartial,
1835 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1836 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, UI,
1848 LAM->clear(L, LoopName);
1860 OS, MapClassName2PassName);
1862 if (UnrollOpts.AllowPartial != std::nullopt)
1863 OS << (*UnrollOpts.AllowPartial ?
"" :
"no-") <<
"partial;";
1864 if (UnrollOpts.AllowPeeling != std::nullopt)
1865 OS << (*UnrollOpts.AllowPeeling ?
"" :
"no-") <<
"peeling;";
1866 if (UnrollOpts.AllowRuntime != std::nullopt)
1867 OS << (*UnrollOpts.AllowRuntime ?
"" :
"no-") <<
"runtime;";
1868 if (UnrollOpts.AllowUpperBound != std::nullopt)
1869 OS << (*UnrollOpts.AllowUpperBound ?
"" :
"no-") <<
"upperbound;";
1870 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1871 OS << (*UnrollOpts.AllowProfileBasedPeeling ?
"" :
"no-")
1872 <<
"profile-peeling;";
1873 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1874 OS <<
"full-unroll-max=" << UnrollOpts.FullUnrollMaxCount <<
';';
1875 OS <<
'O' << UnrollOpts.OptLevel;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file provides various utilities for inspecting and working with the control flow graph in LLVM IR.
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static bool isSCEVUniform(const SCEV *S, UniformityInfo &UI)
Returns true if the SCEV expression is uniform, i.e., all threads in a convergent execution agree on ...
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with unroll metadata " "(full, enable, or count)."))
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, UniformityInfo *UI=nullptr, AAResults *AA=nullptr)
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be performed regardless of how much code expansion would result.
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of " "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/def graph.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
A manager for alias analyses.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Conditional Branch instruction.
This is the shared class of boolean and integer constants.
This is an important base class in LLVM.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operator support required for arithmetic and comparisons.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Analysis pass that exposes the LoopInfo for a function.
void verifyLoop() const
Verify loop structure.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
This class represents an analyzed expression in the program.
LLVM_ABI ArrayRef< SCEVUse > operands() const
Return operands of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it; otherwise return a SCEVCouldNotCompute object.
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value, if possible.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCount or zero.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
A version of PriorityWorklist that selects small size optimized data structures for the vector and map.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Analysis pass providing the TargetTransformInfo.
Produce an estimate of the unrolled cost of the specified loop.
ConvergenceKind Convergence
bool ConvergenceAllowsRuntime
LLVM_ABI uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
LLVM_ABI UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns, bool TripCountIsUniform=false)
unsigned NumInlineCandidates
uint64_t getRolledLoopSize() const
void visit(Iterator Start, Iterator End)
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
Abstract Attribute helper functions.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
GenericUniformityInfo< SSAContext > UniformityInfo
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return unless it is disabled at runtime (ASAN_OPTIONS=detect_stack_use_after_return=0).
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, ArrayRef< BasicBlock * > Blocks, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
LLVM_ABI Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
LLVM_ABI void initializeLoopUnrollPass(PassRegistry &)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
LLVM_ABI TransformationMode hasUnrollAndJamTransformation(const Loop *L)
cl::opt< bool > ForgetSCEVInLoopUnroll
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
LLVM_TEMPLATE_ABI void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
FunctionAddr VTableAddr Count
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
void peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peeled-off iteration.
const char *const LLVMLoopUnrollFollowupAll
TransformationMode
The mode sets how eager a transformation should be applied.
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
@ TM_Disable
The transformation should not be applied.
@ TM_Enable
The transformation should be applied without considering a cost model.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
LLVM_ABI MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
DWARFExpression::Operation Op
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
const char *const LLVMLoopUnrollFollowupRemainder
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
TargetTransformInfo & TTI
A CRTP mix-in to automatically provide informational APIs needed for passes.
const Instruction * Heart
bool RuntimeUnrollMultiExit
bool AllowExpensiveTripCount
bool AddAdditionalAccumulators
unsigned SCEVExpansionBudget
const bool PragmaFullUnroll
UnrollPragmaInfo(const Loop *L)
const unsigned PragmaCount
const bool ExplicitUnroll
const bool PragmaRuntimeUnrollDisable
const bool UserUnrollCount
const bool PragmaEnableUnroll