LLVM 20.0.0git
LoopUnrollPass.cpp
Go to the documentation of this file.
1//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements a simple loop unroller. It works best when loops have
10// been canonicalized by the -indvars pass, allowing it to determine the trip
11// counts of loops easily.
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/CFG.h"
37#include "llvm/IR/Constant.h"
38#include "llvm/IR/Constants.h"
40#include "llvm/IR/Dominators.h"
41#include "llvm/IR/Function.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Metadata.h"
46#include "llvm/IR/PassManager.h"
48#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
63#include <algorithm>
64#include <cassert>
65#include <cstdint>
66#include <limits>
67#include <optional>
68#include <string>
69#include <tuple>
70#include <utility>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "loop-unroll"
75
77 "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
78 cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
79 " the current top-most loop. This is sometimes preferred to reduce"
80 " compile time."));
81
83 UnrollThreshold("unroll-threshold", cl::Hidden,
84 cl::desc("The cost threshold for loop unrolling"));
85
88 "unroll-optsize-threshold", cl::init(0), cl::Hidden,
89 cl::desc("The cost threshold for loop unrolling when optimizing for "
90 "size"));
91
93 "unroll-partial-threshold", cl::Hidden,
94 cl::desc("The cost threshold for partial loop unrolling"));
95
97 "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
98 cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
99 "to the threshold when aggressively unrolling a loop due to the "
100 "dynamic cost savings. If completely unrolling a loop will reduce "
101 "the total runtime from X to Y, we boost the loop unroll "
102 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
103 "X/Y). This limit avoids excessive code bloat."));
104
106 "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
107 cl::desc("Don't allow loop unrolling to simulate more than this number of"
108 "iterations when checking full unroll profitability"));
109
111 "unroll-count", cl::Hidden,
112 cl::desc("Use this unroll count for all loops including those with "
113 "unroll_count pragma values, for testing purposes"));
114
116 "unroll-max-count", cl::Hidden,
117 cl::desc("Set the max unroll count for partial and runtime unrolling, for"
118 "testing purposes"));
119
121 "unroll-full-max-count", cl::Hidden,
122 cl::desc(
123 "Set the max unroll count for full unrolling, for testing purposes"));
124
125static cl::opt<bool>
126 UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
127 cl::desc("Allows loops to be partially unrolled until "
128 "-unroll-threshold loop size is reached."));
129
131 "unroll-allow-remainder", cl::Hidden,
132 cl::desc("Allow generation of a loop remainder (extra iterations) "
133 "when unrolling a loop."));
134
135static cl::opt<bool>
136 UnrollRuntime("unroll-runtime", cl::Hidden,
137 cl::desc("Unroll loops with run-time trip counts"));
138
140 "unroll-max-upperbound", cl::init(8), cl::Hidden,
141 cl::desc(
142 "The max of trip count upper bound that is considered in unrolling"));
143
145 "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
146 cl::desc("Unrolled size limit for loops with an unroll(full) or "
147 "unroll_count pragma."));
148
150 "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
151 cl::desc("If the runtime tripcount for the loop is lower than the "
152 "threshold, the loop is considered as flat and will be less "
153 "aggressively unrolled."));
154
156 "unroll-remainder", cl::Hidden,
157 cl::desc("Allow the loop remainder to be unrolled."));
158
159// This option isn't ever intended to be enabled, it serves to allow
160// experiments to check the assumptions about when this kind of revisit is
161// necessary.
163 "unroll-revisit-child-loops", cl::Hidden,
164 cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
165 "This shouldn't typically be needed as child loops (or their "
166 "clones) were already visited."));
167
169 "unroll-threshold-aggressive", cl::init(300), cl::Hidden,
170 cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) "
171 "optimizations"));
173 UnrollThresholdDefault("unroll-threshold-default", cl::init(150),
175 cl::desc("Default threshold (max size of unrolled "
176 "loop), used in all but O3 optimizations"));
177
179 "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
180 cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
181
182/// A magic value for use with the Threshold parameter to indicate
183/// that the loop unroll should be performed regardless of how much
184/// code expansion would result.
185static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
186
187/// Gather the various unrolling parameters based on the defaults, compiler
188/// flags, TTI overrides and user specified parameters.
192 OptimizationRemarkEmitter &ORE, int OptLevel,
193 std::optional<unsigned> UserThreshold, std::optional<unsigned> UserCount,
194 std::optional<bool> UserAllowPartial, std::optional<bool> UserRuntime,
195 std::optional<bool> UserUpperBound,
196 std::optional<unsigned> UserFullUnrollMaxCount) {
198
199 // Set up the defaults
200 UP.Threshold =
204 UP.PartialThreshold = 150;
206 UP.Count = 0;
208 UP.MaxCount = std::numeric_limits<unsigned>::max();
210 UP.FullUnrollMaxCount = std::numeric_limits<unsigned>::max();
211 UP.BEInsns = 2;
212 UP.Partial = false;
213 UP.Runtime = false;
214 UP.AllowRemainder = true;
215 UP.UnrollRemainder = false;
216 UP.AllowExpensiveTripCount = false;
217 UP.Force = false;
218 UP.UpperBound = false;
219 UP.UnrollAndJam = false;
223
224 // Override with any target specific settings
225 TTI.getUnrollingPreferences(L, SE, UP, &ORE);
226
227 // Apply size attributes
228 bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
229 // Let unroll hints / pragmas take precedence over PGSO.
231 llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
232 PGSOQueryType::IRPass));
233 if (OptForSize) {
237 }
238
239 // Apply any user values specified by cl::opt
240 if (UnrollThreshold.getNumOccurrences() > 0)
242 if (UnrollPartialThreshold.getNumOccurrences() > 0)
244 if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
246 if (UnrollMaxCount.getNumOccurrences() > 0)
248 if (UnrollMaxUpperBound.getNumOccurrences() > 0)
250 if (UnrollFullMaxCount.getNumOccurrences() > 0)
252 if (UnrollAllowPartial.getNumOccurrences() > 0)
254 if (UnrollAllowRemainder.getNumOccurrences() > 0)
256 if (UnrollRuntime.getNumOccurrences() > 0)
259 UP.UpperBound = false;
260 if (UnrollUnrollRemainder.getNumOccurrences() > 0)
262 if (UnrollMaxIterationsCountToAnalyze.getNumOccurrences() > 0)
264
265 // Apply user values provided by argument
266 if (UserThreshold) {
267 UP.Threshold = *UserThreshold;
268 UP.PartialThreshold = *UserThreshold;
269 }
270 if (UserCount)
271 UP.Count = *UserCount;
272 if (UserAllowPartial)
273 UP.Partial = *UserAllowPartial;
274 if (UserRuntime)
275 UP.Runtime = *UserRuntime;
276 if (UserUpperBound)
277 UP.UpperBound = *UserUpperBound;
278 if (UserFullUnrollMaxCount)
279 UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
280
281 return UP;
282}
283
284namespace {
285
286/// A struct to densely store the state of an instruction after unrolling at
287/// each iteration.
288///
289/// This is designed to work like a tuple of <Instruction *, int> for the
290/// purposes of hashing and lookup, but to be able to associate two boolean
291/// states with each key.
292struct UnrolledInstState {
293 Instruction *I;
294 int Iteration : 30;
295 unsigned IsFree : 1;
296 unsigned IsCounted : 1;
297};
298
299/// Hashing and equality testing for a set of the instruction states.
300struct UnrolledInstStateKeyInfo {
301 using PtrInfo = DenseMapInfo<Instruction *>;
303
304 static inline UnrolledInstState getEmptyKey() {
305 return {PtrInfo::getEmptyKey(), 0, 0, 0};
306 }
307
308 static inline UnrolledInstState getTombstoneKey() {
309 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
310 }
311
312 static inline unsigned getHashValue(const UnrolledInstState &S) {
313 return PairInfo::getHashValue({S.I, S.Iteration});
314 }
315
316 static inline bool isEqual(const UnrolledInstState &LHS,
317 const UnrolledInstState &RHS) {
318 return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
319 }
320};
321
322struct EstimatedUnrollCost {
323 /// The estimated cost after unrolling.
324 unsigned UnrolledCost;
325
326 /// The estimated dynamic cost of executing the instructions in the
327 /// rolled form.
328 unsigned RolledDynamicCost;
329};
330
331struct PragmaInfo {
332 PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
333 : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
334 PragmaEnableUnroll(PEU) {}
335 const bool UserUnrollCount;
336 const bool PragmaFullUnroll;
337 const unsigned PragmaCount;
338 const bool PragmaEnableUnroll;
339};
340
341} // end anonymous namespace
342
343/// Figure out if the loop is worth full unrolling.
344///
345/// Complete loop unrolling can make some loads constant, and we need to know
346/// if that would expose any further optimization opportunities. This routine
347/// estimates this optimization. It computes cost of unrolled loop
348/// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
349/// dynamic cost we mean that we won't count costs of blocks that are known not
350/// to be executed (i.e. if we have a branch in the loop and we know that at the
351/// given iteration its condition would be resolved to true, we won't add up the
352/// cost of the 'false'-block).
353/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
354/// the analysis failed (no benefits expected from the unrolling, or the loop is
355/// too big to analyze), the returned value is std::nullopt.
356static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
357 const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
358 const SmallPtrSetImpl<const Value *> &EphValues,
359 const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize,
360 unsigned MaxIterationsCountToAnalyze) {
361 // We want to be able to scale offsets by the trip count and add more offsets
362 // to them without checking for overflows, and we already don't want to
363 // analyze *massive* trip counts, so we force the max to be reasonably small.
364 assert(MaxIterationsCountToAnalyze <
365 (unsigned)(std::numeric_limits<int>::max() / 2) &&
366 "The unroll iterations max is too large!");
367
368 // Only analyze inner loops. We can't properly estimate cost of nested loops
369 // and we won't visit inner loops again anyway.
370 if (!L->isInnermost())
371 return std::nullopt;
372
373 // Don't simulate loops with a big or unknown tripcount
374 if (!TripCount || TripCount > MaxIterationsCountToAnalyze)
375 return std::nullopt;
376
379 DenseMap<Value *, Value *> SimplifiedValues;
380 SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues;
381
382 // The estimated cost of the unrolled form of the loop. We try to estimate
383 // this by simplifying as much as we can while computing the estimate.
384 InstructionCost UnrolledCost = 0;
385
386 // We also track the estimated dynamic (that is, actually executed) cost in
387 // the rolled form. This helps identify cases when the savings from unrolling
388 // aren't just exposing dead control flows, but actual reduced dynamic
389 // instructions due to the simplifications which we expect to occur after
390 // unrolling.
391 InstructionCost RolledDynamicCost = 0;
392
393 // We track the simplification of each instruction in each iteration. We use
394 // this to recursively merge costs into the unrolled cost on-demand so that
395 // we don't count the cost of any dead code. This is essentially a map from
396 // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
398
399 // A small worklist used to accumulate cost of instructions from each
400 // observable and reached root in the loop.
402
403 // PHI-used worklist used between iterations while accumulating cost.
405
406 // Helper function to accumulate cost for instructions in the loop.
407 auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
408 assert(Iteration >= 0 && "Cannot have a negative iteration!");
409 assert(CostWorklist.empty() && "Must start with an empty cost list");
410 assert(PHIUsedList.empty() && "Must start with an empty phi used list");
411 CostWorklist.push_back(&RootI);
413 RootI.getFunction()->hasMinSize() ?
416 for (;; --Iteration) {
417 do {
418 Instruction *I = CostWorklist.pop_back_val();
419
420 // InstCostMap only uses I and Iteration as a key, the other two values
421 // don't matter here.
422 auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
423 if (CostIter == InstCostMap.end())
424 // If an input to a PHI node comes from a dead path through the loop
425 // we may have no cost data for it here. What that actually means is
426 // that it is free.
427 continue;
428 auto &Cost = *CostIter;
429 if (Cost.IsCounted)
430 // Already counted this instruction.
431 continue;
432
433 // Mark that we are counting the cost of this instruction now.
434 Cost.IsCounted = true;
435
436 // If this is a PHI node in the loop header, just add it to the PHI set.
437 if (auto *PhiI = dyn_cast<PHINode>(I))
438 if (PhiI->getParent() == L->getHeader()) {
439 assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
440 "inherently simplify during unrolling.");
441 if (Iteration == 0)
442 continue;
443
444 // Push the incoming value from the backedge into the PHI used list
445 // if it is an in-loop instruction. We'll use this to populate the
446 // cost worklist for the next iteration (as we count backwards).
447 if (auto *OpI = dyn_cast<Instruction>(
448 PhiI->getIncomingValueForBlock(L->getLoopLatch())))
449 if (L->contains(OpI))
450 PHIUsedList.push_back(OpI);
451 continue;
452 }
453
454 // First accumulate the cost of this instruction.
455 if (!Cost.IsFree) {
456 // Consider simplified operands in instruction cost.
458 transform(I->operands(), std::back_inserter(Operands),
459 [&](Value *Op) {
460 if (auto Res = SimplifiedValues.lookup(Op))
461 return Res;
462 return Op;
463 });
464 UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
465 LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
466 << Iteration << "): ");
467 LLVM_DEBUG(I->dump());
468 }
469
470 // We must count the cost of every operand which is not free,
471 // recursively. If we reach a loop PHI node, simply add it to the set
472 // to be considered on the next iteration (backwards!).
473 for (Value *Op : I->operands()) {
474 // Check whether this operand is free due to being a constant or
475 // outside the loop.
476 auto *OpI = dyn_cast<Instruction>(Op);
477 if (!OpI || !L->contains(OpI))
478 continue;
479
480 // Otherwise accumulate its cost.
481 CostWorklist.push_back(OpI);
482 }
483 } while (!CostWorklist.empty());
484
485 if (PHIUsedList.empty())
486 // We've exhausted the search.
487 break;
488
489 assert(Iteration > 0 &&
490 "Cannot track PHI-used values past the first iteration!");
491 CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
492 PHIUsedList.clear();
493 }
494 };
495
496 // Ensure that we don't violate the loop structure invariants relied on by
497 // this analysis.
498 assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
499 assert(L->isLCSSAForm(DT) &&
500 "Must have loops in LCSSA form to track live-out values.");
501
502 LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
503
505 L->getHeader()->getParent()->hasMinSize() ?
507 // Simulate execution of each iteration of the loop counting instructions,
508 // which would be simplified.
509 // Since the same load will take different values on different iterations,
510 // we literally have to go through all loop's iterations.
511 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
512 LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
513
514 // Prepare for the iteration by collecting any simplified entry or backedge
515 // inputs.
516 for (Instruction &I : *L->getHeader()) {
517 auto *PHI = dyn_cast<PHINode>(&I);
518 if (!PHI)
519 break;
520
521 // The loop header PHI nodes must have exactly two input: one from the
522 // loop preheader and one from the loop latch.
523 assert(
524 PHI->getNumIncomingValues() == 2 &&
525 "Must have an incoming value only for the preheader and the latch.");
526
527 Value *V = PHI->getIncomingValueForBlock(
528 Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
529 if (Iteration != 0 && SimplifiedValues.count(V))
530 V = SimplifiedValues.lookup(V);
531 SimplifiedInputValues.push_back({PHI, V});
532 }
533
534 // Now clear and re-populate the map for the next iteration.
535 SimplifiedValues.clear();
536 while (!SimplifiedInputValues.empty())
537 SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
538
539 UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
540
541 BBWorklist.clear();
542 BBWorklist.insert(L->getHeader());
543 // Note that we *must not* cache the size, this loop grows the worklist.
544 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
545 BasicBlock *BB = BBWorklist[Idx];
546
547 // Visit all instructions in the given basic block and try to simplify
548 // it. We don't change the actual IR, just count optimization
549 // opportunities.
550 for (Instruction &I : *BB) {
551 // These won't get into the final code - don't even try calculating the
552 // cost for them.
553 if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
554 continue;
555
556 // Track this instruction's expected baseline cost when executing the
557 // rolled loop form.
558 RolledDynamicCost += TTI.getInstructionCost(&I, CostKind);
559
560 // Visit the instruction to analyze its loop cost after unrolling,
561 // and if the visitor returns true, mark the instruction as free after
562 // unrolling and continue.
563 bool IsFree = Analyzer.visit(I);
564 bool Inserted = InstCostMap.insert({&I, (int)Iteration,
565 (unsigned)IsFree,
566 /*IsCounted*/ false}).second;
567 (void)Inserted;
568 assert(Inserted && "Cannot have a state for an unvisited instruction!");
569
570 if (IsFree)
571 continue;
572
573 // Can't properly model a cost of a call.
574 // FIXME: With a proper cost model we should be able to do it.
575 if (auto *CI = dyn_cast<CallInst>(&I)) {
576 const Function *Callee = CI->getCalledFunction();
577 if (!Callee || TTI.isLoweredToCall(Callee)) {
578 LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
579 return std::nullopt;
580 }
581 }
582
583 // If the instruction might have a side-effect recursively account for
584 // the cost of it and all the instructions leading up to it.
585 if (I.mayHaveSideEffects())
586 AddCostRecursively(I, Iteration);
587
588 // If unrolled body turns out to be too big, bail out.
589 if (UnrolledCost > MaxUnrolledLoopSize) {
590 LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
591 << " UnrolledCost: " << UnrolledCost
592 << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
593 << "\n");
594 return std::nullopt;
595 }
596 }
597
598 Instruction *TI = BB->getTerminator();
599
600 auto getSimplifiedConstant = [&](Value *V) -> Constant * {
601 if (SimplifiedValues.count(V))
602 V = SimplifiedValues.lookup(V);
603 return dyn_cast<Constant>(V);
604 };
605
606 // Add in the live successors by first checking whether we have terminator
607 // that may be simplified based on the values simplified by this call.
608 BasicBlock *KnownSucc = nullptr;
609 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
610 if (BI->isConditional()) {
611 if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) {
612 // Just take the first successor if condition is undef
613 if (isa<UndefValue>(SimpleCond))
614 KnownSucc = BI->getSuccessor(0);
615 else if (ConstantInt *SimpleCondVal =
616 dyn_cast<ConstantInt>(SimpleCond))
617 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
618 }
619 }
620 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
621 if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) {
622 // Just take the first successor if condition is undef
623 if (isa<UndefValue>(SimpleCond))
624 KnownSucc = SI->getSuccessor(0);
625 else if (ConstantInt *SimpleCondVal =
626 dyn_cast<ConstantInt>(SimpleCond))
627 KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
628 }
629 }
630 if (KnownSucc) {
631 if (L->contains(KnownSucc))
632 BBWorklist.insert(KnownSucc);
633 else
634 ExitWorklist.insert({BB, KnownSucc});
635 continue;
636 }
637
638 // Add BB's successors to the worklist.
639 for (BasicBlock *Succ : successors(BB))
640 if (L->contains(Succ))
641 BBWorklist.insert(Succ);
642 else
643 ExitWorklist.insert({BB, Succ});
644 AddCostRecursively(*TI, Iteration);
645 }
646
647 // If we found no optimization opportunities on the first iteration, we
648 // won't find them on later ones too.
649 if (UnrolledCost == RolledDynamicCost) {
650 LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
651 << " UnrolledCost: " << UnrolledCost << "\n");
652 return std::nullopt;
653 }
654 }
655
656 while (!ExitWorklist.empty()) {
657 BasicBlock *ExitingBB, *ExitBB;
658 std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
659
660 for (Instruction &I : *ExitBB) {
661 auto *PN = dyn_cast<PHINode>(&I);
662 if (!PN)
663 break;
664
665 Value *Op = PN->getIncomingValueForBlock(ExitingBB);
666 if (auto *OpI = dyn_cast<Instruction>(Op))
667 if (L->contains(OpI))
668 AddCostRecursively(*OpI, TripCount - 1);
669 }
670 }
671
672 assert(UnrolledCost.isValid() && RolledDynamicCost.isValid() &&
673 "All instructions must have a valid cost, whether the "
674 "loop is rolled or unrolled.");
675
676 LLVM_DEBUG(dbgs() << "Analysis finished:\n"
677 << "UnrolledCost: " << UnrolledCost << ", "
678 << "RolledDynamicCost: " << RolledDynamicCost << "\n");
679 return {{unsigned(*UnrolledCost.getValue()),
680 unsigned(*RolledDynamicCost.getValue())}};
681}
682
684 const Loop *L, const TargetTransformInfo &TTI,
685 const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
687 for (BasicBlock *BB : L->blocks())
688 Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
689 L);
690 NumInlineCandidates = Metrics.NumInlineCandidates;
691 NotDuplicatable = Metrics.notDuplicatable;
692 Convergence = Metrics.Convergence;
693 LoopSize = Metrics.NumInsts;
695 Metrics.Convergence != ConvergenceKind::Uncontrolled &&
697
698 // Don't allow an estimate of size zero. This would allows unrolling of loops
699 // with huge iteration counts, which is a compile time problem even if it's
700 // not a problem for code quality. Also, the code using this size may assume
701 // that each loop has at least three instructions (likely a conditional
702 // branch, a comparison feeding that branch, and some kind of loop increment
703 // feeding that comparison instruction).
704 if (LoopSize.isValid() && LoopSize < BEInsns + 1)
705 // This is an open coded max() on InstructionCost
706 LoopSize = BEInsns + 1;
707}
708
710 switch (Convergence) {
712 LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
713 return false;
714 default:
715 break;
716 }
717 if (!LoopSize.isValid()) {
718 LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
719 return false;
720 }
721 if (NotDuplicatable) {
722 LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
723 return false;
724 }
725 return true;
726}
727
730 unsigned CountOverwrite) const {
731 unsigned LS = *LoopSize.getValue();
732 assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
733 if (CountOverwrite)
734 return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
735 else
736 return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
737}
738
739// Returns the loop hint metadata node with the given name (for example,
740// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
741// returned.
743 if (MDNode *LoopID = L->getLoopID())
744 return GetUnrollMetadata(LoopID, Name);
745 return nullptr;
746}
747
748// Returns true if the loop has an unroll(full) pragma.
749static bool hasUnrollFullPragma(const Loop *L) {
750 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
751}
752
753// Returns true if the loop has an unroll(enable) pragma. This metadata is used
754// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
755static bool hasUnrollEnablePragma(const Loop *L) {
756 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
757}
758
759// Returns true if the loop has an runtime unroll(disable) pragma.
760static bool hasRuntimeUnrollDisablePragma(const Loop *L) {
761 return getUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
762}
763
764// If loop has an unroll_count pragma return the (necessarily
765// positive) value from the pragma. Otherwise return 0.
766static unsigned unrollCountPragmaValue(const Loop *L) {
767 MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
768 if (MD) {
769 assert(MD->getNumOperands() == 2 &&
770 "Unroll count hint metadata should have two operands.");
771 unsigned Count =
772 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
773 assert(Count >= 1 && "Unroll count must be positive.");
774 return Count;
775 }
776 return 0;
777}
778
779// Computes the boosting factor for complete unrolling.
780// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
781// be beneficial to fully unroll the loop even if unrolledcost is large. We
782// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
783// the unroll threshold.
784static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
785 unsigned MaxPercentThresholdBoost) {
786 if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
787 return 100;
788 else if (Cost.UnrolledCost != 0)
789 // The boosting factor is RolledDynamicCost / UnrolledCost
790 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
791 MaxPercentThresholdBoost);
792 else
793 return MaxPercentThresholdBoost;
794}
795
796static std::optional<unsigned>
797shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
798 const unsigned TripMultiple, const unsigned TripCount,
799 unsigned MaxTripCount, const UnrollCostEstimator UCE,
801
802 // Using unroll pragma
803 // 1st priority is unroll count set by "unroll-count" option.
804
805 if (PInfo.UserUnrollCount) {
806 if (UP.AllowRemainder &&
807 UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
808 return (unsigned)UnrollCount;
809 }
810
811 // 2nd priority is unroll count set by pragma.
812 if (PInfo.PragmaCount > 0) {
813 if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
814 return PInfo.PragmaCount;
815 }
816
817 if (PInfo.PragmaFullUnroll && TripCount != 0) {
818 // Certain cases with UBSAN can cause trip count to be calculated as
819 // INT_MAX, Block full unrolling at a reasonable limit so that the compiler
820 // doesn't hang trying to unroll the loop. See PR77842
821 if (TripCount > PragmaUnrollFullMaxIterations) {
822 LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
823 return std::nullopt;
824 }
825
826 return TripCount;
827 }
828
829 if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
830 MaxTripCount <= UP.MaxUpperBound)
831 return MaxTripCount;
832
833 // if didn't return until here, should continue to other priorties
834 return std::nullopt;
835}
836
837static std::optional<unsigned> shouldFullUnroll(
840 const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
842 assert(FullUnrollTripCount && "should be non-zero!");
843
844 if (FullUnrollTripCount > UP.FullUnrollMaxCount)
845 return std::nullopt;
846
847 // When computing the unrolled size, note that BEInsns are not replicated
848 // like the rest of the loop body.
849 if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
850 return FullUnrollTripCount;
851
852 // The loop isn't that small, but we still can fully unroll it if that
853 // helps to remove a significant number of instructions.
854 // To check that, run additional analysis on the loop.
855 if (std::optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
856 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
859 unsigned Boost =
861 if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
862 return FullUnrollTripCount;
863 }
864 return std::nullopt;
865}
866
867static std::optional<unsigned>
868shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
869 const UnrollCostEstimator UCE,
871
872 if (!TripCount)
873 return std::nullopt;
874
875 if (!UP.Partial) {
876 LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
877 << "-unroll-allow-partial not given\n");
878 return 0;
879 }
880 unsigned count = UP.Count;
881 if (count == 0)
882 count = TripCount;
883 if (UP.PartialThreshold != NoThreshold) {
884 // Reduce unroll count to be modulo of TripCount for partial unrolling.
886 count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
887 (LoopSize - UP.BEInsns);
888 if (count > UP.MaxCount)
889 count = UP.MaxCount;
890 while (count != 0 && TripCount % count != 0)
891 count--;
892 if (UP.AllowRemainder && count <= 1) {
893 // If there is no Count that is modulo of TripCount, set Count to
894 // largest power-of-two factor that satisfies the threshold limit.
895 // As we'll create fixup loop, do the type of unrolling only if
896 // remainder loop is allowed.
898 while (count != 0 &&
900 count >>= 1;
901 }
902 if (count < 2) {
903 count = 0;
904 }
905 } else {
906 count = TripCount;
907 }
908 if (count > UP.MaxCount)
909 count = UP.MaxCount;
910
911 LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
912
913 return count;
914}
915// Returns true if unroll count was set explicitly.
916// Calculates unroll count and writes it to UP.Count.
917// Unless IgnoreUser is true, will also use metadata and command-line options
918// that are specific to to the LoopUnroll pass (which, for instance, are
919// irrelevant for the LoopUnrollAndJam pass).
920// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
921// many LoopUnroll-specific options. The shared functionality should be
922// refactored into it own function.
926 const SmallPtrSetImpl<const Value *> &EphValues,
927 OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
928 bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
930 TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
931
932 unsigned LoopSize = UCE.getRolledLoopSize();
933
934 const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
935 const bool PragmaFullUnroll = hasUnrollFullPragma(L);
936 const unsigned PragmaCount = unrollCountPragmaValue(L);
937 const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
938
939 const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
940 PragmaEnableUnroll || UserUnrollCount;
941
942 PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
943 PragmaEnableUnroll);
944 // Use an explicit peel count that has been specified for testing. In this
945 // case it's not permitted to also specify an explicit unroll count.
946 if (PP.PeelCount) {
947 if (UnrollCount.getNumOccurrences() > 0) {
948 report_fatal_error("Cannot specify both explicit peel count and "
949 "explicit unroll count", /*GenCrashDiag=*/false);
950 }
951 UP.Count = 1;
952 UP.Runtime = false;
953 return true;
954 }
955 // Check for explicit Count.
956 // 1st priority is unroll count set by "unroll-count" option.
957 // 2nd priority is unroll count set by pragma.
958 if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
959 MaxTripCount, UCE, UP)) {
960 UP.Count = *UnrollFactor;
961
962 if (UserUnrollCount || (PragmaCount > 0)) {
963 UP.AllowExpensiveTripCount = true;
964 UP.Force = true;
965 }
966 UP.Runtime |= (PragmaCount > 0);
967 return ExplicitUnroll;
968 } else {
969 if (ExplicitUnroll && TripCount != 0) {
970 // If the loop has an unrolling pragma, we want to be more aggressive with
971 // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
972 // value which is larger than the default limits.
973 UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
975 std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
976 }
977 }
978
979 // 3rd priority is exact full unrolling. This will eliminate all copies
980 // of some exit test.
981 UP.Count = 0;
982 if (TripCount) {
983 UP.Count = TripCount;
984 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
985 TripCount, UCE, UP)) {
986 UP.Count = *UnrollFactor;
987 UseUpperBound = false;
988 return ExplicitUnroll;
989 }
990 }
991
992 // 4th priority is bounded unrolling.
993 // We can unroll by the upper bound amount if it's generally allowed or if
994 // we know that the loop is executed either the upper bound or zero times.
995 // (MaxOrZero unrolling keeps only the first loop test, so the number of
996 // loop tests remains the same compared to the non-unrolled version, whereas
997 // the generic upper bound unrolling keeps all but the last loop test so the
998 // number of loop tests goes up which may end up being worse on targets with
999 // constrained branch predictor resources so is controlled by an option.)
1000 // In addition we only unroll small upper bounds.
1001 // Note that the cost of bounded unrolling is always strictly greater than
1002 // cost of exact full unrolling. As such, if we have an exact count and
1003 // found it unprofitable, we'll never chose to bounded unroll.
1004 if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
1005 MaxTripCount <= UP.MaxUpperBound) {
1006 UP.Count = MaxTripCount;
1007 if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
1008 MaxTripCount, UCE, UP)) {
1009 UP.Count = *UnrollFactor;
1010 UseUpperBound = true;
1011 return ExplicitUnroll;
1012 }
1013 }
1014
1015 // 5th priority is loop peeling.
1016 computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
1017 if (PP.PeelCount) {
1018 UP.Runtime = false;
1019 UP.Count = 1;
1020 return ExplicitUnroll;
1021 }
1022
1023 // Before starting partial unrolling, set up.partial to true,
1024 // if user explicitly asked for unrolling
1025 if (TripCount)
1026 UP.Partial |= ExplicitUnroll;
1027
1028 // 6th priority is partial unrolling.
1029 // Try partial unroll only when TripCount could be statically calculated.
1030 if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
1031 UP.Count = *UnrollFactor;
1032
1033 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1034 UP.Count != TripCount)
1035 ORE->emit([&]() {
1037 "FullUnrollAsDirectedTooLarge",
1038 L->getStartLoc(), L->getHeader())
1039 << "Unable to fully unroll loop as directed by unroll pragma "
1040 "because "
1041 "unrolled size is too large.";
1042 });
1043
1044 if (UP.PartialThreshold != NoThreshold) {
1045 if (UP.Count == 0) {
1046 if (PragmaEnableUnroll)
1047 ORE->emit([&]() {
1049 "UnrollAsDirectedTooLarge",
1050 L->getStartLoc(), L->getHeader())
1051 << "Unable to unroll loop as directed by unroll(enable) "
1052 "pragma "
1053 "because unrolled size is too large.";
1054 });
1055 }
1056 }
1057 return ExplicitUnroll;
1058 }
1059 assert(TripCount == 0 &&
1060 "All cases when TripCount is constant should be covered here.");
1061 if (PragmaFullUnroll)
1062 ORE->emit([&]() {
1064 DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
1065 L->getStartLoc(), L->getHeader())
1066 << "Unable to fully unroll loop as directed by unroll(full) "
1067 "pragma "
1068 "because loop has a runtime trip count.";
1069 });
1070
1071 // 7th priority is runtime unrolling.
1072 // Don't unroll a runtime trip count loop when it is disabled.
1074 UP.Count = 0;
1075 return false;
1076 }
1077
1078 // Don't unroll a small upper bound loop unless user or TTI asked to do so.
1079 if (MaxTripCount && !UP.Force && MaxTripCount < UP.MaxUpperBound) {
1080 UP.Count = 0;
1081 return false;
1082 }
1083
1084 // Check if the runtime trip count is too small when profile is available.
1085 if (L->getHeader()->getParent()->hasProfileData()) {
1086 if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
1087 if (*ProfileTripCount < FlatLoopTripCountThreshold)
1088 return false;
1089 else
1090 UP.AllowExpensiveTripCount = true;
1091 }
1092 }
1093 UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1094 if (!UP.Runtime) {
1095 LLVM_DEBUG(
1096 dbgs() << " will not try to unroll loop with runtime trip count "
1097 << "-unroll-runtime not given\n");
1098 UP.Count = 0;
1099 return false;
1100 }
1101 if (UP.Count == 0)
1103
1104 // Reduce unroll count to be the largest power-of-two factor of
1105 // the original count which satisfies the threshold limit.
1106 while (UP.Count != 0 &&
1108 UP.Count >>= 1;
1109
1110#ifndef NDEBUG
1111 unsigned OrigCount = UP.Count;
1112#endif
1113
1114 if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
1115 while (UP.Count != 0 && TripMultiple % UP.Count != 0)
1116 UP.Count >>= 1;
1117 LLVM_DEBUG(
1118 dbgs() << "Remainder loop is restricted (that could architecture "
1119 "specific or because the loop contains a convergent "
1120 "instruction), so unroll count must divide the trip "
1121 "multiple, "
1122 << TripMultiple << ". Reducing unroll count from " << OrigCount
1123 << " to " << UP.Count << ".\n");
1124
1125 using namespace ore;
1126
1127 if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1128 ORE->emit([&]() {
1130 "DifferentUnrollCountFromDirected",
1131 L->getStartLoc(), L->getHeader())
1132 << "Unable to unroll loop the number of times directed by "
1133 "unroll_count pragma because remainder loop is restricted "
1134 "(that could architecture specific or because the loop "
1135 "contains a convergent instruction) and so must have an "
1136 "unroll "
1137 "count that divides the loop trip multiple of "
1138 << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
1139 << NV("UnrollCount", UP.Count) << " time(s).";
1140 });
1141 }
1142
1143 if (UP.Count > UP.MaxCount)
1144 UP.Count = UP.MaxCount;
1145
1146 if (MaxTripCount && UP.Count > MaxTripCount)
1147 UP.Count = MaxTripCount;
1148
1149 LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
1150 << "\n");
1151 if (UP.Count < 2)
1152 UP.Count = 0;
1153 return ExplicitUnroll;
1154}
1155
1156static LoopUnrollResult
1160 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
1161 bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
1162 std::optional<unsigned> ProvidedCount,
1163 std::optional<unsigned> ProvidedThreshold,
1164 std::optional<bool> ProvidedAllowPartial,
1165 std::optional<bool> ProvidedRuntime,
1166 std::optional<bool> ProvidedUpperBound,
1167 std::optional<bool> ProvidedAllowPeeling,
1168 std::optional<bool> ProvidedAllowProfileBasedPeeling,
1169 std::optional<unsigned> ProvidedFullUnrollMaxCount,
1170 AAResults *AA = nullptr) {
1171
1172 LLVM_DEBUG(dbgs() << "Loop Unroll: F["
1173 << L->getHeader()->getParent()->getName() << "] Loop %"
1174 << L->getHeader()->getName() << "\n");
1176 if (TM & TM_Disable)
1178
1179 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1180 // parent loop has an explicit unroll-and-jam pragma. This is to prevent
1181 // automatic unrolling from interfering with the user requested
1182 // transformation.
1183 Loop *ParentL = L->getParentLoop();
1184 if (ParentL != nullptr &&
1187 LLVM_DEBUG(dbgs() << "Not unrolling loop since parent loop has"
1188 << " llvm.loop.unroll_and_jam.\n");
1190 }
1191
1192 // If this loop isn't forced to be unrolled, avoid unrolling it when the
1193 // loop has an explicit unroll-and-jam pragma. This is to prevent automatic
1194 // unrolling from interfering with the user requested transformation.
1197 LLVM_DEBUG(
1198 dbgs()
1199 << " Not unrolling loop since it has llvm.loop.unroll_and_jam.\n");
1201 }
1202
1203 if (!L->isLoopSimplifyForm()) {
1204 LLVM_DEBUG(
1205 dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1207 }
1208
1209 // When automatic unrolling is disabled, do not unroll unless overridden for
1210 // this loop.
1211 if (OnlyWhenForced && !(TM & TM_Enable))
1213
1214 bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1216 L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
1217 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1218 ProvidedFullUnrollMaxCount);
1220 L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true);
1221
1222 // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1223 // as threshold later on.
1224 if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1225 !OptForSize)
1227
1229 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1230
1231 UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
1232 if (!UCE.canUnroll()) {
1233 LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
1235 }
1236
1237 unsigned LoopSize = UCE.getRolledLoopSize();
1238 LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1239
1240 // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
1241 // later), to (fully) unroll loops, if it does not increase code size.
1242 if (OptForSize)
1243 UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
1244
1245 if (UCE.NumInlineCandidates != 0) {
1246 LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1248 }
1249
1250 // Find the smallest exact trip count for any exit. This is an upper bound
1251 // on the loop trip count, but an exit at an earlier iteration is still
1252 // possible. An unroll by the smallest exact trip count guarantees that all
1253 // branches relating to at least one exit can be eliminated. This is unlike
1254 // the max trip count, which only guarantees that the backedge can be broken.
1255 unsigned TripCount = 0;
1256 unsigned TripMultiple = 1;
1257 SmallVector<BasicBlock *, 8> ExitingBlocks;
1258 L->getExitingBlocks(ExitingBlocks);
1259 for (BasicBlock *ExitingBlock : ExitingBlocks)
1260 if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
1261 if (!TripCount || TC < TripCount)
1262 TripCount = TripMultiple = TC;
1263
1264 if (!TripCount) {
1265 // If no exact trip count is known, determine the trip multiple of either
1266 // the loop latch or the single exiting block.
1267 // TODO: Relax for multiple exits.
1268 BasicBlock *ExitingBlock = L->getLoopLatch();
1269 if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1270 ExitingBlock = L->getExitingBlock();
1271 if (ExitingBlock)
1272 TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1273 }
1274
1275 // If the loop contains a convergent operation, the prelude we'd add
1276 // to do the first few instructions before we hit the unrolled loop
1277 // is unsafe -- it adds a control-flow dependency to the convergent
1278 // operation. Therefore restrict remainder loop (try unrolling without).
1279 //
1280 // TODO: This is somewhat conservative; we could allow the remainder if the
1281 // trip count is uniform.
1283
1284 // Try to find the trip count upper bound if we cannot find the exact trip
1285 // count.
1286 unsigned MaxTripCount = 0;
1287 bool MaxOrZero = false;
1288 if (!TripCount) {
1289 MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1290 MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1291 }
1292
1293 // computeUnrollCount() decides whether it is beneficial to use upper bound to
1294 // fully unroll the loop.
1295 bool UseUpperBound = false;
1296 bool IsCountSetExplicitly = computeUnrollCount(
1297 L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
1298 MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
1299 if (!UP.Count)
1301
1303
1304 if (PP.PeelCount) {
1305 assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
1306 LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
1307 << " with iteration count " << PP.PeelCount << "!\n");
1308 ORE.emit([&]() {
1309 return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
1310 L->getHeader())
1311 << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
1312 << " iterations";
1313 });
1314
1315 ValueToValueMapTy VMap;
1316 if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
1317 simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr);
1318 // If the loop was peeled, we already "used up" the profile information
1319 // we had, so we don't want to unroll or peel again.
1321 L->setLoopAlreadyUnrolled();
1323 }
1325 }
1326
1327 // Do not attempt partial/runtime unrolling in FullLoopUnrolling
1328 if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
1329 LLVM_DEBUG(
1330 dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
1332 }
1333
1334 // At this point, UP.Runtime indicates that run-time unrolling is allowed.
1335 // However, we only want to actually perform it if we don't know the trip
1336 // count and the unroll count doesn't divide the known trip multiple.
1337 // TODO: This decision should probably be pushed up into
1338 // computeUnrollCount().
1339 UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0;
1340
1341 // Save loop properties before it is transformed.
1342 MDNode *OrigLoopID = L->getLoopID();
1343
1344 // Unroll the loop.
1345 Loop *RemainderLoop = nullptr;
1347 ULO.Count = UP.Count;
1348 ULO.Force = UP.Force;
1351 ULO.Runtime = UP.Runtime;
1352 ULO.ForgetAllSCEV = ForgetAllSCEV;
1355 LoopUnrollResult UnrollResult = UnrollLoop(
1356 L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
1357 if (UnrollResult == LoopUnrollResult::Unmodified)
1359
1360 if (RemainderLoop) {
1361 std::optional<MDNode *> RemainderLoopID =
1364 if (RemainderLoopID)
1365 RemainderLoop->setLoopID(*RemainderLoopID);
1366 }
1367
1368 if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1369 std::optional<MDNode *> NewLoopID =
1372 if (NewLoopID) {
1373 L->setLoopID(*NewLoopID);
1374
1375 // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1376 // explicitly.
1377 return UnrollResult;
1378 }
1379 }
1380
1381 // If loop has an unroll count pragma or unrolled by explicitly set count
1382 // mark loop as unrolled to prevent unrolling beyond that requested.
1383 if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
1384 L->setLoopAlreadyUnrolled();
1385
1386 return UnrollResult;
1387}
1388
1389namespace {
1390
1391class LoopUnroll : public LoopPass {
1392public:
1393 static char ID; // Pass ID, replacement for typeid
1394
1395 int OptLevel;
1396
1397 /// If false, use a cost model to determine whether unrolling of a loop is
1398 /// profitable. If true, only loops that explicitly request unrolling via
1399 /// metadata are considered. All other loops are skipped.
1400 bool OnlyWhenForced;
1401
1402 /// If false, when SCEV is invalidated, only forget everything in the
1403 /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1404 /// Otherwise, forgetAllLoops and rebuild when needed next.
1405 bool ForgetAllSCEV;
1406
1407 std::optional<unsigned> ProvidedCount;
1408 std::optional<unsigned> ProvidedThreshold;
1409 std::optional<bool> ProvidedAllowPartial;
1410 std::optional<bool> ProvidedRuntime;
1411 std::optional<bool> ProvidedUpperBound;
1412 std::optional<bool> ProvidedAllowPeeling;
1413 std::optional<bool> ProvidedAllowProfileBasedPeeling;
1414 std::optional<unsigned> ProvidedFullUnrollMaxCount;
1415
1416 LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1417 bool ForgetAllSCEV = false,
1418 std::optional<unsigned> Threshold = std::nullopt,
1419 std::optional<unsigned> Count = std::nullopt,
1420 std::optional<bool> AllowPartial = std::nullopt,
1421 std::optional<bool> Runtime = std::nullopt,
1422 std::optional<bool> UpperBound = std::nullopt,
1423 std::optional<bool> AllowPeeling = std::nullopt,
1424 std::optional<bool> AllowProfileBasedPeeling = std::nullopt,
1425 std::optional<unsigned> ProvidedFullUnrollMaxCount = std::nullopt)
1426 : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1427 ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1428 ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1429 ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1430 ProvidedAllowPeeling(AllowPeeling),
1431 ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
1432 ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
1434 }
1435
1436 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1437 if (skipLoop(L))
1438 return false;
1439
1440 Function &F = *L->getHeader()->getParent();
1441
1442 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1443 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1444 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1445 const TargetTransformInfo &TTI =
1446 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1447 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1448 // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1449 // pass. Function analyses need to be preserved across loop transformations
1450 // but ORE cannot be preserved (see comment before the pass definition).
1452 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1453
1455 L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1456 /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
1457 ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1458 ProvidedUpperBound, ProvidedAllowPeeling,
1459 ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
1460
1461 if (Result == LoopUnrollResult::FullyUnrolled)
1462 LPM.markLoopAsDeleted(*L);
1463
1464 return Result != LoopUnrollResult::Unmodified;
1465 }
1466
1467 /// This transformation requires natural loop information & requires that
1468 /// loop preheaders be inserted into the CFG...
1469 void getAnalysisUsage(AnalysisUsage &AU) const override {
1472 // FIXME: Loop passes are required to preserve domtree, and for now we just
1473 // recreate dom info if anything gets unrolled.
1475 }
1476};
1477
1478} // end anonymous namespace
1479
1480char LoopUnroll::ID = 0;
1481
1482INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1486INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1487
1488Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1489 bool ForgetAllSCEV, int Threshold, int Count,
1490 int AllowPartial, int Runtime, int UpperBound,
1491 int AllowPeeling) {
1492 // TODO: It would make more sense for this function to take the optionals
1493 // directly, but that's dangerous since it would silently break out of tree
1494 // callers.
1495 return new LoopUnroll(
1496 OptLevel, OnlyWhenForced, ForgetAllSCEV,
1497 Threshold == -1 ? std::nullopt : std::optional<unsigned>(Threshold),
1498 Count == -1 ? std::nullopt : std::optional<unsigned>(Count),
1499 AllowPartial == -1 ? std::nullopt : std::optional<bool>(AllowPartial),
1500 Runtime == -1 ? std::nullopt : std::optional<bool>(Runtime),
1501 UpperBound == -1 ? std::nullopt : std::optional<bool>(UpperBound),
1502 AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
1503}
1504
1507 LPMUpdater &Updater) {
1508 // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
1509 // pass. Function analyses need to be preserved across loop transformations
1510 // but ORE cannot be preserved (see comment before the pass definition).
1511 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
1512
1513 // Keep track of the previous loop structure so we can identify new loops
1514 // created by unrolling.
1515 Loop *ParentL = L.getParentLoop();
1516 SmallPtrSet<Loop *, 4> OldLoops;
1517 if (ParentL)
1518 OldLoops.insert(ParentL->begin(), ParentL->end());
1519 else
1520 OldLoops.insert(AR.LI.begin(), AR.LI.end());
1521
1522 std::string LoopName = std::string(L.getName());
1523
1524 bool Changed =
1525 tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
1526 /*BFI*/ nullptr, /*PSI*/ nullptr,
1527 /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
1528 OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
1529 /*Threshold*/ std::nullopt, /*AllowPartial*/ false,
1530 /*Runtime*/ false, /*UpperBound*/ false,
1531 /*AllowPeeling*/ true,
1532 /*AllowProfileBasedPeeling*/ false,
1533 /*FullUnrollMaxCount*/ std::nullopt) !=
1535 if (!Changed)
1536 return PreservedAnalyses::all();
1537
1538 // The parent must not be damaged by unrolling!
1539#ifndef NDEBUG
1540 if (ParentL)
1541 ParentL->verifyLoop();
1542#endif
1543
1544 // Unrolling can do several things to introduce new loops into a loop nest:
1545 // - Full unrolling clones child loops within the current loop but then
1546 // removes the current loop making all of the children appear to be new
1547 // sibling loops.
1548 //
1549 // When a new loop appears as a sibling loop after fully unrolling,
1550 // its nesting structure has fundamentally changed and we want to revisit
1551 // it to reflect that.
1552 //
1553 // When unrolling has removed the current loop, we need to tell the
1554 // infrastructure that it is gone.
1555 //
1556 // Finally, we support a debugging/testing mode where we revisit child loops
1557 // as well. These are not expected to require further optimizations as either
1558 // they or the loop they were cloned from have been directly visited already.
1559 // But the debugging mode allows us to check this assumption.
1560 bool IsCurrentLoopValid = false;
1561 SmallVector<Loop *, 4> SibLoops;
1562 if (ParentL)
1563 SibLoops.append(ParentL->begin(), ParentL->end());
1564 else
1565 SibLoops.append(AR.LI.begin(), AR.LI.end());
1566 erase_if(SibLoops, [&](Loop *SibLoop) {
1567 if (SibLoop == &L) {
1568 IsCurrentLoopValid = true;
1569 return true;
1570 }
1571
1572 // Otherwise erase the loop from the list if it was in the old loops.
1573 return OldLoops.contains(SibLoop);
1574 });
1575 Updater.addSiblingLoops(SibLoops);
1576
1577 if (!IsCurrentLoopValid) {
1578 Updater.markLoopAsDeleted(L, LoopName);
1579 } else {
1580 // We can only walk child loops if the current loop remained valid.
1582 // Walk *all* of the child loops.
1583 SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1584 Updater.addChildLoops(ChildLoops);
1585 }
1586 }
1587
1589}
1590
1593 auto &LI = AM.getResult<LoopAnalysis>(F);
1594 // There are no loops in the function. Return before computing other expensive
1595 // analyses.
1596 if (LI.empty())
1597 return PreservedAnalyses::all();
1598 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1599 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1600 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1601 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1603 AAResults &AA = AM.getResult<AAManager>(F);
1604
1605 LoopAnalysisManager *LAM = nullptr;
1606 if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1607 LAM = &LAMProxy->getManager();
1608
1609 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1610 ProfileSummaryInfo *PSI =
1611 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1612 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1613 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1614
1615 bool Changed = false;
1616
1617 // The unroller requires loops to be in simplified form, and also needs LCSSA.
1618 // Since simplification may add new inner loops, it has to run before the
1619 // legality and profitability checks. This means running the loop unroller
1620 // will simplify all loops, regardless of whether anything end up being
1621 // unrolled.
1622 for (const auto &L : LI) {
1623 Changed |=
1624 simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1625 Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1626 }
1627
1628 // Add the loop nests in the reverse order of LoopInfo. See method
1629 // declaration.
1631 appendLoopsToWorklist(LI, Worklist);
1632
1633 while (!Worklist.empty()) {
1634 // Because the LoopInfo stores the loops in RPO, we walk the worklist
1635 // from back to front so that we work forward across the CFG, which
1636 // for unrolling is only needed to get optimization remarks emitted in
1637 // a forward order.
1638 Loop &L = *Worklist.pop_back_val();
1639#ifndef NDEBUG
1640 Loop *ParentL = L.getParentLoop();
1641#endif
1642
1643 // Check if the profile summary indicates that the profiled application
1644 // has a huge working set size, in which case we disable peeling to avoid
1645 // bloating it further.
1646 std::optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1647 if (PSI && PSI->hasHugeWorkingSetSize())
1648 LocalAllowPeeling = false;
1649 std::string LoopName = std::string(L.getName());
1650 // The API here is quite complex to call and we allow to select some
1651 // flavors of unrolling during construction time (by setting UnrollOpts).
1653 &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1654 /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
1655 UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
1656 /*Count*/ std::nullopt,
1657 /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
1658 UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1659 UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
1660 &AA);
1661 Changed |= Result != LoopUnrollResult::Unmodified;
1662
1663 // The parent must not be damaged by unrolling!
1664#ifndef NDEBUG
1665 if (Result != LoopUnrollResult::Unmodified && ParentL)
1666 ParentL->verifyLoop();
1667#endif
1668
1669 // Clear any cached analysis results for L if we removed it completely.
1670 if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1671 LAM->clear(L, LoopName);
1672 }
1673
1674 if (!Changed)
1675 return PreservedAnalyses::all();
1676
1678}
1679
1681 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1682 static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
1683 OS, MapClassName2PassName);
1684 OS << '<';
1685 if (UnrollOpts.AllowPartial != std::nullopt)
1686 OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
1687 if (UnrollOpts.AllowPeeling != std::nullopt)
1688 OS << (*UnrollOpts.AllowPeeling ? "" : "no-") << "peeling;";
1689 if (UnrollOpts.AllowRuntime != std::nullopt)
1690 OS << (*UnrollOpts.AllowRuntime ? "" : "no-") << "runtime;";
1691 if (UnrollOpts.AllowUpperBound != std::nullopt)
1692 OS << (*UnrollOpts.AllowUpperBound ? "" : "no-") << "upperbound;";
1693 if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt)
1694 OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
1695 << "profile-peeling;";
1696 if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
1697 OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
1698 OS << 'O' << UnrollOpts.OptLevel;
1699 OS << '>';
1700}
Rewrite undef for PHI
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines DenseMapInfo traits for DenseMap.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::string Name
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
loops
Definition: LoopInfo.cpp:1209
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
static cl::opt< unsigned > UnrollThresholdDefault("unroll-threshold-default", cl::init(150), cl::Hidden, cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations"))
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
static cl::opt< unsigned > UnrollOptSizeThreshold("unroll-optsize-threshold", cl::init(0), cl::Hidden, cl::desc("The cost threshold for loop unrolling when optimizing for " "size"))
static bool hasUnrollFullPragma(const Loop *L)
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static unsigned unrollCountPragmaValue(const Loop *L)
static bool hasUnrollEnablePragma(const Loop *L)
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static std::optional< unsigned > shouldFullUnroll(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static std::optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize, unsigned MaxIterationsCountToAnalyze)
Figure out if the loop is worth full unrolling.
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static std::optional< unsigned > shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< unsigned > PragmaUnrollFullMaxIterations("pragma-unroll-full-max-iterations", cl::init(1 '000 '000), cl::Hidden, cl::desc("Maximum allowed iterations to unroll under pragma unroll full."))
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
static std::optional< unsigned > shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP)
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, std::optional< unsigned > ProvidedCount, std::optional< unsigned > ProvidedThreshold, std::optional< bool > ProvidedAllowPartial, std::optional< bool > ProvidedRuntime, std::optional< bool > ProvidedUpperBound, std::optional< bool > ProvidedAllowPeeling, std::optional< bool > ProvidedAllowProfileBasedPeeling, std::optional< unsigned > ProvidedFullUnrollMaxCount, AAResults *AA=nullptr)
static bool hasRuntimeUnrollDisablePragma(const Loop *L)
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
static cl::opt< unsigned > UnrollThresholdAggressive("unroll-threshold-aggressive", cl::init(300), cl::Hidden, cl::desc("Threshold (max size of unrolled loop) to use in aggressive (O3) " "optimizations"))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine Trace Metrics
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
LoopAnalysisManager LAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A manager for alias analyses.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:567
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:72
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void addChildLoops(ArrayRef< Loop * > NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
void addSiblingLoops(ArrayRef< Loop * > NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop.
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:111
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
void verifyLoop() const
Verify loop structure.
iterator end() const
iterator begin() const
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
iterator end() const
iterator begin() const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526
Metadata node.
Definition: Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1430
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1436
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Determine if the PriorityWorklist is empty or not.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:128
ConvergenceKind Convergence
Definition: UnrollLoop.h:134
uint64_t getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, unsigned CountOverwrite=0) const
Returns loop size estimation for unrolled loop, given the unrolling configuration specified by UP.
bool canUnroll() const
Whether it is legal to unroll this loop.
UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:144
LLVM Value Representation.
Definition: Value.h:74
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:187
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Returns a loop's estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:850
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, AAResults *AA=nullptr)
Perform some cleanup and simplifications on loops after unrolling.
Definition: LoopUnroll.cpp:342
void initializeLoopUnrollPass(PassRegistry &)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition: LoopPeel.cpp:536
auto successors(const MachineBasicBlock *BB)
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:465
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
char & LCSSAID
Definition: LCSSA.cpp:542
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:870
CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
Definition: LoopInfo.cpp:1120
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373
cl::opt< bool > ForgetSCEVInLoopUnroll
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
cl::opt< unsigned > SCEVCheapExpansionBudget
TransformationMode hasUnrollTransformation(const Loop *L)
Definition: LoopUtils.cpp:352
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:56
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
@ Unmodified
The loop was not modified.
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:44
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:294
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286
@ TM_Enable
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:283
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1814
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:47
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:45
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:458
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peele...
Definition: LoopPeel.cpp:914
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:33
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
bool OnlyWhenForced
If false, use a cost model to determine whether unrolling of a loop is profitable.
const bool ForgetSCEV
If true, forget all loops when unrolling.
std::optional< unsigned > FullUnrollMaxCount
std::optional< bool > AllowPartial
std::optional< bool > AllowRuntime
std::optional< bool > AllowProfileBasedPeeling
std::optional< bool > AllowPeeling
std::optional< bool > AllowUpperBound
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:69
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
const Instruction * Heart
Definition: UnrollLoop.h:77
unsigned SCEVExpansionBudget
Definition: UnrollLoop.h:78