LLVM  8.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseMapInfo.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Analysis/LoopPass.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
56 #include "llvm/Transforms/Utils.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <limits>
64 #include <string>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "loop-unroll"
71 
72 static cl::opt<unsigned>
73  UnrollThreshold("unroll-threshold", cl::Hidden,
74  cl::desc("The cost threshold for loop unrolling"));
75 
77  "unroll-partial-threshold", cl::Hidden,
78  cl::desc("The cost threshold for partial loop unrolling"));
79 
81  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
82  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
83  "to the threshold when aggressively unrolling a loop due to the "
84  "dynamic cost savings. If completely unrolling a loop will reduce "
85  "the total runtime from X to Y, we boost the loop unroll "
86  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
87  "X/Y). This limit avoids excessive code bloat."));
88 
90  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
91  cl::desc("Don't allow loop unrolling to simulate more than this number of"
92  "iterations when checking full unroll profitability"));
93 
95  "unroll-count", cl::Hidden,
96  cl::desc("Use this unroll count for all loops including those with "
97  "unroll_count pragma values, for testing purposes"));
98 
100  "unroll-max-count", cl::Hidden,
101  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
102  "testing purposes"));
103 
105  "unroll-full-max-count", cl::Hidden,
106  cl::desc(
107  "Set the max unroll count for full unrolling, for testing purposes"));
108 
110  "unroll-peel-count", cl::Hidden,
111  cl::desc("Set the unroll peeling count, for testing purposes"));
112 
113 static cl::opt<bool>
114  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
115  cl::desc("Allows loops to be partially unrolled until "
116  "-unroll-threshold loop size is reached."));
117 
119  "unroll-allow-remainder", cl::Hidden,
120  cl::desc("Allow generation of a loop remainder (extra iterations) "
121  "when unrolling a loop."));
122 
123 static cl::opt<bool>
124  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
125  cl::desc("Unroll loops with run-time trip counts"));
126 
128  "unroll-max-upperbound", cl::init(8), cl::Hidden,
129  cl::desc(
130  "The max of trip count upper bound that is considered in unrolling"));
131 
133  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
134  cl::desc("Unrolled size limit for loops with an unroll(full) or "
135  "unroll_count pragma."));
136 
138  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
139  cl::desc("If the runtime tripcount for the loop is lower than the "
140  "threshold, the loop is considered as flat and will be less "
141  "aggressively unrolled."));
142 
143 static cl::opt<bool>
144  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
145  cl::desc("Allows loops to be peeled when the dynamic "
146  "trip count is known to be low."));
147 
149  "unroll-remainder", cl::Hidden,
150  cl::desc("Allow the loop remainder to be unrolled."));
151 
152 // This option isn't ever intended to be enabled, it serves to allow
153 // experiments to check the assumptions about when this kind of revisit is
154 // necessary.
156  "unroll-revisit-child-loops", cl::Hidden,
157  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
158  "This shouldn't typically be needed as child loops (or their "
159  "clones) were already visited."));
160 
161 /// A magic value for use with the Threshold parameter to indicate
162 /// that the loop unroll should be performed regardless of how much
163 /// code expansion would result.
165 
166 /// Gather the various unrolling parameters based on the defaults, compiler
167 /// flags, TTI overrides and user specified parameters.
169  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
170  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
171  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
172  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
174 
175  // Set up the defaults
176  UP.Threshold = OptLevel > 2 ? 300 : 150;
177  UP.MaxPercentThresholdBoost = 400;
178  UP.OptSizeThreshold = 0;
179  UP.PartialThreshold = 150;
181  UP.Count = 0;
182  UP.PeelCount = 0;
186  UP.BEInsns = 2;
187  UP.Partial = false;
188  UP.Runtime = false;
189  UP.AllowRemainder = true;
190  UP.UnrollRemainder = false;
191  UP.AllowExpensiveTripCount = false;
192  UP.Force = false;
193  UP.UpperBound = false;
194  UP.AllowPeeling = true;
195  UP.UnrollAndJam = false;
197 
198  // Override with any target specific settings
199  TTI.getUnrollingPreferences(L, SE, UP);
200 
201  // Apply size attributes
202  if (L->getHeader()->getParent()->optForSize()) {
203  UP.Threshold = UP.OptSizeThreshold;
205  }
206 
207  // Apply any user values specified by cl::opt
208  if (UnrollThreshold.getNumOccurrences() > 0)
210  if (UnrollPartialThreshold.getNumOccurrences() > 0)
212  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
214  if (UnrollMaxCount.getNumOccurrences() > 0)
216  if (UnrollFullMaxCount.getNumOccurrences() > 0)
218  if (UnrollPeelCount.getNumOccurrences() > 0)
220  if (UnrollAllowPartial.getNumOccurrences() > 0)
222  if (UnrollAllowRemainder.getNumOccurrences() > 0)
224  if (UnrollRuntime.getNumOccurrences() > 0)
225  UP.Runtime = UnrollRuntime;
226  if (UnrollMaxUpperBound == 0)
227  UP.UpperBound = false;
228  if (UnrollAllowPeeling.getNumOccurrences() > 0)
230  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
232 
233  // Apply user values provided by argument
234  if (UserThreshold.hasValue()) {
235  UP.Threshold = *UserThreshold;
236  UP.PartialThreshold = *UserThreshold;
237  }
238  if (UserCount.hasValue())
239  UP.Count = *UserCount;
240  if (UserAllowPartial.hasValue())
241  UP.Partial = *UserAllowPartial;
242  if (UserRuntime.hasValue())
243  UP.Runtime = *UserRuntime;
244  if (UserUpperBound.hasValue())
245  UP.UpperBound = *UserUpperBound;
246  if (UserAllowPeeling.hasValue())
247  UP.AllowPeeling = *UserAllowPeeling;
248 
249  return UP;
250 }
251 
252 namespace {
253 
254 /// A struct to densely store the state of an instruction after unrolling at
255 /// each iteration.
256 ///
257 /// This is designed to work like a tuple of <Instruction *, int> for the
258 /// purposes of hashing and lookup, but to be able to associate two boolean
259 /// states with each key.
260 struct UnrolledInstState {
261  Instruction *I;
262  int Iteration : 30;
263  unsigned IsFree : 1;
264  unsigned IsCounted : 1;
265 };
266 
267 /// Hashing and equality testing for a set of the instruction states.
268 struct UnrolledInstStateKeyInfo {
269  using PtrInfo = DenseMapInfo<Instruction *>;
271 
272  static inline UnrolledInstState getEmptyKey() {
273  return {PtrInfo::getEmptyKey(), 0, 0, 0};
274  }
275 
276  static inline UnrolledInstState getTombstoneKey() {
277  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
278  }
279 
280  static inline unsigned getHashValue(const UnrolledInstState &S) {
281  return PairInfo::getHashValue({S.I, S.Iteration});
282  }
283 
284  static inline bool isEqual(const UnrolledInstState &LHS,
285  const UnrolledInstState &RHS) {
286  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
287  }
288 };
289 
290 struct EstimatedUnrollCost {
291  /// The estimated cost after unrolling.
292  unsigned UnrolledCost;
293 
294  /// The estimated dynamic cost of executing the instructions in the
295  /// rolled form.
296  unsigned RolledDynamicCost;
297 };
298 
299 } // end anonymous namespace
300 
301 /// Figure out if the loop is worth full unrolling.
302 ///
303 /// Complete loop unrolling can make some loads constant, and we need to know
304 /// if that would expose any further optimization opportunities. This routine
305 /// estimates this optimization. It computes cost of unrolled loop
306 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
307 /// dynamic cost we mean that we won't count costs of blocks that are known not
308 /// to be executed (i.e. if we have a branch in the loop and we know that at the
309 /// given iteration its condition would be resolved to true, we won't add up the
310 /// cost of the 'false'-block).
311 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
312 /// the analysis failed (no benefits expected from the unrolling, or the loop is
313 /// too big to analyze), the returned value is None.
315  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
316  const SmallPtrSetImpl<const Value *> &EphValues,
317  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
318  // We want to be able to scale offsets by the trip count and add more offsets
319  // to them without checking for overflows, and we already don't want to
320  // analyze *massive* trip counts, so we force the max to be reasonably small.
322  (unsigned)(std::numeric_limits<int>::max() / 2) &&
323  "The unroll iterations max is too large!");
324 
325  // Only analyze inner loops. We can't properly estimate cost of nested loops
326  // and we won't visit inner loops again anyway.
327  if (!L->empty())
328  return None;
329 
330  // Don't simulate loops with a big or unknown tripcount
331  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
333  return None;
334 
337  DenseMap<Value *, Constant *> SimplifiedValues;
338  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
339 
340  // The estimated cost of the unrolled form of the loop. We try to estimate
341  // this by simplifying as much as we can while computing the estimate.
342  unsigned UnrolledCost = 0;
343 
344  // We also track the estimated dynamic (that is, actually executed) cost in
345  // the rolled form. This helps identify cases when the savings from unrolling
346  // aren't just exposing dead control flows, but actual reduced dynamic
347  // instructions due to the simplifications which we expect to occur after
348  // unrolling.
349  unsigned RolledDynamicCost = 0;
350 
351  // We track the simplification of each instruction in each iteration. We use
352  // this to recursively merge costs into the unrolled cost on-demand so that
353  // we don't count the cost of any dead code. This is essentially a map from
354  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
356 
357  // A small worklist used to accumulate cost of instructions from each
358  // observable and reached root in the loop.
359  SmallVector<Instruction *, 16> CostWorklist;
360 
361  // PHI-used worklist used between iterations while accumulating cost.
362  SmallVector<Instruction *, 4> PHIUsedList;
363 
364  // Helper function to accumulate cost for instructions in the loop.
365  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
366  assert(Iteration >= 0 && "Cannot have a negative iteration!");
367  assert(CostWorklist.empty() && "Must start with an empty cost list");
368  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
369  CostWorklist.push_back(&RootI);
370  for (;; --Iteration) {
371  do {
372  Instruction *I = CostWorklist.pop_back_val();
373 
374  // InstCostMap only uses I and Iteration as a key, the other two values
375  // don't matter here.
376  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
377  if (CostIter == InstCostMap.end())
378  // If an input to a PHI node comes from a dead path through the loop
379  // we may have no cost data for it here. What that actually means is
380  // that it is free.
381  continue;
382  auto &Cost = *CostIter;
383  if (Cost.IsCounted)
384  // Already counted this instruction.
385  continue;
386 
387  // Mark that we are counting the cost of this instruction now.
388  Cost.IsCounted = true;
389 
390  // If this is a PHI node in the loop header, just add it to the PHI set.
391  if (auto *PhiI = dyn_cast<PHINode>(I))
392  if (PhiI->getParent() == L->getHeader()) {
393  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
394  "inherently simplify during unrolling.");
395  if (Iteration == 0)
396  continue;
397 
398  // Push the incoming value from the backedge into the PHI used list
399  // if it is an in-loop instruction. We'll use this to populate the
400  // cost worklist for the next iteration (as we count backwards).
401  if (auto *OpI = dyn_cast<Instruction>(
402  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
403  if (L->contains(OpI))
404  PHIUsedList.push_back(OpI);
405  continue;
406  }
407 
408  // First accumulate the cost of this instruction.
409  if (!Cost.IsFree) {
410  UnrolledCost += TTI.getUserCost(I);
411  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
412  << Iteration << "): ");
413  LLVM_DEBUG(I->dump());
414  }
415 
416  // We must count the cost of every operand which is not free,
417  // recursively. If we reach a loop PHI node, simply add it to the set
418  // to be considered on the next iteration (backwards!).
419  for (Value *Op : I->operands()) {
420  // Check whether this operand is free due to being a constant or
421  // outside the loop.
422  auto *OpI = dyn_cast<Instruction>(Op);
423  if (!OpI || !L->contains(OpI))
424  continue;
425 
426  // Otherwise accumulate its cost.
427  CostWorklist.push_back(OpI);
428  }
429  } while (!CostWorklist.empty());
430 
431  if (PHIUsedList.empty())
432  // We've exhausted the search.
433  break;
434 
435  assert(Iteration > 0 &&
436  "Cannot track PHI-used values past the first iteration!");
437  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
438  PHIUsedList.clear();
439  }
440  };
441 
442  // Ensure that we don't violate the loop structure invariants relied on by
443  // this analysis.
444  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
445  assert(L->isLCSSAForm(DT) &&
446  "Must have loops in LCSSA form to track live-out values.");
447 
448  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
449 
450  // Simulate execution of each iteration of the loop counting instructions,
451  // which would be simplified.
452  // Since the same load will take different values on different iterations,
453  // we literally have to go through all loop's iterations.
454  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
455  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
456 
457  // Prepare for the iteration by collecting any simplified entry or backedge
458  // inputs.
459  for (Instruction &I : *L->getHeader()) {
460  auto *PHI = dyn_cast<PHINode>(&I);
461  if (!PHI)
462  break;
463 
464  // The loop header PHI nodes must have exactly two input: one from the
465  // loop preheader and one from the loop latch.
466  assert(
467  PHI->getNumIncomingValues() == 2 &&
468  "Must have an incoming value only for the preheader and the latch.");
469 
470  Value *V = PHI->getIncomingValueForBlock(
471  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
472  Constant *C = dyn_cast<Constant>(V);
473  if (Iteration != 0 && !C)
474  C = SimplifiedValues.lookup(V);
475  if (C)
476  SimplifiedInputValues.push_back({PHI, C});
477  }
478 
479  // Now clear and re-populate the map for the next iteration.
480  SimplifiedValues.clear();
481  while (!SimplifiedInputValues.empty())
482  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
483 
484  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
485 
486  BBWorklist.clear();
487  BBWorklist.insert(L->getHeader());
488  // Note that we *must not* cache the size, this loop grows the worklist.
489  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
490  BasicBlock *BB = BBWorklist[Idx];
491 
492  // Visit all instructions in the given basic block and try to simplify
493  // it. We don't change the actual IR, just count optimization
494  // opportunities.
495  for (Instruction &I : *BB) {
496  // These won't get into the final code - don't even try calculating the
497  // cost for them.
498  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
499  continue;
500 
501  // Track this instruction's expected baseline cost when executing the
502  // rolled loop form.
503  RolledDynamicCost += TTI.getUserCost(&I);
504 
505  // Visit the instruction to analyze its loop cost after unrolling,
506  // and if the visitor returns true, mark the instruction as free after
507  // unrolling and continue.
508  bool IsFree = Analyzer.visit(I);
509  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
510  (unsigned)IsFree,
511  /*IsCounted*/ false}).second;
512  (void)Inserted;
513  assert(Inserted && "Cannot have a state for an unvisited instruction!");
514 
515  if (IsFree)
516  continue;
517 
518  // Can't properly model a cost of a call.
519  // FIXME: With a proper cost model we should be able to do it.
520  if (auto *CI = dyn_cast<CallInst>(&I)) {
521  const Function *Callee = CI->getCalledFunction();
522  if (!Callee || TTI.isLoweredToCall(Callee)) {
523  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
524  return None;
525  }
526  }
527 
528  // If the instruction might have a side-effect recursively account for
529  // the cost of it and all the instructions leading up to it.
530  if (I.mayHaveSideEffects())
531  AddCostRecursively(I, Iteration);
532 
533  // If unrolled body turns out to be too big, bail out.
534  if (UnrolledCost > MaxUnrolledLoopSize) {
535  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
536  << " UnrolledCost: " << UnrolledCost
537  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
538  << "\n");
539  return None;
540  }
541  }
542 
543  TerminatorInst *TI = BB->getTerminator();
544 
545  // Add in the live successors by first checking whether we have terminator
546  // that may be simplified based on the values simplified by this call.
547  BasicBlock *KnownSucc = nullptr;
548  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
549  if (BI->isConditional()) {
550  if (Constant *SimpleCond =
551  SimplifiedValues.lookup(BI->getCondition())) {
552  // Just take the first successor if condition is undef
553  if (isa<UndefValue>(SimpleCond))
554  KnownSucc = BI->getSuccessor(0);
555  else if (ConstantInt *SimpleCondVal =
556  dyn_cast<ConstantInt>(SimpleCond))
557  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
558  }
559  }
560  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
561  if (Constant *SimpleCond =
562  SimplifiedValues.lookup(SI->getCondition())) {
563  // Just take the first successor if condition is undef
564  if (isa<UndefValue>(SimpleCond))
565  KnownSucc = SI->getSuccessor(0);
566  else if (ConstantInt *SimpleCondVal =
567  dyn_cast<ConstantInt>(SimpleCond))
568  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
569  }
570  }
571  if (KnownSucc) {
572  if (L->contains(KnownSucc))
573  BBWorklist.insert(KnownSucc);
574  else
575  ExitWorklist.insert({BB, KnownSucc});
576  continue;
577  }
578 
579  // Add BB's successors to the worklist.
580  for (BasicBlock *Succ : successors(BB))
581  if (L->contains(Succ))
582  BBWorklist.insert(Succ);
583  else
584  ExitWorklist.insert({BB, Succ});
585  AddCostRecursively(*TI, Iteration);
586  }
587 
588  // If we found no optimization opportunities on the first iteration, we
589  // won't find them on later ones too.
590  if (UnrolledCost == RolledDynamicCost) {
591  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
592  << " UnrolledCost: " << UnrolledCost << "\n");
593  return None;
594  }
595  }
596 
597  while (!ExitWorklist.empty()) {
598  BasicBlock *ExitingBB, *ExitBB;
599  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
600 
601  for (Instruction &I : *ExitBB) {
602  auto *PN = dyn_cast<PHINode>(&I);
603  if (!PN)
604  break;
605 
606  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
607  if (auto *OpI = dyn_cast<Instruction>(Op))
608  if (L->contains(OpI))
609  AddCostRecursively(*OpI, TripCount - 1);
610  }
611  }
612 
613  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
614  << "UnrolledCost: " << UnrolledCost << ", "
615  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
616  return {{UnrolledCost, RolledDynamicCost}};
617 }
618 
619 /// ApproximateLoopSize - Approximate the size of the loop.
621  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
622  const TargetTransformInfo &TTI,
623  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
625  for (BasicBlock *BB : L->blocks())
626  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
627  NumCalls = Metrics.NumInlineCandidates;
628  NotDuplicatable = Metrics.notDuplicatable;
629  Convergent = Metrics.convergent;
630 
631  unsigned LoopSize = Metrics.NumInsts;
632 
633  // Don't allow an estimate of size zero. This would allows unrolling of loops
634  // with huge iteration counts, which is a compile time problem even if it's
635  // not a problem for code quality. Also, the code using this size may assume
636  // that each loop has at least three instructions (likely a conditional
637  // branch, a comparison feeding that branch, and some kind of loop increment
638  // feeding that comparison instruction).
639  LoopSize = std::max(LoopSize, BEInsns + 1);
640 
641  return LoopSize;
642 }
643 
644 // Returns the loop hint metadata node with the given name (for example,
645 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
646 // returned.
648  if (MDNode *LoopID = L->getLoopID())
649  return GetUnrollMetadata(LoopID, Name);
650  return nullptr;
651 }
652 
653 // Returns true if the loop has an unroll(full) pragma.
654 static bool HasUnrollFullPragma(const Loop *L) {
655  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
656 }
657 
658 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
659 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
660 static bool HasUnrollEnablePragma(const Loop *L) {
661  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
662 }
663 
664 // Returns true if the loop has an unroll(disable) pragma.
665 static bool HasUnrollDisablePragma(const Loop *L) {
666  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
667 }
668 
669 // Returns true if the loop has an runtime unroll(disable) pragma.
670 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
671  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
672 }
673 
674 // If loop has an unroll_count pragma return the (necessarily
675 // positive) value from the pragma. Otherwise return 0.
676 static unsigned UnrollCountPragmaValue(const Loop *L) {
677  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
678  if (MD) {
679  assert(MD->getNumOperands() == 2 &&
680  "Unroll count hint metadata should have two operands.");
681  unsigned Count =
682  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
683  assert(Count >= 1 && "Unroll count must be positive.");
684  return Count;
685  }
686  return 0;
687 }
688 
689 // Computes the boosting factor for complete unrolling.
690 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
691 // be beneficial to fully unroll the loop even if unrolledcost is large. We
692 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
693 // the unroll threshold.
694 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
695  unsigned MaxPercentThresholdBoost) {
696  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
697  return 100;
698  else if (Cost.UnrolledCost != 0)
699  // The boosting factor is RolledDynamicCost / UnrolledCost
700  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
701  MaxPercentThresholdBoost);
702  else
703  return MaxPercentThresholdBoost;
704 }
705 
706 // Returns loop size estimation for unrolled loop.
707 static uint64_t getUnrolledLoopSize(
708  unsigned LoopSize,
710  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
711  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
712 }
713 
714 // Returns true if unroll count was set explicitly.
715 // Calculates unroll count and writes it to UP.Count.
717  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
718  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
719  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
720  unsigned &TripMultiple, unsigned LoopSize,
721  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
722  // Check for explicit Count.
723  // 1st priority is unroll count set by "unroll-count" option.
724  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
725  if (UserUnrollCount) {
726  UP.Count = UnrollCount;
727  UP.AllowExpensiveTripCount = true;
728  UP.Force = true;
729  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
730  return true;
731  }
732 
733  // 2nd priority is unroll count set by pragma.
734  unsigned PragmaCount = UnrollCountPragmaValue(L);
735  if (PragmaCount > 0) {
736  UP.Count = PragmaCount;
737  UP.Runtime = true;
738  UP.AllowExpensiveTripCount = true;
739  UP.Force = true;
740  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
742  return true;
743  }
744  bool PragmaFullUnroll = HasUnrollFullPragma(L);
745  if (PragmaFullUnroll && TripCount != 0) {
746  UP.Count = TripCount;
747  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
748  return false;
749  }
750 
751  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
752  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
753  PragmaEnableUnroll || UserUnrollCount;
754 
755  if (ExplicitUnroll && TripCount != 0) {
756  // If the loop has an unrolling pragma, we want to be more aggressive with
757  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
758  // value which is larger than the default limits.
759  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
760  UP.PartialThreshold =
761  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
762  }
763 
764  // 3rd priority is full unroll count.
765  // Full unroll makes sense only when TripCount or its upper bound could be
766  // statically calculated.
767  // Also we need to check if we exceed FullUnrollMaxCount.
768  // If using the upper bound to unroll, TripMultiple should be set to 1 because
769  // we do not know when loop may exit.
770  // MaxTripCount and ExactTripCount cannot both be non zero since we only
771  // compute the former when the latter is zero.
772  unsigned ExactTripCount = TripCount;
773  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
774  "ExtractTripCount and MaxTripCount cannot both be non zero.");
775  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
776  UP.Count = FullUnrollTripCount;
777  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
778  // When computing the unrolled size, note that BEInsns are not replicated
779  // like the rest of the loop body.
780  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
781  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
782  TripCount = FullUnrollTripCount;
783  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
784  return ExplicitUnroll;
785  } else {
786  // The loop isn't that small, but we still can fully unroll it if that
787  // helps to remove a significant number of instructions.
788  // To check that, run additional analysis on the loop.
790  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
791  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
792  unsigned Boost =
794  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
795  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
796  TripCount = FullUnrollTripCount;
797  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
798  return ExplicitUnroll;
799  }
800  }
801  }
802  }
803 
804  // 4th priority is loop peeling
805  computePeelCount(L, LoopSize, UP, TripCount, SE);
806  if (UP.PeelCount) {
807  UP.Runtime = false;
808  UP.Count = 1;
809  return ExplicitUnroll;
810  }
811 
812  // 5th priority is partial unrolling.
813  // Try partial unroll only when TripCount could be statically calculated.
814  if (TripCount) {
815  UP.Partial |= ExplicitUnroll;
816  if (!UP.Partial) {
817  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
818  << "-unroll-allow-partial not given\n");
819  UP.Count = 0;
820  return false;
821  }
822  if (UP.Count == 0)
823  UP.Count = TripCount;
824  if (UP.PartialThreshold != NoThreshold) {
825  // Reduce unroll count to be modulo of TripCount for partial unrolling.
826  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
827  UP.Count =
828  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
829  (LoopSize - UP.BEInsns);
830  if (UP.Count > UP.MaxCount)
831  UP.Count = UP.MaxCount;
832  while (UP.Count != 0 && TripCount % UP.Count != 0)
833  UP.Count--;
834  if (UP.AllowRemainder && UP.Count <= 1) {
835  // If there is no Count that is modulo of TripCount, set Count to
836  // largest power-of-two factor that satisfies the threshold limit.
837  // As we'll create fixup loop, do the type of unrolling only if
838  // remainder loop is allowed.
840  while (UP.Count != 0 &&
841  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
842  UP.Count >>= 1;
843  }
844  if (UP.Count < 2) {
845  if (PragmaEnableUnroll)
846  ORE->emit([&]() {
848  "UnrollAsDirectedTooLarge",
849  L->getStartLoc(), L->getHeader())
850  << "Unable to unroll loop as directed by unroll(enable) "
851  "pragma "
852  "because unrolled size is too large.";
853  });
854  UP.Count = 0;
855  }
856  } else {
857  UP.Count = TripCount;
858  }
859  if (UP.Count > UP.MaxCount)
860  UP.Count = UP.MaxCount;
861  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
862  UP.Count != TripCount)
863  ORE->emit([&]() {
864  return OptimizationRemarkMissed(DEBUG_TYPE,
865  "FullUnrollAsDirectedTooLarge",
866  L->getStartLoc(), L->getHeader())
867  << "Unable to fully unroll loop as directed by unroll pragma "
868  "because "
869  "unrolled size is too large.";
870  });
871  return ExplicitUnroll;
872  }
873  assert(TripCount == 0 &&
874  "All cases when TripCount is constant should be covered here.");
875  if (PragmaFullUnroll)
876  ORE->emit([&]() {
878  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
879  L->getStartLoc(), L->getHeader())
880  << "Unable to fully unroll loop as directed by unroll(full) "
881  "pragma "
882  "because loop has a runtime trip count.";
883  });
884 
885  // 6th priority is runtime unrolling.
886  // Don't unroll a runtime trip count loop when it is disabled.
888  UP.Count = 0;
889  return false;
890  }
891 
892  // Check if the runtime trip count is too small when profile is available.
893  if (L->getHeader()->getParent()->hasProfileData()) {
894  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
895  if (*ProfileTripCount < FlatLoopTripCountThreshold)
896  return false;
897  else
898  UP.AllowExpensiveTripCount = true;
899  }
900  }
901 
902  // Reduce count based on the type of unrolling and the threshold values.
903  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
904  if (!UP.Runtime) {
905  LLVM_DEBUG(
906  dbgs() << " will not try to unroll loop with runtime trip count "
907  << "-unroll-runtime not given\n");
908  UP.Count = 0;
909  return false;
910  }
911  if (UP.Count == 0)
913 
914  // Reduce unroll count to be the largest power-of-two factor of
915  // the original count which satisfies the threshold limit.
916  while (UP.Count != 0 &&
917  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
918  UP.Count >>= 1;
919 
920 #ifndef NDEBUG
921  unsigned OrigCount = UP.Count;
922 #endif
923 
924  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
925  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
926  UP.Count >>= 1;
927  LLVM_DEBUG(
928  dbgs() << "Remainder loop is restricted (that could architecture "
929  "specific or because the loop contains a convergent "
930  "instruction), so unroll count must divide the trip "
931  "multiple, "
932  << TripMultiple << ". Reducing unroll count from " << OrigCount
933  << " to " << UP.Count << ".\n");
934 
935  using namespace ore;
936 
937  if (PragmaCount > 0 && !UP.AllowRemainder)
938  ORE->emit([&]() {
940  "DifferentUnrollCountFromDirected",
941  L->getStartLoc(), L->getHeader())
942  << "Unable to unroll loop the number of times directed by "
943  "unroll_count pragma because remainder loop is restricted "
944  "(that could architecture specific or because the loop "
945  "contains a convergent instruction) and so must have an "
946  "unroll "
947  "count that divides the loop trip multiple of "
948  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
949  << NV("UnrollCount", UP.Count) << " time(s).";
950  });
951  }
952 
953  if (UP.Count > UP.MaxCount)
954  UP.Count = UP.MaxCount;
955  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
956  << "\n");
957  if (UP.Count < 2)
958  UP.Count = 0;
959  return ExplicitUnroll;
960 }
961 
963  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
964  const TargetTransformInfo &TTI, AssumptionCache &AC,
965  OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
966  Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
967  Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
968  Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
969  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
970  << L->getHeader()->getParent()->getName() << "] Loop %"
971  << L->getHeader()->getName() << "\n");
972  if (HasUnrollDisablePragma(L))
974  if (!L->isLoopSimplifyForm()) {
975  LLVM_DEBUG(
976  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
978  }
979 
980  unsigned NumInlineCandidates;
981  bool NotDuplicatable;
982  bool Convergent;
984  L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
985  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
986  ProvidedAllowPeeling);
987  // Exit early if unrolling is disabled.
988  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
990 
992  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
993 
994  unsigned LoopSize =
995  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
996  TTI, EphValues, UP.BEInsns);
997  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
998  if (NotDuplicatable) {
999  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1000  << " instructions.\n");
1002  }
1003  if (NumInlineCandidates != 0) {
1004  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1006  }
1007 
1008  // Find trip count and trip multiple if count is not available
1009  unsigned TripCount = 0;
1010  unsigned MaxTripCount = 0;
1011  unsigned TripMultiple = 1;
1012  // If there are multiple exiting blocks but one of them is the latch, use the
1013  // latch for the trip count estimation. Otherwise insist on a single exiting
1014  // block for the trip count estimation.
1015  BasicBlock *ExitingBlock = L->getLoopLatch();
1016  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1017  ExitingBlock = L->getExitingBlock();
1018  if (ExitingBlock) {
1019  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1020  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1021  }
1022 
1023  // If the loop contains a convergent operation, the prelude we'd add
1024  // to do the first few instructions before we hit the unrolled loop
1025  // is unsafe -- it adds a control-flow dependency to the convergent
1026  // operation. Therefore restrict remainder loop (try unrollig without).
1027  //
1028  // TODO: This is quite conservative. In practice, convergent_op()
1029  // is likely to be called unconditionally in the loop. In this
1030  // case, the program would be ill-formed (on most architectures)
1031  // unless n were the same on all threads in a thread group.
1032  // Assuming n is the same on all threads, any kind of unrolling is
1033  // safe. But currently llvm's notion of convergence isn't powerful
1034  // enough to express this.
1035  if (Convergent)
1036  UP.AllowRemainder = false;
1037 
1038  // Try to find the trip count upper bound if we cannot find the exact trip
1039  // count.
1040  bool MaxOrZero = false;
1041  if (!TripCount) {
1042  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1043  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1044  // We can unroll by the upper bound amount if it's generally allowed or if
1045  // we know that the loop is executed either the upper bound or zero times.
1046  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1047  // loop tests remains the same compared to the non-unrolled version, whereas
1048  // the generic upper bound unrolling keeps all but the last loop test so the
1049  // number of loop tests goes up which may end up being worse on targets with
1050  // constrained branch predictor resources so is controlled by an option.)
1051  // In addition we only unroll small upper bounds.
1052  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1053  MaxTripCount = 0;
1054  }
1055  }
1056 
1057  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1058  // fully unroll the loop.
1059  bool UseUpperBound = false;
1060  bool IsCountSetExplicitly = computeUnrollCount(
1061  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1062  TripMultiple, LoopSize, UP, UseUpperBound);
1063  if (!UP.Count)
1065  // Unroll factor (Count) must be less or equal to TripCount.
1066  if (TripCount && UP.Count > TripCount)
1067  UP.Count = TripCount;
1068 
1069  // Unroll the loop.
1070  LoopUnrollResult UnrollResult = UnrollLoop(
1071  L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1072  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1073  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
1074  if (UnrollResult == LoopUnrollResult::Unmodified)
1076 
1077  // If loop has an unroll count pragma or unrolled by explicitly set count
1078  // mark loop as unrolled to prevent unrolling beyond that requested.
1079  // If the loop was peeled, we already "used up" the profile information
1080  // we had, so we don't want to unroll or peel again.
1081  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1082  (IsCountSetExplicitly || UP.PeelCount))
1084 
1085  return UnrollResult;
1086 }
1087 
1088 namespace {
1089 
1090 class LoopUnroll : public LoopPass {
1091 public:
1092  static char ID; // Pass ID, replacement for typeid
1093 
1094  int OptLevel;
1095  Optional<unsigned> ProvidedCount;
1096  Optional<unsigned> ProvidedThreshold;
1097  Optional<bool> ProvidedAllowPartial;
1098  Optional<bool> ProvidedRuntime;
1099  Optional<bool> ProvidedUpperBound;
1100  Optional<bool> ProvidedAllowPeeling;
1101 
1102  LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
1103  Optional<unsigned> Count = None,
1104  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1105  Optional<bool> UpperBound = None,
1106  Optional<bool> AllowPeeling = None)
1107  : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
1108  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1109  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1110  ProvidedAllowPeeling(AllowPeeling) {
1112  }
1113 
1114  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1115  if (skipLoop(L))
1116  return false;
1117 
1118  Function &F = *L->getHeader()->getParent();
1119 
1120  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1121  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1122  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1123  const TargetTransformInfo &TTI =
1124  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1125  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1126  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1127  // pass. Function analyses need to be preserved across loop transformations
1128  // but ORE cannot be preserved (see comment before the pass definition).
1129  OptimizationRemarkEmitter ORE(&F);
1130  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1131 
1133  L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
1134  ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1135  ProvidedUpperBound, ProvidedAllowPeeling);
1136 
1137  if (Result == LoopUnrollResult::FullyUnrolled)
1138  LPM.markLoopAsDeleted(*L);
1139 
1140  return Result != LoopUnrollResult::Unmodified;
1141  }
1142 
1143  /// This transformation requires natural loop information & requires that
1144  /// loop preheaders be inserted into the CFG...
1145  void getAnalysisUsage(AnalysisUsage &AU) const override {
1148  // FIXME: Loop passes are required to preserve domtree, and for now we just
1149  // recreate dom info if anything gets unrolled.
1151  }
1152 };
1153 
1154 } // end anonymous namespace
1155 
1156 char LoopUnroll::ID = 0;
1157 
1158 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1162 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1163 
1164 Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
1165  int AllowPartial, int Runtime, int UpperBound,
1166  int AllowPeeling) {
1167  // TODO: It would make more sense for this function to take the optionals
1168  // directly, but that's dangerous since it would silently break out of tree
1169  // callers.
1170  return new LoopUnroll(
1171  OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
1172  Count == -1 ? None : Optional<unsigned>(Count),
1173  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1174  Runtime == -1 ? None : Optional<bool>(Runtime),
1175  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1176  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1177 }
1178 
1180  return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
1181 }
1182 
1185  LPMUpdater &Updater) {
1186  const auto &FAM =
1187  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1188  Function *F = L.getHeader()->getParent();
1189 
1190  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1191  // FIXME: This should probably be optional rather than required.
1192  if (!ORE)
1194  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1195  "cached at a higher level");
1196 
1197  // Keep track of the previous loop structure so we can identify new loops
1198  // created by unrolling.
1199  Loop *ParentL = L.getParentLoop();
1200  SmallPtrSet<Loop *, 4> OldLoops;
1201  if (ParentL)
1202  OldLoops.insert(ParentL->begin(), ParentL->end());
1203  else
1204  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1205 
1206  std::string LoopName = L.getName();
1207 
1208  bool Changed =
1209  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1210  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1211  /*Threshold*/ None, /*AllowPartial*/ false,
1212  /*Runtime*/ false, /*UpperBound*/ false,
1213  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1214  if (!Changed)
1215  return PreservedAnalyses::all();
1216 
1217  // The parent must not be damaged by unrolling!
1218 #ifndef NDEBUG
1219  if (ParentL)
1220  ParentL->verifyLoop();
1221 #endif
1222 
1223  // Unrolling can do several things to introduce new loops into a loop nest:
1224  // - Full unrolling clones child loops within the current loop but then
1225  // removes the current loop making all of the children appear to be new
1226  // sibling loops.
1227  //
1228  // When a new loop appears as a sibling loop after fully unrolling,
1229  // its nesting structure has fundamentally changed and we want to revisit
1230  // it to reflect that.
1231  //
1232  // When unrolling has removed the current loop, we need to tell the
1233  // infrastructure that it is gone.
1234  //
1235  // Finally, we support a debugging/testing mode where we revisit child loops
1236  // as well. These are not expected to require further optimizations as either
1237  // they or the loop they were cloned from have been directly visited already.
1238  // But the debugging mode allows us to check this assumption.
1239  bool IsCurrentLoopValid = false;
1240  SmallVector<Loop *, 4> SibLoops;
1241  if (ParentL)
1242  SibLoops.append(ParentL->begin(), ParentL->end());
1243  else
1244  SibLoops.append(AR.LI.begin(), AR.LI.end());
1245  erase_if(SibLoops, [&](Loop *SibLoop) {
1246  if (SibLoop == &L) {
1247  IsCurrentLoopValid = true;
1248  return true;
1249  }
1250 
1251  // Otherwise erase the loop from the list if it was in the old loops.
1252  return OldLoops.count(SibLoop) != 0;
1253  });
1254  Updater.addSiblingLoops(SibLoops);
1255 
1256  if (!IsCurrentLoopValid) {
1257  Updater.markLoopAsDeleted(L, LoopName);
1258  } else {
1259  // We can only walk child loops if the current loop remained valid.
1261  // Walk *all* of the child loops.
1262  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1263  Updater.addChildLoops(ChildLoops);
1264  }
1265  }
1266 
1268 }
1269 
1270 template <typename RangeT>
1272  SmallVector<Loop *, 8> Worklist;
1273  // We use an internal worklist to build up the preorder traversal without
1274  // recursion.
1275  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1276 
1277  for (Loop *RootL : Loops) {
1278  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1279  assert(PreOrderWorklist.empty() &&
1280  "Must start with an empty preorder walk worklist.");
1281  PreOrderWorklist.push_back(RootL);
1282  do {
1283  Loop *L = PreOrderWorklist.pop_back_val();
1284  PreOrderWorklist.append(L->begin(), L->end());
1285  PreOrderLoops.push_back(L);
1286  } while (!PreOrderWorklist.empty());
1287 
1288  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1289  PreOrderLoops.clear();
1290  }
1291  return Worklist;
1292 }
1293 
1296  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1297  auto &LI = AM.getResult<LoopAnalysis>(F);
1298  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1299  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1300  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1302 
1303  LoopAnalysisManager *LAM = nullptr;
1304  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1305  LAM = &LAMProxy->getManager();
1306 
1307  const ModuleAnalysisManager &MAM =
1309  ProfileSummaryInfo *PSI =
1311 
1312  bool Changed = false;
1313 
1314  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1315  // Since simplification may add new inner loops, it has to run before the
1316  // legality and profitability checks. This means running the loop unroller
1317  // will simplify all loops, regardless of whether anything end up being
1318  // unrolled.
1319  for (auto &L : LI) {
1320  Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1321  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1322  }
1323 
1325 
1326  while (!Worklist.empty()) {
1327  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1328  // from back to front so that we work forward across the CFG, which
1329  // for unrolling is only needed to get optimization remarks emitted in
1330  // a forward order.
1331  Loop &L = *Worklist.pop_back_val();
1332 #ifndef NDEBUG
1333  Loop *ParentL = L.getParentLoop();
1334 #endif
1335 
1336  // The API here is quite complex to call, but there are only two interesting
1337  // states we support: partial and full (or "simple") unrolling. However, to
1338  // enable these things we actually pass "None" in for the optional to avoid
1339  // providing an explicit choice.
1340  Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
1341  AllowPeeling;
1342  // Check if the profile summary indicates that the profiled application
1343  // has a huge working set size, in which case we disable peeling to avoid
1344  // bloating it further.
1345  if (PSI && PSI->hasHugeWorkingSetSize())
1346  AllowPeeling = false;
1347  std::string LoopName = L.getName();
1349  tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
1350  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1351  /*Threshold*/ None, AllowPartialParam, RuntimeParam,
1352  UpperBoundParam, AllowPeeling);
1353  Changed |= Result != LoopUnrollResult::Unmodified;
1354 
1355  // The parent must not be damaged by unrolling!
1356 #ifndef NDEBUG
1357  if (Result != LoopUnrollResult::Unmodified && ParentL)
1358  ParentL->verifyLoop();
1359 #endif
1360 
1361  // Clear any cached analysis results for L if we removed it completely.
1362  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1363  LAM->clear(L, LoopName);
1364  }
1365 
1366  if (!Changed)
1367  return PreservedAnalyses::all();
1368 
1370 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:177
Pass * createLoopUnrollPass(int OptLevel=2, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:78
Metadata node.
Definition: Metadata.h:864
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:344
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4245
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
Pass * createSimpleLoopUnrollPass(int OptLevel=2)
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:939
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:100
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:55
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:203
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:660
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:261
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:424
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:238
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:392
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:335
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1154
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1180
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
iterator begin() const
Definition: LoopInfo.h:659
static bool HasUnrollDisablePragma(const Loop *L)
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:886
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:187
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Definition: LoopInfo.h:583
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:128
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:789
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
bool empty() const
Definition: LoopInfo.h:146
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:262
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:295
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:332
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:308
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:43