LLVM  6.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseMapInfo.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Analysis/LoopPass.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <limits>
63 #include <string>
64 #include <tuple>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "loop-unroll"
70 
71 static cl::opt<unsigned>
72  UnrollThreshold("unroll-threshold", cl::Hidden,
73  cl::desc("The cost threshold for loop unrolling"));
74 
76  "unroll-partial-threshold", cl::Hidden,
77  cl::desc("The cost threshold for partial loop unrolling"));
78 
80  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
81  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
82  "to the threshold when aggressively unrolling a loop due to the "
83  "dynamic cost savings. If completely unrolling a loop will reduce "
84  "the total runtime from X to Y, we boost the loop unroll "
85  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
86  "X/Y). This limit avoids excessive code bloat."));
87 
89  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
90  cl::desc("Don't allow loop unrolling to simulate more than this number of"
91  "iterations when checking full unroll profitability"));
92 
94  "unroll-count", cl::Hidden,
95  cl::desc("Use this unroll count for all loops including those with "
96  "unroll_count pragma values, for testing purposes"));
97 
99  "unroll-max-count", cl::Hidden,
100  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
101  "testing purposes"));
102 
104  "unroll-full-max-count", cl::Hidden,
105  cl::desc(
106  "Set the max unroll count for full unrolling, for testing purposes"));
107 
109  "unroll-peel-count", cl::Hidden,
110  cl::desc("Set the unroll peeling count, for testing purposes"));
111 
112 static cl::opt<bool>
113  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
114  cl::desc("Allows loops to be partially unrolled until "
115  "-unroll-threshold loop size is reached."));
116 
118  "unroll-allow-remainder", cl::Hidden,
119  cl::desc("Allow generation of a loop remainder (extra iterations) "
120  "when unrolling a loop."));
121 
122 static cl::opt<bool>
123  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
124  cl::desc("Unroll loops with run-time trip counts"));
125 
127  "unroll-max-upperbound", cl::init(8), cl::Hidden,
128  cl::desc(
129  "The max of trip count upper bound that is considered in unrolling"));
130 
132  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
133  cl::desc("Unrolled size limit for loops with an unroll(full) or "
134  "unroll_count pragma."));
135 
137  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
138  cl::desc("If the runtime tripcount for the loop is lower than the "
139  "threshold, the loop is considered as flat and will be less "
140  "aggressively unrolled."));
141 
142 static cl::opt<bool>
143  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
144  cl::desc("Allows loops to be peeled when the dynamic "
145  "trip count is known to be low."));
146 
148  "unroll-remainder", cl::Hidden,
149  cl::desc("Allow the loop remainder to be unrolled."));
150 
151 // This option isn't ever intended to be enabled, it serves to allow
152 // experiments to check the assumptions about when this kind of revisit is
153 // necessary.
155  "unroll-revisit-child-loops", cl::Hidden,
156  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
157  "This shouldn't typically be needed as child loops (or their "
158  "clones) were already visited."));
159 
160 /// A magic value for use with the Threshold parameter to indicate
161 /// that the loop unroll should be performed regardless of how much
162 /// code expansion would result.
164 
165 /// Gather the various unrolling parameters based on the defaults, compiler
166 /// flags, TTI overrides and user specified parameters.
168  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
169  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
170  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
171  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
173 
174  // Set up the defaults
175  UP.Threshold = OptLevel > 2 ? 300 : 150;
176  UP.MaxPercentThresholdBoost = 400;
177  UP.OptSizeThreshold = 0;
178  UP.PartialThreshold = 150;
180  UP.Count = 0;
181  UP.PeelCount = 0;
185  UP.BEInsns = 2;
186  UP.Partial = false;
187  UP.Runtime = false;
188  UP.AllowRemainder = true;
189  UP.UnrollRemainder = false;
190  UP.AllowExpensiveTripCount = false;
191  UP.Force = false;
192  UP.UpperBound = false;
193  UP.AllowPeeling = true;
194 
195  // Override with any target specific settings
196  TTI.getUnrollingPreferences(L, SE, UP);
197 
198  // Apply size attributes
199  if (L->getHeader()->getParent()->optForSize()) {
200  UP.Threshold = UP.OptSizeThreshold;
202  }
203 
204  // Apply any user values specified by cl::opt
205  if (UnrollThreshold.getNumOccurrences() > 0)
207  if (UnrollPartialThreshold.getNumOccurrences() > 0)
209  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
211  if (UnrollMaxCount.getNumOccurrences() > 0)
213  if (UnrollFullMaxCount.getNumOccurrences() > 0)
215  if (UnrollPeelCount.getNumOccurrences() > 0)
217  if (UnrollAllowPartial.getNumOccurrences() > 0)
219  if (UnrollAllowRemainder.getNumOccurrences() > 0)
221  if (UnrollRuntime.getNumOccurrences() > 0)
222  UP.Runtime = UnrollRuntime;
223  if (UnrollMaxUpperBound == 0)
224  UP.UpperBound = false;
225  if (UnrollAllowPeeling.getNumOccurrences() > 0)
227  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
229 
230  // Apply user values provided by argument
231  if (UserThreshold.hasValue()) {
232  UP.Threshold = *UserThreshold;
233  UP.PartialThreshold = *UserThreshold;
234  }
235  if (UserCount.hasValue())
236  UP.Count = *UserCount;
237  if (UserAllowPartial.hasValue())
238  UP.Partial = *UserAllowPartial;
239  if (UserRuntime.hasValue())
240  UP.Runtime = *UserRuntime;
241  if (UserUpperBound.hasValue())
242  UP.UpperBound = *UserUpperBound;
243  if (UserAllowPeeling.hasValue())
244  UP.AllowPeeling = *UserAllowPeeling;
245 
246  return UP;
247 }
248 
249 namespace {
250 
251 /// A struct to densely store the state of an instruction after unrolling at
252 /// each iteration.
253 ///
254 /// This is designed to work like a tuple of <Instruction *, int> for the
255 /// purposes of hashing and lookup, but to be able to associate two boolean
256 /// states with each key.
257 struct UnrolledInstState {
258  Instruction *I;
259  int Iteration : 30;
260  unsigned IsFree : 1;
261  unsigned IsCounted : 1;
262 };
263 
264 /// Hashing and equality testing for a set of the instruction states.
265 struct UnrolledInstStateKeyInfo {
266  using PtrInfo = DenseMapInfo<Instruction *>;
268 
269  static inline UnrolledInstState getEmptyKey() {
270  return {PtrInfo::getEmptyKey(), 0, 0, 0};
271  }
272 
273  static inline UnrolledInstState getTombstoneKey() {
274  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
275  }
276 
277  static inline unsigned getHashValue(const UnrolledInstState &S) {
278  return PairInfo::getHashValue({S.I, S.Iteration});
279  }
280 
281  static inline bool isEqual(const UnrolledInstState &LHS,
282  const UnrolledInstState &RHS) {
283  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
284  }
285 };
286 
287 struct EstimatedUnrollCost {
288  /// \brief The estimated cost after unrolling.
289  unsigned UnrolledCost;
290 
291  /// \brief The estimated dynamic cost of executing the instructions in the
292  /// rolled form.
293  unsigned RolledDynamicCost;
294 };
295 
296 } // end anonymous namespace
297 
298 /// \brief Figure out if the loop is worth full unrolling.
299 ///
300 /// Complete loop unrolling can make some loads constant, and we need to know
301 /// if that would expose any further optimization opportunities. This routine
302 /// estimates this optimization. It computes cost of unrolled loop
303 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
304 /// dynamic cost we mean that we won't count costs of blocks that are known not
305 /// to be executed (i.e. if we have a branch in the loop and we know that at the
306 /// given iteration its condition would be resolved to true, we won't add up the
307 /// cost of the 'false'-block).
308 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
309 /// the analysis failed (no benefits expected from the unrolling, or the loop is
310 /// too big to analyze), the returned value is None.
312 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
313  ScalarEvolution &SE, const TargetTransformInfo &TTI,
314  unsigned MaxUnrolledLoopSize) {
315  // We want to be able to scale offsets by the trip count and add more offsets
316  // to them without checking for overflows, and we already don't want to
317  // analyze *massive* trip counts, so we force the max to be reasonably small.
319  (unsigned)(std::numeric_limits<int>::max() / 2) &&
320  "The unroll iterations max is too large!");
321 
322  // Only analyze inner loops. We can't properly estimate cost of nested loops
323  // and we won't visit inner loops again anyway.
324  if (!L->empty())
325  return None;
326 
327  // Don't simulate loops with a big or unknown tripcount
328  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
330  return None;
331 
334  DenseMap<Value *, Constant *> SimplifiedValues;
335  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
336 
337  // The estimated cost of the unrolled form of the loop. We try to estimate
338  // this by simplifying as much as we can while computing the estimate.
339  unsigned UnrolledCost = 0;
340 
341  // We also track the estimated dynamic (that is, actually executed) cost in
342  // the rolled form. This helps identify cases when the savings from unrolling
343  // aren't just exposing dead control flows, but actual reduced dynamic
344  // instructions due to the simplifications which we expect to occur after
345  // unrolling.
346  unsigned RolledDynamicCost = 0;
347 
348  // We track the simplification of each instruction in each iteration. We use
349  // this to recursively merge costs into the unrolled cost on-demand so that
350  // we don't count the cost of any dead code. This is essentially a map from
351  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
353 
354  // A small worklist used to accumulate cost of instructions from each
355  // observable and reached root in the loop.
356  SmallVector<Instruction *, 16> CostWorklist;
357 
358  // PHI-used worklist used between iterations while accumulating cost.
359  SmallVector<Instruction *, 4> PHIUsedList;
360 
361  // Helper function to accumulate cost for instructions in the loop.
362  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
363  assert(Iteration >= 0 && "Cannot have a negative iteration!");
364  assert(CostWorklist.empty() && "Must start with an empty cost list");
365  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
366  CostWorklist.push_back(&RootI);
367  for (;; --Iteration) {
368  do {
369  Instruction *I = CostWorklist.pop_back_val();
370 
371  // InstCostMap only uses I and Iteration as a key, the other two values
372  // don't matter here.
373  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
374  if (CostIter == InstCostMap.end())
375  // If an input to a PHI node comes from a dead path through the loop
376  // we may have no cost data for it here. What that actually means is
377  // that it is free.
378  continue;
379  auto &Cost = *CostIter;
380  if (Cost.IsCounted)
381  // Already counted this instruction.
382  continue;
383 
384  // Mark that we are counting the cost of this instruction now.
385  Cost.IsCounted = true;
386 
387  // If this is a PHI node in the loop header, just add it to the PHI set.
388  if (auto *PhiI = dyn_cast<PHINode>(I))
389  if (PhiI->getParent() == L->getHeader()) {
390  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
391  "inherently simplify during unrolling.");
392  if (Iteration == 0)
393  continue;
394 
395  // Push the incoming value from the backedge into the PHI used list
396  // if it is an in-loop instruction. We'll use this to populate the
397  // cost worklist for the next iteration (as we count backwards).
398  if (auto *OpI = dyn_cast<Instruction>(
399  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
400  if (L->contains(OpI))
401  PHIUsedList.push_back(OpI);
402  continue;
403  }
404 
405  // First accumulate the cost of this instruction.
406  if (!Cost.IsFree) {
407  UnrolledCost += TTI.getUserCost(I);
408  DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
409  << "): ");
410  DEBUG(I->dump());
411  }
412 
413  // We must count the cost of every operand which is not free,
414  // recursively. If we reach a loop PHI node, simply add it to the set
415  // to be considered on the next iteration (backwards!).
416  for (Value *Op : I->operands()) {
417  // Check whether this operand is free due to being a constant or
418  // outside the loop.
419  auto *OpI = dyn_cast<Instruction>(Op);
420  if (!OpI || !L->contains(OpI))
421  continue;
422 
423  // Otherwise accumulate its cost.
424  CostWorklist.push_back(OpI);
425  }
426  } while (!CostWorklist.empty());
427 
428  if (PHIUsedList.empty())
429  // We've exhausted the search.
430  break;
431 
432  assert(Iteration > 0 &&
433  "Cannot track PHI-used values past the first iteration!");
434  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
435  PHIUsedList.clear();
436  }
437  };
438 
439  // Ensure that we don't violate the loop structure invariants relied on by
440  // this analysis.
441  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
442  assert(L->isLCSSAForm(DT) &&
443  "Must have loops in LCSSA form to track live-out values.");
444 
445  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
446 
447  // Simulate execution of each iteration of the loop counting instructions,
448  // which would be simplified.
449  // Since the same load will take different values on different iterations,
450  // we literally have to go through all loop's iterations.
451  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
452  DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
453 
454  // Prepare for the iteration by collecting any simplified entry or backedge
455  // inputs.
456  for (Instruction &I : *L->getHeader()) {
457  auto *PHI = dyn_cast<PHINode>(&I);
458  if (!PHI)
459  break;
460 
461  // The loop header PHI nodes must have exactly two input: one from the
462  // loop preheader and one from the loop latch.
463  assert(
464  PHI->getNumIncomingValues() == 2 &&
465  "Must have an incoming value only for the preheader and the latch.");
466 
467  Value *V = PHI->getIncomingValueForBlock(
468  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
469  Constant *C = dyn_cast<Constant>(V);
470  if (Iteration != 0 && !C)
471  C = SimplifiedValues.lookup(V);
472  if (C)
473  SimplifiedInputValues.push_back({PHI, C});
474  }
475 
476  // Now clear and re-populate the map for the next iteration.
477  SimplifiedValues.clear();
478  while (!SimplifiedInputValues.empty())
479  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
480 
481  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
482 
483  BBWorklist.clear();
484  BBWorklist.insert(L->getHeader());
485  // Note that we *must not* cache the size, this loop grows the worklist.
486  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
487  BasicBlock *BB = BBWorklist[Idx];
488 
489  // Visit all instructions in the given basic block and try to simplify
490  // it. We don't change the actual IR, just count optimization
491  // opportunities.
492  for (Instruction &I : *BB) {
493  if (isa<DbgInfoIntrinsic>(I))
494  continue;
495 
496  // Track this instruction's expected baseline cost when executing the
497  // rolled loop form.
498  RolledDynamicCost += TTI.getUserCost(&I);
499 
500  // Visit the instruction to analyze its loop cost after unrolling,
501  // and if the visitor returns true, mark the instruction as free after
502  // unrolling and continue.
503  bool IsFree = Analyzer.visit(I);
504  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
505  (unsigned)IsFree,
506  /*IsCounted*/ false}).second;
507  (void)Inserted;
508  assert(Inserted && "Cannot have a state for an unvisited instruction!");
509 
510  if (IsFree)
511  continue;
512 
513  // Can't properly model a cost of a call.
514  // FIXME: With a proper cost model we should be able to do it.
515  if(isa<CallInst>(&I))
516  return None;
517 
518  // If the instruction might have a side-effect recursively account for
519  // the cost of it and all the instructions leading up to it.
520  if (I.mayHaveSideEffects())
521  AddCostRecursively(I, Iteration);
522 
523  // If unrolled body turns out to be too big, bail out.
524  if (UnrolledCost > MaxUnrolledLoopSize) {
525  DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
526  << " UnrolledCost: " << UnrolledCost
527  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
528  << "\n");
529  return None;
530  }
531  }
532 
533  TerminatorInst *TI = BB->getTerminator();
534 
535  // Add in the live successors by first checking whether we have terminator
536  // that may be simplified based on the values simplified by this call.
537  BasicBlock *KnownSucc = nullptr;
538  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
539  if (BI->isConditional()) {
540  if (Constant *SimpleCond =
541  SimplifiedValues.lookup(BI->getCondition())) {
542  // Just take the first successor if condition is undef
543  if (isa<UndefValue>(SimpleCond))
544  KnownSucc = BI->getSuccessor(0);
545  else if (ConstantInt *SimpleCondVal =
546  dyn_cast<ConstantInt>(SimpleCond))
547  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
548  }
549  }
550  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
551  if (Constant *SimpleCond =
552  SimplifiedValues.lookup(SI->getCondition())) {
553  // Just take the first successor if condition is undef
554  if (isa<UndefValue>(SimpleCond))
555  KnownSucc = SI->getSuccessor(0);
556  else if (ConstantInt *SimpleCondVal =
557  dyn_cast<ConstantInt>(SimpleCond))
558  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
559  }
560  }
561  if (KnownSucc) {
562  if (L->contains(KnownSucc))
563  BBWorklist.insert(KnownSucc);
564  else
565  ExitWorklist.insert({BB, KnownSucc});
566  continue;
567  }
568 
569  // Add BB's successors to the worklist.
570  for (BasicBlock *Succ : successors(BB))
571  if (L->contains(Succ))
572  BBWorklist.insert(Succ);
573  else
574  ExitWorklist.insert({BB, Succ});
575  AddCostRecursively(*TI, Iteration);
576  }
577 
578  // If we found no optimization opportunities on the first iteration, we
579  // won't find them on later ones too.
580  if (UnrolledCost == RolledDynamicCost) {
581  DEBUG(dbgs() << " No opportunities found.. exiting.\n"
582  << " UnrolledCost: " << UnrolledCost << "\n");
583  return None;
584  }
585  }
586 
587  while (!ExitWorklist.empty()) {
588  BasicBlock *ExitingBB, *ExitBB;
589  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
590 
591  for (Instruction &I : *ExitBB) {
592  auto *PN = dyn_cast<PHINode>(&I);
593  if (!PN)
594  break;
595 
596  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
597  if (auto *OpI = dyn_cast<Instruction>(Op))
598  if (L->contains(OpI))
599  AddCostRecursively(*OpI, TripCount - 1);
600  }
601  }
602 
603  DEBUG(dbgs() << "Analysis finished:\n"
604  << "UnrolledCost: " << UnrolledCost << ", "
605  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
606  return {{UnrolledCost, RolledDynamicCost}};
607 }
608 
609 /// ApproximateLoopSize - Approximate the size of the loop.
610 static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
611  bool &NotDuplicatable, bool &Convergent,
612  const TargetTransformInfo &TTI,
613  AssumptionCache *AC, unsigned BEInsns) {
615  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
616 
618  for (BasicBlock *BB : L->blocks())
619  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
620  NumCalls = Metrics.NumInlineCandidates;
621  NotDuplicatable = Metrics.notDuplicatable;
622  Convergent = Metrics.convergent;
623 
624  unsigned LoopSize = Metrics.NumInsts;
625 
626  // Don't allow an estimate of size zero. This would allows unrolling of loops
627  // with huge iteration counts, which is a compile time problem even if it's
628  // not a problem for code quality. Also, the code using this size may assume
629  // that each loop has at least three instructions (likely a conditional
630  // branch, a comparison feeding that branch, and some kind of loop increment
631  // feeding that comparison instruction).
632  LoopSize = std::max(LoopSize, BEInsns + 1);
633 
634  return LoopSize;
635 }
636 
637 // Returns the loop hint metadata node with the given name (for example,
638 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
639 // returned.
641  if (MDNode *LoopID = L->getLoopID())
642  return GetUnrollMetadata(LoopID, Name);
643  return nullptr;
644 }
645 
646 // Returns true if the loop has an unroll(full) pragma.
647 static bool HasUnrollFullPragma(const Loop *L) {
648  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
649 }
650 
651 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
652 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
653 static bool HasUnrollEnablePragma(const Loop *L) {
654  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
655 }
656 
657 // Returns true if the loop has an unroll(disable) pragma.
658 static bool HasUnrollDisablePragma(const Loop *L) {
659  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
660 }
661 
662 // Returns true if the loop has an runtime unroll(disable) pragma.
663 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
664  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
665 }
666 
667 // If loop has an unroll_count pragma return the (necessarily
668 // positive) value from the pragma. Otherwise return 0.
669 static unsigned UnrollCountPragmaValue(const Loop *L) {
670  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
671  if (MD) {
672  assert(MD->getNumOperands() == 2 &&
673  "Unroll count hint metadata should have two operands.");
674  unsigned Count =
675  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
676  assert(Count >= 1 && "Unroll count must be positive.");
677  return Count;
678  }
679  return 0;
680 }
681 
682 // Computes the boosting factor for complete unrolling.
683 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
684 // be beneficial to fully unroll the loop even if unrolledcost is large. We
685 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
686 // the unroll threshold.
687 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
688  unsigned MaxPercentThresholdBoost) {
689  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
690  return 100;
691  else if (Cost.UnrolledCost != 0)
692  // The boosting factor is RolledDynamicCost / UnrolledCost
693  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
694  MaxPercentThresholdBoost);
695  else
696  return MaxPercentThresholdBoost;
697 }
698 
699 // Returns loop size estimation for unrolled loop.
700 static uint64_t getUnrolledLoopSize(
701  unsigned LoopSize,
703  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
704  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
705 }
706 
707 // Returns true if unroll count was set explicitly.
708 // Calculates unroll count and writes it to UP.Count.
709 static bool computeUnrollCount(
710  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
711  ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
712  unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
713  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
714  // Check for explicit Count.
715  // 1st priority is unroll count set by "unroll-count" option.
716  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
717  if (UserUnrollCount) {
718  UP.Count = UnrollCount;
719  UP.AllowExpensiveTripCount = true;
720  UP.Force = true;
721  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
722  return true;
723  }
724 
725  // 2nd priority is unroll count set by pragma.
726  unsigned PragmaCount = UnrollCountPragmaValue(L);
727  if (PragmaCount > 0) {
728  UP.Count = PragmaCount;
729  UP.Runtime = true;
730  UP.AllowExpensiveTripCount = true;
731  UP.Force = true;
732  if (UP.AllowRemainder &&
734  return true;
735  }
736  bool PragmaFullUnroll = HasUnrollFullPragma(L);
737  if (PragmaFullUnroll && TripCount != 0) {
738  UP.Count = TripCount;
739  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
740  return false;
741  }
742 
743  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
744  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
745  PragmaEnableUnroll || UserUnrollCount;
746 
747  if (ExplicitUnroll && TripCount != 0) {
748  // If the loop has an unrolling pragma, we want to be more aggressive with
749  // unrolling limits. Set thresholds to at least the PragmaThreshold value
750  // which is larger than the default limits.
751  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
752  UP.PartialThreshold =
753  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
754  }
755 
756  // 3rd priority is full unroll count.
757  // Full unroll makes sense only when TripCount or its upper bound could be
758  // statically calculated.
759  // Also we need to check if we exceed FullUnrollMaxCount.
760  // If using the upper bound to unroll, TripMultiple should be set to 1 because
761  // we do not know when loop may exit.
762  // MaxTripCount and ExactTripCount cannot both be non zero since we only
763  // compute the former when the latter is zero.
764  unsigned ExactTripCount = TripCount;
765  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
766  "ExtractTripCound and MaxTripCount cannot both be non zero.");
767  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
768  UP.Count = FullUnrollTripCount;
769  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
770  // When computing the unrolled size, note that BEInsns are not replicated
771  // like the rest of the loop body.
772  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
773  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
774  TripCount = FullUnrollTripCount;
775  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
776  return ExplicitUnroll;
777  } else {
778  // The loop isn't that small, but we still can fully unroll it if that
779  // helps to remove a significant number of instructions.
780  // To check that, run additional analysis on the loop.
782  L, FullUnrollTripCount, DT, SE, TTI,
783  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
784  unsigned Boost =
786  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
787  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
788  TripCount = FullUnrollTripCount;
789  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
790  return ExplicitUnroll;
791  }
792  }
793  }
794  }
795 
796  // 4th priority is loop peeling
797  computePeelCount(L, LoopSize, UP, TripCount);
798  if (UP.PeelCount) {
799  UP.Runtime = false;
800  UP.Count = 1;
801  return ExplicitUnroll;
802  }
803 
804  // 5th priority is partial unrolling.
805  // Try partial unroll only when TripCount could be staticaly calculated.
806  if (TripCount) {
807  UP.Partial |= ExplicitUnroll;
808  if (!UP.Partial) {
809  DEBUG(dbgs() << " will not try to unroll partially because "
810  << "-unroll-allow-partial not given\n");
811  UP.Count = 0;
812  return false;
813  }
814  if (UP.Count == 0)
815  UP.Count = TripCount;
816  if (UP.PartialThreshold != NoThreshold) {
817  // Reduce unroll count to be modulo of TripCount for partial unrolling.
818  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
819  UP.Count =
820  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
821  (LoopSize - UP.BEInsns);
822  if (UP.Count > UP.MaxCount)
823  UP.Count = UP.MaxCount;
824  while (UP.Count != 0 && TripCount % UP.Count != 0)
825  UP.Count--;
826  if (UP.AllowRemainder && UP.Count <= 1) {
827  // If there is no Count that is modulo of TripCount, set Count to
828  // largest power-of-two factor that satisfies the threshold limit.
829  // As we'll create fixup loop, do the type of unrolling only if
830  // remainder loop is allowed.
832  while (UP.Count != 0 &&
833  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
834  UP.Count >>= 1;
835  }
836  if (UP.Count < 2) {
837  if (PragmaEnableUnroll)
838  ORE->emit([&]() {
840  "UnrollAsDirectedTooLarge",
841  L->getStartLoc(), L->getHeader())
842  << "Unable to unroll loop as directed by unroll(enable) "
843  "pragma "
844  "because unrolled size is too large.";
845  });
846  UP.Count = 0;
847  }
848  } else {
849  UP.Count = TripCount;
850  }
851  if (UP.Count > UP.MaxCount)
852  UP.Count = UP.MaxCount;
853  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
854  UP.Count != TripCount)
855  ORE->emit([&]() {
856  return OptimizationRemarkMissed(DEBUG_TYPE,
857  "FullUnrollAsDirectedTooLarge",
858  L->getStartLoc(), L->getHeader())
859  << "Unable to fully unroll loop as directed by unroll pragma "
860  "because "
861  "unrolled size is too large.";
862  });
863  return ExplicitUnroll;
864  }
865  assert(TripCount == 0 &&
866  "All cases when TripCount is constant should be covered here.");
867  if (PragmaFullUnroll)
868  ORE->emit([&]() {
870  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
871  L->getStartLoc(), L->getHeader())
872  << "Unable to fully unroll loop as directed by unroll(full) "
873  "pragma "
874  "because loop has a runtime trip count.";
875  });
876 
877  // 6th priority is runtime unrolling.
878  // Don't unroll a runtime trip count loop when it is disabled.
880  UP.Count = 0;
881  return false;
882  }
883 
884  // Check if the runtime trip count is too small when profile is available.
885  if (L->getHeader()->getParent()->getEntryCount()) {
886  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
887  if (*ProfileTripCount < FlatLoopTripCountThreshold)
888  return false;
889  else
890  UP.AllowExpensiveTripCount = true;
891  }
892  }
893 
894  // Reduce count based on the type of unrolling and the threshold values.
895  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
896  if (!UP.Runtime) {
897  DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
898  << "-unroll-runtime not given\n");
899  UP.Count = 0;
900  return false;
901  }
902  if (UP.Count == 0)
904 
905  // Reduce unroll count to be the largest power-of-two factor of
906  // the original count which satisfies the threshold limit.
907  while (UP.Count != 0 &&
908  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
909  UP.Count >>= 1;
910 
911 #ifndef NDEBUG
912  unsigned OrigCount = UP.Count;
913 #endif
914 
915  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
916  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
917  UP.Count >>= 1;
918  DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
919  "specific or because the loop contains a convergent "
920  "instruction), so unroll count must divide the trip "
921  "multiple, "
922  << TripMultiple << ". Reducing unroll count from "
923  << OrigCount << " to " << UP.Count << ".\n");
924 
925  using namespace ore;
926 
927  if (PragmaCount > 0 && !UP.AllowRemainder)
928  ORE->emit([&]() {
930  "DifferentUnrollCountFromDirected",
931  L->getStartLoc(), L->getHeader())
932  << "Unable to unroll loop the number of times directed by "
933  "unroll_count pragma because remainder loop is restricted "
934  "(that could architecture specific or because the loop "
935  "contains a convergent instruction) and so must have an "
936  "unroll "
937  "count that divides the loop trip multiple of "
938  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
939  << NV("UnrollCount", UP.Count) << " time(s).";
940  });
941  }
942 
943  if (UP.Count > UP.MaxCount)
944  UP.Count = UP.MaxCount;
945  DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
946  if (UP.Count < 2)
947  UP.Count = 0;
948  return ExplicitUnroll;
949 }
950 
952  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
953  const TargetTransformInfo &TTI, AssumptionCache &AC,
954  OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
955  Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
956  Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
957  Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
958  DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
959  << "] Loop %" << L->getHeader()->getName() << "\n");
960  if (HasUnrollDisablePragma(L))
962  if (!L->isLoopSimplifyForm()) {
963  DEBUG(
964  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
966  }
967 
968  unsigned NumInlineCandidates;
969  bool NotDuplicatable;
970  bool Convergent;
972  L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
973  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
974  ProvidedAllowPeeling);
975  // Exit early if unrolling is disabled.
976  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
978  unsigned LoopSize = ApproximateLoopSize(
979  L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
980  DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
981  if (NotDuplicatable) {
982  DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
983  << " instructions.\n");
985  }
986  if (NumInlineCandidates != 0) {
987  DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
989  }
990 
991  // Find trip count and trip multiple if count is not available
992  unsigned TripCount = 0;
993  unsigned MaxTripCount = 0;
994  unsigned TripMultiple = 1;
995  // If there are multiple exiting blocks but one of them is the latch, use the
996  // latch for the trip count estimation. Otherwise insist on a single exiting
997  // block for the trip count estimation.
998  BasicBlock *ExitingBlock = L->getLoopLatch();
999  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1000  ExitingBlock = L->getExitingBlock();
1001  if (ExitingBlock) {
1002  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1003  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1004  }
1005 
1006  // If the loop contains a convergent operation, the prelude we'd add
1007  // to do the first few instructions before we hit the unrolled loop
1008  // is unsafe -- it adds a control-flow dependency to the convergent
1009  // operation. Therefore restrict remainder loop (try unrollig without).
1010  //
1011  // TODO: This is quite conservative. In practice, convergent_op()
1012  // is likely to be called unconditionally in the loop. In this
1013  // case, the program would be ill-formed (on most architectures)
1014  // unless n were the same on all threads in a thread group.
1015  // Assuming n is the same on all threads, any kind of unrolling is
1016  // safe. But currently llvm's notion of convergence isn't powerful
1017  // enough to express this.
1018  if (Convergent)
1019  UP.AllowRemainder = false;
1020 
1021  // Try to find the trip count upper bound if we cannot find the exact trip
1022  // count.
1023  bool MaxOrZero = false;
1024  if (!TripCount) {
1025  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1026  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1027  // We can unroll by the upper bound amount if it's generally allowed or if
1028  // we know that the loop is executed either the upper bound or zero times.
1029  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1030  // loop tests remains the same compared to the non-unrolled version, whereas
1031  // the generic upper bound unrolling keeps all but the last loop test so the
1032  // number of loop tests goes up which may end up being worse on targets with
1033  // constriained branch predictor resources so is controlled by an option.)
1034  // In addition we only unroll small upper bounds.
1035  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1036  MaxTripCount = 0;
1037  }
1038  }
1039 
1040  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1041  // fully unroll the loop.
1042  bool UseUpperBound = false;
1043  bool IsCountSetExplicitly =
1044  computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
1045  TripMultiple, LoopSize, UP, UseUpperBound);
1046  if (!UP.Count)
1048  // Unroll factor (Count) must be less or equal to TripCount.
1049  if (TripCount && UP.Count > TripCount)
1050  UP.Count = TripCount;
1051 
1052  // Unroll the loop.
1053  LoopUnrollResult UnrollResult = UnrollLoop(
1054  L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1055  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1056  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
1057  if (UnrollResult == LoopUnrollResult::Unmodified)
1059 
1060  // If loop has an unroll count pragma or unrolled by explicitly set count
1061  // mark loop as unrolled to prevent unrolling beyond that requested.
1062  // If the loop was peeled, we already "used up" the profile information
1063  // we had, so we don't want to unroll or peel again.
1064  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1065  (IsCountSetExplicitly || UP.PeelCount))
1067 
1068  return UnrollResult;
1069 }
1070 
1071 namespace {
1072 
1073 class LoopUnroll : public LoopPass {
1074 public:
1075  static char ID; // Pass ID, replacement for typeid
1076 
1077  int OptLevel;
1078  Optional<unsigned> ProvidedCount;
1079  Optional<unsigned> ProvidedThreshold;
1080  Optional<bool> ProvidedAllowPartial;
1081  Optional<bool> ProvidedRuntime;
1082  Optional<bool> ProvidedUpperBound;
1083  Optional<bool> ProvidedAllowPeeling;
1084 
1085  LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
1086  Optional<unsigned> Count = None,
1087  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1088  Optional<bool> UpperBound = None,
1089  Optional<bool> AllowPeeling = None)
1090  : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
1091  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1092  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1093  ProvidedAllowPeeling(AllowPeeling) {
1095  }
1096 
1097  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1098  if (skipLoop(L))
1099  return false;
1100 
1101  Function &F = *L->getHeader()->getParent();
1102 
1103  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1104  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1105  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1106  const TargetTransformInfo &TTI =
1107  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1108  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1109  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1110  // pass. Function analyses need to be preserved across loop transformations
1111  // but ORE cannot be preserved (see comment before the pass definition).
1112  OptimizationRemarkEmitter ORE(&F);
1113  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1114 
1116  L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
1117  ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1118  ProvidedUpperBound, ProvidedAllowPeeling);
1119 
1120  if (Result == LoopUnrollResult::FullyUnrolled)
1121  LPM.markLoopAsDeleted(*L);
1122 
1123  return Result != LoopUnrollResult::Unmodified;
1124  }
1125 
1126  /// This transformation requires natural loop information & requires that
1127  /// loop preheaders be inserted into the CFG...
1128  void getAnalysisUsage(AnalysisUsage &AU) const override {
1131  // FIXME: Loop passes are required to preserve domtree, and for now we just
1132  // recreate dom info if anything gets unrolled.
1134  }
1135 };
1136 
1137 } // end anonymous namespace
1138 
1139 char LoopUnroll::ID = 0;
1140 
1141 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1145 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1146 
1147 Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
1148  int AllowPartial, int Runtime, int UpperBound,
1149  int AllowPeeling) {
1150  // TODO: It would make more sense for this function to take the optionals
1151  // directly, but that's dangerous since it would silently break out of tree
1152  // callers.
1153  return new LoopUnroll(
1154  OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
1155  Count == -1 ? None : Optional<unsigned>(Count),
1156  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1157  Runtime == -1 ? None : Optional<bool>(Runtime),
1158  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1159  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1160 }
1161 
1163  return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
1164 }
1165 
1168  LPMUpdater &Updater) {
1169  const auto &FAM =
1170  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1171  Function *F = L.getHeader()->getParent();
1172 
1173  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1174  // FIXME: This should probably be optional rather than required.
1175  if (!ORE)
1177  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1178  "cached at a higher level");
1179 
1180  // Keep track of the previous loop structure so we can identify new loops
1181  // created by unrolling.
1182  Loop *ParentL = L.getParentLoop();
1183  SmallPtrSet<Loop *, 4> OldLoops;
1184  if (ParentL)
1185  OldLoops.insert(ParentL->begin(), ParentL->end());
1186  else
1187  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1188 
1189  std::string LoopName = L.getName();
1190 
1191  bool Changed =
1192  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1193  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1194  /*Threshold*/ None, /*AllowPartial*/ false,
1195  /*Runtime*/ false, /*UpperBound*/ false,
1196  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1197  if (!Changed)
1198  return PreservedAnalyses::all();
1199 
1200  // The parent must not be damaged by unrolling!
1201 #ifndef NDEBUG
1202  if (ParentL)
1203  ParentL->verifyLoop();
1204 #endif
1205 
1206  // Unrolling can do several things to introduce new loops into a loop nest:
1207  // - Full unrolling clones child loops within the current loop but then
1208  // removes the current loop making all of the children appear to be new
1209  // sibling loops.
1210  //
1211  // When a new loop appears as a sibling loop after fully unrolling,
1212  // its nesting structure has fundamentally changed and we want to revisit
1213  // it to reflect that.
1214  //
1215  // When unrolling has removed the current loop, we need to tell the
1216  // infrastructure that it is gone.
1217  //
1218  // Finally, we support a debugging/testing mode where we revisit child loops
1219  // as well. These are not expected to require further optimizations as either
1220  // they or the loop they were cloned from have been directly visited already.
1221  // But the debugging mode allows us to check this assumption.
1222  bool IsCurrentLoopValid = false;
1223  SmallVector<Loop *, 4> SibLoops;
1224  if (ParentL)
1225  SibLoops.append(ParentL->begin(), ParentL->end());
1226  else
1227  SibLoops.append(AR.LI.begin(), AR.LI.end());
1228  erase_if(SibLoops, [&](Loop *SibLoop) {
1229  if (SibLoop == &L) {
1230  IsCurrentLoopValid = true;
1231  return true;
1232  }
1233 
1234  // Otherwise erase the loop from the list if it was in the old loops.
1235  return OldLoops.count(SibLoop) != 0;
1236  });
1237  Updater.addSiblingLoops(SibLoops);
1238 
1239  if (!IsCurrentLoopValid) {
1240  Updater.markLoopAsDeleted(L, LoopName);
1241  } else {
1242  // We can only walk child loops if the current loop remained valid.
1244  // Walk *all* of the child loops.
1245  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1246  Updater.addChildLoops(ChildLoops);
1247  }
1248  }
1249 
1251 }
1252 
1253 template <typename RangeT>
1255  SmallVector<Loop *, 8> Worklist;
1256  // We use an internal worklist to build up the preorder traversal without
1257  // recursion.
1258  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1259 
1260  for (Loop *RootL : Loops) {
1261  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1262  assert(PreOrderWorklist.empty() &&
1263  "Must start with an empty preorder walk worklist.");
1264  PreOrderWorklist.push_back(RootL);
1265  do {
1266  Loop *L = PreOrderWorklist.pop_back_val();
1267  PreOrderWorklist.append(L->begin(), L->end());
1268  PreOrderLoops.push_back(L);
1269  } while (!PreOrderWorklist.empty());
1270 
1271  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1272  PreOrderLoops.clear();
1273  }
1274  return Worklist;
1275 }
1276 
1279  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1280  auto &LI = AM.getResult<LoopAnalysis>(F);
1281  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1282  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1283  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1285 
1286  LoopAnalysisManager *LAM = nullptr;
1287  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1288  LAM = &LAMProxy->getManager();
1289 
1290  const ModuleAnalysisManager &MAM =
1292  ProfileSummaryInfo *PSI =
1294 
1295  bool Changed = false;
1296 
1297  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1298  // Since simplification may add new inner loops, it has to run before the
1299  // legality and profitability checks. This means running the loop unroller
1300  // will simplify all loops, regardless of whether anything end up being
1301  // unrolled.
1302  for (auto &L : LI) {
1303  Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1304  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1305  }
1306 
1308 
1309  while (!Worklist.empty()) {
1310  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1311  // from back to front so that we work forward across the CFG, which
1312  // for unrolling is only needed to get optimization remarks emitted in
1313  // a forward order.
1314  Loop &L = *Worklist.pop_back_val();
1315 #ifndef NDEBUG
1316  Loop *ParentL = L.getParentLoop();
1317 #endif
1318 
1319  // The API here is quite complex to call, but there are only two interesting
1320  // states we support: partial and full (or "simple") unrolling. However, to
1321  // enable these things we actually pass "None" in for the optional to avoid
1322  // providing an explicit choice.
1323  Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
1324  AllowPeeling;
1325  // Check if the profile summary indicates that the profiled application
1326  // has a huge working set size, in which case we disable peeling to avoid
1327  // bloating it further.
1328  if (PSI && PSI->hasHugeWorkingSetSize())
1329  AllowPeeling = false;
1330  std::string LoopName = L.getName();
1331  LoopUnrollResult Result =
1332  tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
1333  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1334  /*Threshold*/ None, AllowPartialParam, RuntimeParam,
1335  UpperBoundParam, AllowPeeling);
1336  Changed |= Result != LoopUnrollResult::Unmodified;
1337 
1338  // The parent must not be damaged by unrolling!
1339 #ifndef NDEBUG
1340  if (Result != LoopUnrollResult::Unmodified && ParentL)
1341  ParentL->verifyLoop();
1342 #endif
1343 
1344  // Clear any cached analysis results for L if we removed it completely.
1345  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1346  LAM->clear(L, LoopName);
1347  }
1348 
1349  if (!Changed)
1350  return PreservedAnalyses::all();
1351 
1353 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:73
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:175
Pass * createLoopUnrollPass(int OptLevel=2, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:78
Metadata node.
Definition: Metadata.h:862
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:238
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1067
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:332
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3641
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
Pass * createSimpleLoopUnrollPass(int OptLevel=2)
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:907
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
BlockT * getHeader() const
Definition: LoopInfo.h:100
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, AssumptionCache *AC, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:54
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:184
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:635
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:271
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:530
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:222
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:1304
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount)
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Optional< uint64_t > getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1330
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:345
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
A function analysis which provides an AssumptionCache.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1062
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:880
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
iterator begin() const
Definition: LoopInfo.h:634
static bool HasUnrollDisablePragma(const Loop *L)
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:398
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:895
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
bool hasValue() const
Definition: Optional.h:137
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:213
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:558
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:420
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1023
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:706
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
bool empty() const
Definition: LoopInfo.h:146
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:556
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
#define DEBUG(X)
Definition: Debug.h:118
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:227
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1073
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:308
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:41