LLVM  7.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseMapInfo.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Analysis/LoopPass.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
56 #include "llvm/Transforms/Utils.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <limits>
64 #include <string>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "loop-unroll"
71 
72 static cl::opt<unsigned>
73  UnrollThreshold("unroll-threshold", cl::Hidden,
74  cl::desc("The cost threshold for loop unrolling"));
75 
77  "unroll-partial-threshold", cl::Hidden,
78  cl::desc("The cost threshold for partial loop unrolling"));
79 
81  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
82  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
83  "to the threshold when aggressively unrolling a loop due to the "
84  "dynamic cost savings. If completely unrolling a loop will reduce "
85  "the total runtime from X to Y, we boost the loop unroll "
86  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
87  "X/Y). This limit avoids excessive code bloat."));
88 
90  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
91  cl::desc("Don't allow loop unrolling to simulate more than this number of"
92  "iterations when checking full unroll profitability"));
93 
95  "unroll-count", cl::Hidden,
96  cl::desc("Use this unroll count for all loops including those with "
97  "unroll_count pragma values, for testing purposes"));
98 
100  "unroll-max-count", cl::Hidden,
101  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
102  "testing purposes"));
103 
105  "unroll-full-max-count", cl::Hidden,
106  cl::desc(
107  "Set the max unroll count for full unrolling, for testing purposes"));
108 
110  "unroll-peel-count", cl::Hidden,
111  cl::desc("Set the unroll peeling count, for testing purposes"));
112 
113 static cl::opt<bool>
114  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
115  cl::desc("Allows loops to be partially unrolled until "
116  "-unroll-threshold loop size is reached."));
117 
119  "unroll-allow-remainder", cl::Hidden,
120  cl::desc("Allow generation of a loop remainder (extra iterations) "
121  "when unrolling a loop."));
122 
123 static cl::opt<bool>
124  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
125  cl::desc("Unroll loops with run-time trip counts"));
126 
128  "unroll-max-upperbound", cl::init(8), cl::Hidden,
129  cl::desc(
130  "The max of trip count upper bound that is considered in unrolling"));
131 
133  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
134  cl::desc("Unrolled size limit for loops with an unroll(full) or "
135  "unroll_count pragma."));
136 
138  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
139  cl::desc("If the runtime tripcount for the loop is lower than the "
140  "threshold, the loop is considered as flat and will be less "
141  "aggressively unrolled."));
142 
143 static cl::opt<bool>
144  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
145  cl::desc("Allows loops to be peeled when the dynamic "
146  "trip count is known to be low."));
147 
149  "unroll-remainder", cl::Hidden,
150  cl::desc("Allow the loop remainder to be unrolled."));
151 
152 // This option isn't ever intended to be enabled, it serves to allow
153 // experiments to check the assumptions about when this kind of revisit is
154 // necessary.
156  "unroll-revisit-child-loops", cl::Hidden,
157  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
158  "This shouldn't typically be needed as child loops (or their "
159  "clones) were already visited."));
160 
161 /// A magic value for use with the Threshold parameter to indicate
162 /// that the loop unroll should be performed regardless of how much
163 /// code expansion would result.
165 
166 /// Gather the various unrolling parameters based on the defaults, compiler
167 /// flags, TTI overrides and user specified parameters.
169  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
170  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
171  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
172  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
174 
175  // Set up the defaults
176  UP.Threshold = OptLevel > 2 ? 300 : 150;
177  UP.MaxPercentThresholdBoost = 400;
178  UP.OptSizeThreshold = 0;
179  UP.PartialThreshold = 150;
181  UP.Count = 0;
182  UP.PeelCount = 0;
186  UP.BEInsns = 2;
187  UP.Partial = false;
188  UP.Runtime = false;
189  UP.AllowRemainder = true;
190  UP.UnrollRemainder = false;
191  UP.AllowExpensiveTripCount = false;
192  UP.Force = false;
193  UP.UpperBound = false;
194  UP.AllowPeeling = true;
195 
196  // Override with any target specific settings
197  TTI.getUnrollingPreferences(L, SE, UP);
198 
199  // Apply size attributes
200  if (L->getHeader()->getParent()->optForSize()) {
201  UP.Threshold = UP.OptSizeThreshold;
203  }
204 
205  // Apply any user values specified by cl::opt
206  if (UnrollThreshold.getNumOccurrences() > 0)
208  if (UnrollPartialThreshold.getNumOccurrences() > 0)
210  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
212  if (UnrollMaxCount.getNumOccurrences() > 0)
214  if (UnrollFullMaxCount.getNumOccurrences() > 0)
216  if (UnrollPeelCount.getNumOccurrences() > 0)
218  if (UnrollAllowPartial.getNumOccurrences() > 0)
220  if (UnrollAllowRemainder.getNumOccurrences() > 0)
222  if (UnrollRuntime.getNumOccurrences() > 0)
223  UP.Runtime = UnrollRuntime;
224  if (UnrollMaxUpperBound == 0)
225  UP.UpperBound = false;
226  if (UnrollAllowPeeling.getNumOccurrences() > 0)
228  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
230 
231  // Apply user values provided by argument
232  if (UserThreshold.hasValue()) {
233  UP.Threshold = *UserThreshold;
234  UP.PartialThreshold = *UserThreshold;
235  }
236  if (UserCount.hasValue())
237  UP.Count = *UserCount;
238  if (UserAllowPartial.hasValue())
239  UP.Partial = *UserAllowPartial;
240  if (UserRuntime.hasValue())
241  UP.Runtime = *UserRuntime;
242  if (UserUpperBound.hasValue())
243  UP.UpperBound = *UserUpperBound;
244  if (UserAllowPeeling.hasValue())
245  UP.AllowPeeling = *UserAllowPeeling;
246 
247  return UP;
248 }
249 
250 namespace {
251 
252 /// A struct to densely store the state of an instruction after unrolling at
253 /// each iteration.
254 ///
255 /// This is designed to work like a tuple of <Instruction *, int> for the
256 /// purposes of hashing and lookup, but to be able to associate two boolean
257 /// states with each key.
258 struct UnrolledInstState {
259  Instruction *I;
260  int Iteration : 30;
261  unsigned IsFree : 1;
262  unsigned IsCounted : 1;
263 };
264 
265 /// Hashing and equality testing for a set of the instruction states.
266 struct UnrolledInstStateKeyInfo {
267  using PtrInfo = DenseMapInfo<Instruction *>;
269 
270  static inline UnrolledInstState getEmptyKey() {
271  return {PtrInfo::getEmptyKey(), 0, 0, 0};
272  }
273 
274  static inline UnrolledInstState getTombstoneKey() {
275  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
276  }
277 
278  static inline unsigned getHashValue(const UnrolledInstState &S) {
279  return PairInfo::getHashValue({S.I, S.Iteration});
280  }
281 
282  static inline bool isEqual(const UnrolledInstState &LHS,
283  const UnrolledInstState &RHS) {
284  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
285  }
286 };
287 
288 struct EstimatedUnrollCost {
289  /// \brief The estimated cost after unrolling.
290  unsigned UnrolledCost;
291 
292  /// \brief The estimated dynamic cost of executing the instructions in the
293  /// rolled form.
294  unsigned RolledDynamicCost;
295 };
296 
297 } // end anonymous namespace
298 
299 /// \brief Figure out if the loop is worth full unrolling.
300 ///
301 /// Complete loop unrolling can make some loads constant, and we need to know
302 /// if that would expose any further optimization opportunities. This routine
303 /// estimates this optimization. It computes cost of unrolled loop
304 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
305 /// dynamic cost we mean that we won't count costs of blocks that are known not
306 /// to be executed (i.e. if we have a branch in the loop and we know that at the
307 /// given iteration its condition would be resolved to true, we won't add up the
308 /// cost of the 'false'-block).
309 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
310 /// the analysis failed (no benefits expected from the unrolling, or the loop is
311 /// too big to analyze), the returned value is None.
313  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
314  const SmallPtrSetImpl<const Value *> &EphValues,
315  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
316  // We want to be able to scale offsets by the trip count and add more offsets
317  // to them without checking for overflows, and we already don't want to
318  // analyze *massive* trip counts, so we force the max to be reasonably small.
320  (unsigned)(std::numeric_limits<int>::max() / 2) &&
321  "The unroll iterations max is too large!");
322 
323  // Only analyze inner loops. We can't properly estimate cost of nested loops
324  // and we won't visit inner loops again anyway.
325  if (!L->empty())
326  return None;
327 
328  // Don't simulate loops with a big or unknown tripcount
329  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
331  return None;
332 
335  DenseMap<Value *, Constant *> SimplifiedValues;
336  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
337 
338  // The estimated cost of the unrolled form of the loop. We try to estimate
339  // this by simplifying as much as we can while computing the estimate.
340  unsigned UnrolledCost = 0;
341 
342  // We also track the estimated dynamic (that is, actually executed) cost in
343  // the rolled form. This helps identify cases when the savings from unrolling
344  // aren't just exposing dead control flows, but actual reduced dynamic
345  // instructions due to the simplifications which we expect to occur after
346  // unrolling.
347  unsigned RolledDynamicCost = 0;
348 
349  // We track the simplification of each instruction in each iteration. We use
350  // this to recursively merge costs into the unrolled cost on-demand so that
351  // we don't count the cost of any dead code. This is essentially a map from
352  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
354 
355  // A small worklist used to accumulate cost of instructions from each
356  // observable and reached root in the loop.
357  SmallVector<Instruction *, 16> CostWorklist;
358 
359  // PHI-used worklist used between iterations while accumulating cost.
360  SmallVector<Instruction *, 4> PHIUsedList;
361 
362  // Helper function to accumulate cost for instructions in the loop.
363  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
364  assert(Iteration >= 0 && "Cannot have a negative iteration!");
365  assert(CostWorklist.empty() && "Must start with an empty cost list");
366  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
367  CostWorklist.push_back(&RootI);
368  for (;; --Iteration) {
369  do {
370  Instruction *I = CostWorklist.pop_back_val();
371 
372  // InstCostMap only uses I and Iteration as a key, the other two values
373  // don't matter here.
374  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
375  if (CostIter == InstCostMap.end())
376  // If an input to a PHI node comes from a dead path through the loop
377  // we may have no cost data for it here. What that actually means is
378  // that it is free.
379  continue;
380  auto &Cost = *CostIter;
381  if (Cost.IsCounted)
382  // Already counted this instruction.
383  continue;
384 
385  // Mark that we are counting the cost of this instruction now.
386  Cost.IsCounted = true;
387 
388  // If this is a PHI node in the loop header, just add it to the PHI set.
389  if (auto *PhiI = dyn_cast<PHINode>(I))
390  if (PhiI->getParent() == L->getHeader()) {
391  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
392  "inherently simplify during unrolling.");
393  if (Iteration == 0)
394  continue;
395 
396  // Push the incoming value from the backedge into the PHI used list
397  // if it is an in-loop instruction. We'll use this to populate the
398  // cost worklist for the next iteration (as we count backwards).
399  if (auto *OpI = dyn_cast<Instruction>(
400  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
401  if (L->contains(OpI))
402  PHIUsedList.push_back(OpI);
403  continue;
404  }
405 
406  // First accumulate the cost of this instruction.
407  if (!Cost.IsFree) {
408  UnrolledCost += TTI.getUserCost(I);
409  DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration
410  << "): ");
411  DEBUG(I->dump());
412  }
413 
414  // We must count the cost of every operand which is not free,
415  // recursively. If we reach a loop PHI node, simply add it to the set
416  // to be considered on the next iteration (backwards!).
417  for (Value *Op : I->operands()) {
418  // Check whether this operand is free due to being a constant or
419  // outside the loop.
420  auto *OpI = dyn_cast<Instruction>(Op);
421  if (!OpI || !L->contains(OpI))
422  continue;
423 
424  // Otherwise accumulate its cost.
425  CostWorklist.push_back(OpI);
426  }
427  } while (!CostWorklist.empty());
428 
429  if (PHIUsedList.empty())
430  // We've exhausted the search.
431  break;
432 
433  assert(Iteration > 0 &&
434  "Cannot track PHI-used values past the first iteration!");
435  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
436  PHIUsedList.clear();
437  }
438  };
439 
440  // Ensure that we don't violate the loop structure invariants relied on by
441  // this analysis.
442  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
443  assert(L->isLCSSAForm(DT) &&
444  "Must have loops in LCSSA form to track live-out values.");
445 
446  DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
447 
448  // Simulate execution of each iteration of the loop counting instructions,
449  // which would be simplified.
450  // Since the same load will take different values on different iterations,
451  // we literally have to go through all loop's iterations.
452  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
453  DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
454 
455  // Prepare for the iteration by collecting any simplified entry or backedge
456  // inputs.
457  for (Instruction &I : *L->getHeader()) {
458  auto *PHI = dyn_cast<PHINode>(&I);
459  if (!PHI)
460  break;
461 
462  // The loop header PHI nodes must have exactly two input: one from the
463  // loop preheader and one from the loop latch.
464  assert(
465  PHI->getNumIncomingValues() == 2 &&
466  "Must have an incoming value only for the preheader and the latch.");
467 
468  Value *V = PHI->getIncomingValueForBlock(
469  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
470  Constant *C = dyn_cast<Constant>(V);
471  if (Iteration != 0 && !C)
472  C = SimplifiedValues.lookup(V);
473  if (C)
474  SimplifiedInputValues.push_back({PHI, C});
475  }
476 
477  // Now clear and re-populate the map for the next iteration.
478  SimplifiedValues.clear();
479  while (!SimplifiedInputValues.empty())
480  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
481 
482  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
483 
484  BBWorklist.clear();
485  BBWorklist.insert(L->getHeader());
486  // Note that we *must not* cache the size, this loop grows the worklist.
487  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
488  BasicBlock *BB = BBWorklist[Idx];
489 
490  // Visit all instructions in the given basic block and try to simplify
491  // it. We don't change the actual IR, just count optimization
492  // opportunities.
493  for (Instruction &I : *BB) {
494  // These won't get into the final code - don't even try calculating the
495  // cost for them.
496  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
497  continue;
498 
499  // Track this instruction's expected baseline cost when executing the
500  // rolled loop form.
501  RolledDynamicCost += TTI.getUserCost(&I);
502 
503  // Visit the instruction to analyze its loop cost after unrolling,
504  // and if the visitor returns true, mark the instruction as free after
505  // unrolling and continue.
506  bool IsFree = Analyzer.visit(I);
507  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
508  (unsigned)IsFree,
509  /*IsCounted*/ false}).second;
510  (void)Inserted;
511  assert(Inserted && "Cannot have a state for an unvisited instruction!");
512 
513  if (IsFree)
514  continue;
515 
516  // Can't properly model a cost of a call.
517  // FIXME: With a proper cost model we should be able to do it.
518  if(isa<CallInst>(&I))
519  return None;
520 
521  // If the instruction might have a side-effect recursively account for
522  // the cost of it and all the instructions leading up to it.
523  if (I.mayHaveSideEffects())
524  AddCostRecursively(I, Iteration);
525 
526  // If unrolled body turns out to be too big, bail out.
527  if (UnrolledCost > MaxUnrolledLoopSize) {
528  DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
529  << " UnrolledCost: " << UnrolledCost
530  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
531  << "\n");
532  return None;
533  }
534  }
535 
536  TerminatorInst *TI = BB->getTerminator();
537 
538  // Add in the live successors by first checking whether we have terminator
539  // that may be simplified based on the values simplified by this call.
540  BasicBlock *KnownSucc = nullptr;
541  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
542  if (BI->isConditional()) {
543  if (Constant *SimpleCond =
544  SimplifiedValues.lookup(BI->getCondition())) {
545  // Just take the first successor if condition is undef
546  if (isa<UndefValue>(SimpleCond))
547  KnownSucc = BI->getSuccessor(0);
548  else if (ConstantInt *SimpleCondVal =
549  dyn_cast<ConstantInt>(SimpleCond))
550  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
551  }
552  }
553  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
554  if (Constant *SimpleCond =
555  SimplifiedValues.lookup(SI->getCondition())) {
556  // Just take the first successor if condition is undef
557  if (isa<UndefValue>(SimpleCond))
558  KnownSucc = SI->getSuccessor(0);
559  else if (ConstantInt *SimpleCondVal =
560  dyn_cast<ConstantInt>(SimpleCond))
561  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
562  }
563  }
564  if (KnownSucc) {
565  if (L->contains(KnownSucc))
566  BBWorklist.insert(KnownSucc);
567  else
568  ExitWorklist.insert({BB, KnownSucc});
569  continue;
570  }
571 
572  // Add BB's successors to the worklist.
573  for (BasicBlock *Succ : successors(BB))
574  if (L->contains(Succ))
575  BBWorklist.insert(Succ);
576  else
577  ExitWorklist.insert({BB, Succ});
578  AddCostRecursively(*TI, Iteration);
579  }
580 
581  // If we found no optimization opportunities on the first iteration, we
582  // won't find them on later ones too.
583  if (UnrolledCost == RolledDynamicCost) {
584  DEBUG(dbgs() << " No opportunities found.. exiting.\n"
585  << " UnrolledCost: " << UnrolledCost << "\n");
586  return None;
587  }
588  }
589 
590  while (!ExitWorklist.empty()) {
591  BasicBlock *ExitingBB, *ExitBB;
592  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
593 
594  for (Instruction &I : *ExitBB) {
595  auto *PN = dyn_cast<PHINode>(&I);
596  if (!PN)
597  break;
598 
599  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
600  if (auto *OpI = dyn_cast<Instruction>(Op))
601  if (L->contains(OpI))
602  AddCostRecursively(*OpI, TripCount - 1);
603  }
604  }
605 
606  DEBUG(dbgs() << "Analysis finished:\n"
607  << "UnrolledCost: " << UnrolledCost << ", "
608  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
609  return {{UnrolledCost, RolledDynamicCost}};
610 }
611 
612 /// ApproximateLoopSize - Approximate the size of the loop.
613 static unsigned
614 ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable,
615  bool &Convergent, const TargetTransformInfo &TTI,
616  const SmallPtrSetImpl<const Value *> &EphValues,
617  unsigned BEInsns) {
619  for (BasicBlock *BB : L->blocks())
620  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
621  NumCalls = Metrics.NumInlineCandidates;
622  NotDuplicatable = Metrics.notDuplicatable;
623  Convergent = Metrics.convergent;
624 
625  unsigned LoopSize = Metrics.NumInsts;
626 
627  // Don't allow an estimate of size zero. This would allows unrolling of loops
628  // with huge iteration counts, which is a compile time problem even if it's
629  // not a problem for code quality. Also, the code using this size may assume
630  // that each loop has at least three instructions (likely a conditional
631  // branch, a comparison feeding that branch, and some kind of loop increment
632  // feeding that comparison instruction).
633  LoopSize = std::max(LoopSize, BEInsns + 1);
634 
635  return LoopSize;
636 }
637 
638 // Returns the loop hint metadata node with the given name (for example,
639 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
640 // returned.
642  if (MDNode *LoopID = L->getLoopID())
643  return GetUnrollMetadata(LoopID, Name);
644  return nullptr;
645 }
646 
647 // Returns true if the loop has an unroll(full) pragma.
648 static bool HasUnrollFullPragma(const Loop *L) {
649  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
650 }
651 
652 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
653 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
654 static bool HasUnrollEnablePragma(const Loop *L) {
655  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
656 }
657 
658 // Returns true if the loop has an unroll(disable) pragma.
659 static bool HasUnrollDisablePragma(const Loop *L) {
660  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
661 }
662 
663 // Returns true if the loop has an runtime unroll(disable) pragma.
664 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
665  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
666 }
667 
668 // If loop has an unroll_count pragma return the (necessarily
669 // positive) value from the pragma. Otherwise return 0.
670 static unsigned UnrollCountPragmaValue(const Loop *L) {
671  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
672  if (MD) {
673  assert(MD->getNumOperands() == 2 &&
674  "Unroll count hint metadata should have two operands.");
675  unsigned Count =
676  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
677  assert(Count >= 1 && "Unroll count must be positive.");
678  return Count;
679  }
680  return 0;
681 }
682 
683 // Computes the boosting factor for complete unrolling.
684 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
685 // be beneficial to fully unroll the loop even if unrolledcost is large. We
686 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
687 // the unroll threshold.
688 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
689  unsigned MaxPercentThresholdBoost) {
690  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
691  return 100;
692  else if (Cost.UnrolledCost != 0)
693  // The boosting factor is RolledDynamicCost / UnrolledCost
694  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
695  MaxPercentThresholdBoost);
696  else
697  return MaxPercentThresholdBoost;
698 }
699 
700 // Returns loop size estimation for unrolled loop.
701 static uint64_t getUnrolledLoopSize(
702  unsigned LoopSize,
704  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
705  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
706 }
707 
708 // Returns true if unroll count was set explicitly.
709 // Calculates unroll count and writes it to UP.Count.
710 static bool computeUnrollCount(
711  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
712  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
713  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
714  unsigned &TripMultiple, unsigned LoopSize,
715  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
716  // Check for explicit Count.
717  // 1st priority is unroll count set by "unroll-count" option.
718  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
719  if (UserUnrollCount) {
720  UP.Count = UnrollCount;
721  UP.AllowExpensiveTripCount = true;
722  UP.Force = true;
723  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
724  return true;
725  }
726 
727  // 2nd priority is unroll count set by pragma.
728  unsigned PragmaCount = UnrollCountPragmaValue(L);
729  if (PragmaCount > 0) {
730  UP.Count = PragmaCount;
731  UP.Runtime = true;
732  UP.AllowExpensiveTripCount = true;
733  UP.Force = true;
734  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
736  return true;
737  }
738  bool PragmaFullUnroll = HasUnrollFullPragma(L);
739  if (PragmaFullUnroll && TripCount != 0) {
740  UP.Count = TripCount;
741  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
742  return false;
743  }
744 
745  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
746  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
747  PragmaEnableUnroll || UserUnrollCount;
748 
749  if (ExplicitUnroll && TripCount != 0) {
750  // If the loop has an unrolling pragma, we want to be more aggressive with
751  // unrolling limits. Set thresholds to at least the PragmaThreshold value
752  // which is larger than the default limits.
753  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
754  UP.PartialThreshold =
755  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
756  }
757 
758  // 3rd priority is full unroll count.
759  // Full unroll makes sense only when TripCount or its upper bound could be
760  // statically calculated.
761  // Also we need to check if we exceed FullUnrollMaxCount.
762  // If using the upper bound to unroll, TripMultiple should be set to 1 because
763  // we do not know when loop may exit.
764  // MaxTripCount and ExactTripCount cannot both be non zero since we only
765  // compute the former when the latter is zero.
766  unsigned ExactTripCount = TripCount;
767  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
768  "ExtractTripCound and MaxTripCount cannot both be non zero.");
769  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
770  UP.Count = FullUnrollTripCount;
771  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
772  // When computing the unrolled size, note that BEInsns are not replicated
773  // like the rest of the loop body.
774  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
775  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
776  TripCount = FullUnrollTripCount;
777  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
778  return ExplicitUnroll;
779  } else {
780  // The loop isn't that small, but we still can fully unroll it if that
781  // helps to remove a significant number of instructions.
782  // To check that, run additional analysis on the loop.
784  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
785  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
786  unsigned Boost =
788  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
789  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
790  TripCount = FullUnrollTripCount;
791  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
792  return ExplicitUnroll;
793  }
794  }
795  }
796  }
797 
798  // 4th priority is loop peeling
799  computePeelCount(L, LoopSize, UP, TripCount, SE);
800  if (UP.PeelCount) {
801  UP.Runtime = false;
802  UP.Count = 1;
803  return ExplicitUnroll;
804  }
805 
806  // 5th priority is partial unrolling.
807  // Try partial unroll only when TripCount could be staticaly calculated.
808  if (TripCount) {
809  UP.Partial |= ExplicitUnroll;
810  if (!UP.Partial) {
811  DEBUG(dbgs() << " will not try to unroll partially because "
812  << "-unroll-allow-partial not given\n");
813  UP.Count = 0;
814  return false;
815  }
816  if (UP.Count == 0)
817  UP.Count = TripCount;
818  if (UP.PartialThreshold != NoThreshold) {
819  // Reduce unroll count to be modulo of TripCount for partial unrolling.
820  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
821  UP.Count =
822  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
823  (LoopSize - UP.BEInsns);
824  if (UP.Count > UP.MaxCount)
825  UP.Count = UP.MaxCount;
826  while (UP.Count != 0 && TripCount % UP.Count != 0)
827  UP.Count--;
828  if (UP.AllowRemainder && UP.Count <= 1) {
829  // If there is no Count that is modulo of TripCount, set Count to
830  // largest power-of-two factor that satisfies the threshold limit.
831  // As we'll create fixup loop, do the type of unrolling only if
832  // remainder loop is allowed.
834  while (UP.Count != 0 &&
835  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
836  UP.Count >>= 1;
837  }
838  if (UP.Count < 2) {
839  if (PragmaEnableUnroll)
840  ORE->emit([&]() {
842  "UnrollAsDirectedTooLarge",
843  L->getStartLoc(), L->getHeader())
844  << "Unable to unroll loop as directed by unroll(enable) "
845  "pragma "
846  "because unrolled size is too large.";
847  });
848  UP.Count = 0;
849  }
850  } else {
851  UP.Count = TripCount;
852  }
853  if (UP.Count > UP.MaxCount)
854  UP.Count = UP.MaxCount;
855  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
856  UP.Count != TripCount)
857  ORE->emit([&]() {
858  return OptimizationRemarkMissed(DEBUG_TYPE,
859  "FullUnrollAsDirectedTooLarge",
860  L->getStartLoc(), L->getHeader())
861  << "Unable to fully unroll loop as directed by unroll pragma "
862  "because "
863  "unrolled size is too large.";
864  });
865  return ExplicitUnroll;
866  }
867  assert(TripCount == 0 &&
868  "All cases when TripCount is constant should be covered here.");
869  if (PragmaFullUnroll)
870  ORE->emit([&]() {
872  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
873  L->getStartLoc(), L->getHeader())
874  << "Unable to fully unroll loop as directed by unroll(full) "
875  "pragma "
876  "because loop has a runtime trip count.";
877  });
878 
879  // 6th priority is runtime unrolling.
880  // Don't unroll a runtime trip count loop when it is disabled.
882  UP.Count = 0;
883  return false;
884  }
885 
886  // Check if the runtime trip count is too small when profile is available.
887  if (L->getHeader()->getParent()->hasProfileData()) {
888  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
889  if (*ProfileTripCount < FlatLoopTripCountThreshold)
890  return false;
891  else
892  UP.AllowExpensiveTripCount = true;
893  }
894  }
895 
896  // Reduce count based on the type of unrolling and the threshold values.
897  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
898  if (!UP.Runtime) {
899  DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
900  << "-unroll-runtime not given\n");
901  UP.Count = 0;
902  return false;
903  }
904  if (UP.Count == 0)
906 
907  // Reduce unroll count to be the largest power-of-two factor of
908  // the original count which satisfies the threshold limit.
909  while (UP.Count != 0 &&
910  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
911  UP.Count >>= 1;
912 
913 #ifndef NDEBUG
914  unsigned OrigCount = UP.Count;
915 #endif
916 
917  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
918  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
919  UP.Count >>= 1;
920  DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
921  "specific or because the loop contains a convergent "
922  "instruction), so unroll count must divide the trip "
923  "multiple, "
924  << TripMultiple << ". Reducing unroll count from "
925  << OrigCount << " to " << UP.Count << ".\n");
926 
927  using namespace ore;
928 
929  if (PragmaCount > 0 && !UP.AllowRemainder)
930  ORE->emit([&]() {
932  "DifferentUnrollCountFromDirected",
933  L->getStartLoc(), L->getHeader())
934  << "Unable to unroll loop the number of times directed by "
935  "unroll_count pragma because remainder loop is restricted "
936  "(that could architecture specific or because the loop "
937  "contains a convergent instruction) and so must have an "
938  "unroll "
939  "count that divides the loop trip multiple of "
940  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
941  << NV("UnrollCount", UP.Count) << " time(s).";
942  });
943  }
944 
945  if (UP.Count > UP.MaxCount)
946  UP.Count = UP.MaxCount;
947  DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n");
948  if (UP.Count < 2)
949  UP.Count = 0;
950  return ExplicitUnroll;
951 }
952 
954  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
955  const TargetTransformInfo &TTI, AssumptionCache &AC,
956  OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
957  Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold,
958  Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime,
959  Optional<bool> ProvidedUpperBound, Optional<bool> ProvidedAllowPeeling) {
960  DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
961  << "] Loop %" << L->getHeader()->getName() << "\n");
962  if (HasUnrollDisablePragma(L))
964  if (!L->isLoopSimplifyForm()) {
965  DEBUG(
966  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
968  }
969 
970  unsigned NumInlineCandidates;
971  bool NotDuplicatable;
972  bool Convergent;
974  L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
975  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
976  ProvidedAllowPeeling);
977  // Exit early if unrolling is disabled.
978  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
980 
982  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
983 
984  unsigned LoopSize =
985  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
986  TTI, EphValues, UP.BEInsns);
987  DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
988  if (NotDuplicatable) {
989  DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
990  << " instructions.\n");
992  }
993  if (NumInlineCandidates != 0) {
994  DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
996  }
997 
998  // Find trip count and trip multiple if count is not available
999  unsigned TripCount = 0;
1000  unsigned MaxTripCount = 0;
1001  unsigned TripMultiple = 1;
1002  // If there are multiple exiting blocks but one of them is the latch, use the
1003  // latch for the trip count estimation. Otherwise insist on a single exiting
1004  // block for the trip count estimation.
1005  BasicBlock *ExitingBlock = L->getLoopLatch();
1006  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1007  ExitingBlock = L->getExitingBlock();
1008  if (ExitingBlock) {
1009  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1010  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1011  }
1012 
1013  // If the loop contains a convergent operation, the prelude we'd add
1014  // to do the first few instructions before we hit the unrolled loop
1015  // is unsafe -- it adds a control-flow dependency to the convergent
1016  // operation. Therefore restrict remainder loop (try unrollig without).
1017  //
1018  // TODO: This is quite conservative. In practice, convergent_op()
1019  // is likely to be called unconditionally in the loop. In this
1020  // case, the program would be ill-formed (on most architectures)
1021  // unless n were the same on all threads in a thread group.
1022  // Assuming n is the same on all threads, any kind of unrolling is
1023  // safe. But currently llvm's notion of convergence isn't powerful
1024  // enough to express this.
1025  if (Convergent)
1026  UP.AllowRemainder = false;
1027 
1028  // Try to find the trip count upper bound if we cannot find the exact trip
1029  // count.
1030  bool MaxOrZero = false;
1031  if (!TripCount) {
1032  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1033  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1034  // We can unroll by the upper bound amount if it's generally allowed or if
1035  // we know that the loop is executed either the upper bound or zero times.
1036  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1037  // loop tests remains the same compared to the non-unrolled version, whereas
1038  // the generic upper bound unrolling keeps all but the last loop test so the
1039  // number of loop tests goes up which may end up being worse on targets with
1040  // constriained branch predictor resources so is controlled by an option.)
1041  // In addition we only unroll small upper bounds.
1042  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1043  MaxTripCount = 0;
1044  }
1045  }
1046 
1047  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1048  // fully unroll the loop.
1049  bool UseUpperBound = false;
1050  bool IsCountSetExplicitly = computeUnrollCount(
1051  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1052  TripMultiple, LoopSize, UP, UseUpperBound);
1053  if (!UP.Count)
1055  // Unroll factor (Count) must be less or equal to TripCount.
1056  if (TripCount && UP.Count > TripCount)
1057  UP.Count = TripCount;
1058 
1059  // Unroll the loop.
1060  LoopUnrollResult UnrollResult = UnrollLoop(
1061  L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1062  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1063  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
1064  if (UnrollResult == LoopUnrollResult::Unmodified)
1066 
1067  // If loop has an unroll count pragma or unrolled by explicitly set count
1068  // mark loop as unrolled to prevent unrolling beyond that requested.
1069  // If the loop was peeled, we already "used up" the profile information
1070  // we had, so we don't want to unroll or peel again.
1071  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1072  (IsCountSetExplicitly || UP.PeelCount))
1074 
1075  return UnrollResult;
1076 }
1077 
1078 namespace {
1079 
1080 class LoopUnroll : public LoopPass {
1081 public:
1082  static char ID; // Pass ID, replacement for typeid
1083 
1084  int OptLevel;
1085  Optional<unsigned> ProvidedCount;
1086  Optional<unsigned> ProvidedThreshold;
1087  Optional<bool> ProvidedAllowPartial;
1088  Optional<bool> ProvidedRuntime;
1089  Optional<bool> ProvidedUpperBound;
1090  Optional<bool> ProvidedAllowPeeling;
1091 
1092  LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
1093  Optional<unsigned> Count = None,
1094  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1095  Optional<bool> UpperBound = None,
1096  Optional<bool> AllowPeeling = None)
1097  : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
1098  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1099  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1100  ProvidedAllowPeeling(AllowPeeling) {
1102  }
1103 
1104  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1105  if (skipLoop(L))
1106  return false;
1107 
1108  Function &F = *L->getHeader()->getParent();
1109 
1110  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1111  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1112  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1113  const TargetTransformInfo &TTI =
1114  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1115  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1116  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1117  // pass. Function analyses need to be preserved across loop transformations
1118  // but ORE cannot be preserved (see comment before the pass definition).
1119  OptimizationRemarkEmitter ORE(&F);
1120  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1121 
1123  L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount,
1124  ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1125  ProvidedUpperBound, ProvidedAllowPeeling);
1126 
1127  if (Result == LoopUnrollResult::FullyUnrolled)
1128  LPM.markLoopAsDeleted(*L);
1129 
1130  return Result != LoopUnrollResult::Unmodified;
1131  }
1132 
1133  /// This transformation requires natural loop information & requires that
1134  /// loop preheaders be inserted into the CFG...
1135  void getAnalysisUsage(AnalysisUsage &AU) const override {
1138  // FIXME: Loop passes are required to preserve domtree, and for now we just
1139  // recreate dom info if anything gets unrolled.
1141  }
1142 };
1143 
1144 } // end anonymous namespace
1145 
1146 char LoopUnroll::ID = 0;
1147 
1148 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1152 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1153 
1154 Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
1155  int AllowPartial, int Runtime, int UpperBound,
1156  int AllowPeeling) {
1157  // TODO: It would make more sense for this function to take the optionals
1158  // directly, but that's dangerous since it would silently break out of tree
1159  // callers.
1160  return new LoopUnroll(
1161  OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
1162  Count == -1 ? None : Optional<unsigned>(Count),
1163  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1164  Runtime == -1 ? None : Optional<bool>(Runtime),
1165  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1166  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1167 }
1168 
1170  return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0);
1171 }
1172 
1175  LPMUpdater &Updater) {
1176  const auto &FAM =
1177  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1178  Function *F = L.getHeader()->getParent();
1179 
1180  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1181  // FIXME: This should probably be optional rather than required.
1182  if (!ORE)
1184  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1185  "cached at a higher level");
1186 
1187  // Keep track of the previous loop structure so we can identify new loops
1188  // created by unrolling.
1189  Loop *ParentL = L.getParentLoop();
1190  SmallPtrSet<Loop *, 4> OldLoops;
1191  if (ParentL)
1192  OldLoops.insert(ParentL->begin(), ParentL->end());
1193  else
1194  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1195 
1196  std::string LoopName = L.getName();
1197 
1198  bool Changed =
1199  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1200  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1201  /*Threshold*/ None, /*AllowPartial*/ false,
1202  /*Runtime*/ false, /*UpperBound*/ false,
1203  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1204  if (!Changed)
1205  return PreservedAnalyses::all();
1206 
1207  // The parent must not be damaged by unrolling!
1208 #ifndef NDEBUG
1209  if (ParentL)
1210  ParentL->verifyLoop();
1211 #endif
1212 
1213  // Unrolling can do several things to introduce new loops into a loop nest:
1214  // - Full unrolling clones child loops within the current loop but then
1215  // removes the current loop making all of the children appear to be new
1216  // sibling loops.
1217  //
1218  // When a new loop appears as a sibling loop after fully unrolling,
1219  // its nesting structure has fundamentally changed and we want to revisit
1220  // it to reflect that.
1221  //
1222  // When unrolling has removed the current loop, we need to tell the
1223  // infrastructure that it is gone.
1224  //
1225  // Finally, we support a debugging/testing mode where we revisit child loops
1226  // as well. These are not expected to require further optimizations as either
1227  // they or the loop they were cloned from have been directly visited already.
1228  // But the debugging mode allows us to check this assumption.
1229  bool IsCurrentLoopValid = false;
1230  SmallVector<Loop *, 4> SibLoops;
1231  if (ParentL)
1232  SibLoops.append(ParentL->begin(), ParentL->end());
1233  else
1234  SibLoops.append(AR.LI.begin(), AR.LI.end());
1235  erase_if(SibLoops, [&](Loop *SibLoop) {
1236  if (SibLoop == &L) {
1237  IsCurrentLoopValid = true;
1238  return true;
1239  }
1240 
1241  // Otherwise erase the loop from the list if it was in the old loops.
1242  return OldLoops.count(SibLoop) != 0;
1243  });
1244  Updater.addSiblingLoops(SibLoops);
1245 
1246  if (!IsCurrentLoopValid) {
1247  Updater.markLoopAsDeleted(L, LoopName);
1248  } else {
1249  // We can only walk child loops if the current loop remained valid.
1251  // Walk *all* of the child loops.
1252  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1253  Updater.addChildLoops(ChildLoops);
1254  }
1255  }
1256 
1258 }
1259 
1260 template <typename RangeT>
1262  SmallVector<Loop *, 8> Worklist;
1263  // We use an internal worklist to build up the preorder traversal without
1264  // recursion.
1265  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1266 
1267  for (Loop *RootL : Loops) {
1268  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1269  assert(PreOrderWorklist.empty() &&
1270  "Must start with an empty preorder walk worklist.");
1271  PreOrderWorklist.push_back(RootL);
1272  do {
1273  Loop *L = PreOrderWorklist.pop_back_val();
1274  PreOrderWorklist.append(L->begin(), L->end());
1275  PreOrderLoops.push_back(L);
1276  } while (!PreOrderWorklist.empty());
1277 
1278  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1279  PreOrderLoops.clear();
1280  }
1281  return Worklist;
1282 }
1283 
1286  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1287  auto &LI = AM.getResult<LoopAnalysis>(F);
1288  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1289  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1290  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1292 
1293  LoopAnalysisManager *LAM = nullptr;
1294  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1295  LAM = &LAMProxy->getManager();
1296 
1297  const ModuleAnalysisManager &MAM =
1299  ProfileSummaryInfo *PSI =
1301 
1302  bool Changed = false;
1303 
1304  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1305  // Since simplification may add new inner loops, it has to run before the
1306  // legality and profitability checks. This means running the loop unroller
1307  // will simplify all loops, regardless of whether anything end up being
1308  // unrolled.
1309  for (auto &L : LI) {
1310  Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1311  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1312  }
1313 
1315 
1316  while (!Worklist.empty()) {
1317  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1318  // from back to front so that we work forward across the CFG, which
1319  // for unrolling is only needed to get optimization remarks emitted in
1320  // a forward order.
1321  Loop &L = *Worklist.pop_back_val();
1322 #ifndef NDEBUG
1323  Loop *ParentL = L.getParentLoop();
1324 #endif
1325 
1326  // The API here is quite complex to call, but there are only two interesting
1327  // states we support: partial and full (or "simple") unrolling. However, to
1328  // enable these things we actually pass "None" in for the optional to avoid
1329  // providing an explicit choice.
1330  Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
1331  AllowPeeling;
1332  // Check if the profile summary indicates that the profiled application
1333  // has a huge working set size, in which case we disable peeling to avoid
1334  // bloating it further.
1335  if (PSI && PSI->hasHugeWorkingSetSize())
1336  AllowPeeling = false;
1337  std::string LoopName = L.getName();
1338  LoopUnrollResult Result =
1339  tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
1340  /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
1341  /*Threshold*/ None, AllowPartialParam, RuntimeParam,
1342  UpperBoundParam, AllowPeeling);
1343  Changed |= Result != LoopUnrollResult::Unmodified;
1344 
1345  // The parent must not be damaged by unrolling!
1346 #ifndef NDEBUG
1347  if (Result != LoopUnrollResult::Unmodified && ParentL)
1348  ParentL->verifyLoop();
1349 #endif
1350 
1351  // Clear any cached analysis results for L if we removed it completely.
1352  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1353  LAM->clear(L, LoopName);
1354  }
1355 
1356  if (!Changed)
1357  return PreservedAnalyses::all();
1358 
1360 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:115
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:175
Pass * createLoopUnrollPass(int OptLevel=2, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:78
Metadata node.
Definition: Metadata.h:862
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:225
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1067
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:338
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3682
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Hexagon Hardware Loops
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
Pass * createSimpleLoopUnrollPass(int OptLevel=2)
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:933
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:100
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:142
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:55
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:197
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:654
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:271
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:418
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:579
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:238
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:1486
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:345
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
A function analysis which provides an AssumptionCache.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1062
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:963
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
iterator begin() const
Definition: LoopInfo.h:653
static bool HasUnrollDisablePragma(const Loop *L)
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:396
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:879
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
bool hasValue() const
Definition: Optional.h:183
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:213
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Definition: LoopInfo.h:577
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:1223
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:706
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:181
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
bool empty() const
Definition: LoopInfo.h:146
static int const Threshold
TODO: Write a new FunctionPass AliasAnalysis so that it can keep a cache.
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(BasicBlock *BB)
Definition: CFG.h:143
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
#define DEBUG(X)
Definition: Debug.h:118
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:227
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1073
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:291
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:289
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:41