LLVM  10.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/CFG.h"
39 #include "llvm/IR/Constant.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DiagnosticInfo.h"
42 #include "llvm/IR/Dominators.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/Instruction.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/IntrinsicInst.h"
47 #include "llvm/IR/Metadata.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
55 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <limits>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-unroll"
73 
75  "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
76  cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
77  " the current top-most loop. This is somtimes preferred to reduce"
78  " compile time."));
79 
80 static cl::opt<unsigned>
81  UnrollThreshold("unroll-threshold", cl::Hidden,
82  cl::desc("The cost threshold for loop unrolling"));
83 
85  "unroll-partial-threshold", cl::Hidden,
86  cl::desc("The cost threshold for partial loop unrolling"));
87 
89  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
90  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
91  "to the threshold when aggressively unrolling a loop due to the "
92  "dynamic cost savings. If completely unrolling a loop will reduce "
93  "the total runtime from X to Y, we boost the loop unroll "
94  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
95  "X/Y). This limit avoids excessive code bloat."));
96 
98  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
99  cl::desc("Don't allow loop unrolling to simulate more than this number of"
100  "iterations when checking full unroll profitability"));
101 
103  "unroll-count", cl::Hidden,
104  cl::desc("Use this unroll count for all loops including those with "
105  "unroll_count pragma values, for testing purposes"));
106 
108  "unroll-max-count", cl::Hidden,
109  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
110  "testing purposes"));
111 
113  "unroll-full-max-count", cl::Hidden,
114  cl::desc(
115  "Set the max unroll count for full unrolling, for testing purposes"));
116 
118  "unroll-peel-count", cl::Hidden,
119  cl::desc("Set the unroll peeling count, for testing purposes"));
120 
121 static cl::opt<bool>
122  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
123  cl::desc("Allows loops to be partially unrolled until "
124  "-unroll-threshold loop size is reached."));
125 
127  "unroll-allow-remainder", cl::Hidden,
128  cl::desc("Allow generation of a loop remainder (extra iterations) "
129  "when unrolling a loop."));
130 
131 static cl::opt<bool>
132  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
133  cl::desc("Unroll loops with run-time trip counts"));
134 
136  "unroll-max-upperbound", cl::init(8), cl::Hidden,
137  cl::desc(
138  "The max of trip count upper bound that is considered in unrolling"));
139 
141  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
142  cl::desc("Unrolled size limit for loops with an unroll(full) or "
143  "unroll_count pragma."));
144 
146  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
147  cl::desc("If the runtime tripcount for the loop is lower than the "
148  "threshold, the loop is considered as flat and will be less "
149  "aggressively unrolled."));
150 
151 static cl::opt<bool>
152  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
153  cl::desc("Allows loops to be peeled when the dynamic "
154  "trip count is known to be low."));
155 
157  "unroll-remainder", cl::Hidden,
158  cl::desc("Allow the loop remainder to be unrolled."));
159 
160 // This option isn't ever intended to be enabled, it serves to allow
161 // experiments to check the assumptions about when this kind of revisit is
162 // necessary.
164  "unroll-revisit-child-loops", cl::Hidden,
165  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
166  "This shouldn't typically be needed as child loops (or their "
167  "clones) were already visited."));
168 
169 /// A magic value for use with the Threshold parameter to indicate
170 /// that the loop unroll should be performed regardless of how much
171 /// code expansion would result.
173 
174 /// Gather the various unrolling parameters based on the defaults, compiler
175 /// flags, TTI overrides and user specified parameters.
177  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
178  BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
179  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
180  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
181  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling,
182  Optional<bool> UserAllowProfileBasedPeeling) {
184 
185  // Set up the defaults
186  UP.Threshold = OptLevel > 2 ? 300 : 150;
187  UP.MaxPercentThresholdBoost = 400;
188  UP.OptSizeThreshold = 0;
189  UP.PartialThreshold = 150;
191  UP.Count = 0;
192  UP.PeelCount = 0;
196  UP.BEInsns = 2;
197  UP.Partial = false;
198  UP.Runtime = false;
199  UP.AllowRemainder = true;
200  UP.UnrollRemainder = false;
201  UP.AllowExpensiveTripCount = false;
202  UP.Force = false;
203  UP.UpperBound = false;
204  UP.AllowPeeling = true;
205  UP.UnrollAndJam = false;
206  UP.PeelProfiledIterations = true;
208 
209  // Override with any target specific settings
210  TTI.getUnrollingPreferences(L, SE, UP);
211 
212  // Apply size attributes
213  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
215  if (OptForSize) {
216  UP.Threshold = UP.OptSizeThreshold;
218  UP.MaxPercentThresholdBoost = 100;
219  }
220 
221  // Apply any user values specified by cl::opt
222  if (UnrollThreshold.getNumOccurrences() > 0)
224  if (UnrollPartialThreshold.getNumOccurrences() > 0)
226  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
228  if (UnrollMaxCount.getNumOccurrences() > 0)
230  if (UnrollFullMaxCount.getNumOccurrences() > 0)
232  if (UnrollPeelCount.getNumOccurrences() > 0)
234  if (UnrollAllowPartial.getNumOccurrences() > 0)
236  if (UnrollAllowRemainder.getNumOccurrences() > 0)
238  if (UnrollRuntime.getNumOccurrences() > 0)
239  UP.Runtime = UnrollRuntime;
240  if (UnrollMaxUpperBound == 0)
241  UP.UpperBound = false;
242  if (UnrollAllowPeeling.getNumOccurrences() > 0)
244  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
246 
247  // Apply user values provided by argument
248  if (UserThreshold.hasValue()) {
249  UP.Threshold = *UserThreshold;
250  UP.PartialThreshold = *UserThreshold;
251  }
252  if (UserCount.hasValue())
253  UP.Count = *UserCount;
254  if (UserAllowPartial.hasValue())
255  UP.Partial = *UserAllowPartial;
256  if (UserRuntime.hasValue())
257  UP.Runtime = *UserRuntime;
258  if (UserUpperBound.hasValue())
259  UP.UpperBound = *UserUpperBound;
260  if (UserAllowPeeling.hasValue())
261  UP.AllowPeeling = *UserAllowPeeling;
262  if (UserAllowProfileBasedPeeling.hasValue())
263  UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
264 
265  return UP;
266 }
267 
268 namespace {
269 
270 /// A struct to densely store the state of an instruction after unrolling at
271 /// each iteration.
272 ///
273 /// This is designed to work like a tuple of <Instruction *, int> for the
274 /// purposes of hashing and lookup, but to be able to associate two boolean
275 /// states with each key.
276 struct UnrolledInstState {
277  Instruction *I;
278  int Iteration : 30;
279  unsigned IsFree : 1;
280  unsigned IsCounted : 1;
281 };
282 
283 /// Hashing and equality testing for a set of the instruction states.
284 struct UnrolledInstStateKeyInfo {
285  using PtrInfo = DenseMapInfo<Instruction *>;
287 
288  static inline UnrolledInstState getEmptyKey() {
289  return {PtrInfo::getEmptyKey(), 0, 0, 0};
290  }
291 
292  static inline UnrolledInstState getTombstoneKey() {
293  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
294  }
295 
296  static inline unsigned getHashValue(const UnrolledInstState &S) {
297  return PairInfo::getHashValue({S.I, S.Iteration});
298  }
299 
300  static inline bool isEqual(const UnrolledInstState &LHS,
301  const UnrolledInstState &RHS) {
302  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
303  }
304 };
305 
306 struct EstimatedUnrollCost {
307  /// The estimated cost after unrolling.
308  unsigned UnrolledCost;
309 
310  /// The estimated dynamic cost of executing the instructions in the
311  /// rolled form.
312  unsigned RolledDynamicCost;
313 };
314 
315 } // end anonymous namespace
316 
317 /// Figure out if the loop is worth full unrolling.
318 ///
319 /// Complete loop unrolling can make some loads constant, and we need to know
320 /// if that would expose any further optimization opportunities. This routine
321 /// estimates this optimization. It computes cost of unrolled loop
322 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
323 /// dynamic cost we mean that we won't count costs of blocks that are known not
324 /// to be executed (i.e. if we have a branch in the loop and we know that at the
325 /// given iteration its condition would be resolved to true, we won't add up the
326 /// cost of the 'false'-block).
327 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
328 /// the analysis failed (no benefits expected from the unrolling, or the loop is
329 /// too big to analyze), the returned value is None.
331  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
332  const SmallPtrSetImpl<const Value *> &EphValues,
333  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
334  // We want to be able to scale offsets by the trip count and add more offsets
335  // to them without checking for overflows, and we already don't want to
336  // analyze *massive* trip counts, so we force the max to be reasonably small.
338  (unsigned)(std::numeric_limits<int>::max() / 2) &&
339  "The unroll iterations max is too large!");
340 
341  // Only analyze inner loops. We can't properly estimate cost of nested loops
342  // and we won't visit inner loops again anyway.
343  if (!L->empty())
344  return None;
345 
346  // Don't simulate loops with a big or unknown tripcount
347  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
349  return None;
350 
353  DenseMap<Value *, Constant *> SimplifiedValues;
354  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
355 
356  // The estimated cost of the unrolled form of the loop. We try to estimate
357  // this by simplifying as much as we can while computing the estimate.
358  unsigned UnrolledCost = 0;
359 
360  // We also track the estimated dynamic (that is, actually executed) cost in
361  // the rolled form. This helps identify cases when the savings from unrolling
362  // aren't just exposing dead control flows, but actual reduced dynamic
363  // instructions due to the simplifications which we expect to occur after
364  // unrolling.
365  unsigned RolledDynamicCost = 0;
366 
367  // We track the simplification of each instruction in each iteration. We use
368  // this to recursively merge costs into the unrolled cost on-demand so that
369  // we don't count the cost of any dead code. This is essentially a map from
370  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
372 
373  // A small worklist used to accumulate cost of instructions from each
374  // observable and reached root in the loop.
375  SmallVector<Instruction *, 16> CostWorklist;
376 
377  // PHI-used worklist used between iterations while accumulating cost.
378  SmallVector<Instruction *, 4> PHIUsedList;
379 
380  // Helper function to accumulate cost for instructions in the loop.
381  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
382  assert(Iteration >= 0 && "Cannot have a negative iteration!");
383  assert(CostWorklist.empty() && "Must start with an empty cost list");
384  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
385  CostWorklist.push_back(&RootI);
386  for (;; --Iteration) {
387  do {
388  Instruction *I = CostWorklist.pop_back_val();
389 
390  // InstCostMap only uses I and Iteration as a key, the other two values
391  // don't matter here.
392  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
393  if (CostIter == InstCostMap.end())
394  // If an input to a PHI node comes from a dead path through the loop
395  // we may have no cost data for it here. What that actually means is
396  // that it is free.
397  continue;
398  auto &Cost = *CostIter;
399  if (Cost.IsCounted)
400  // Already counted this instruction.
401  continue;
402 
403  // Mark that we are counting the cost of this instruction now.
404  Cost.IsCounted = true;
405 
406  // If this is a PHI node in the loop header, just add it to the PHI set.
407  if (auto *PhiI = dyn_cast<PHINode>(I))
408  if (PhiI->getParent() == L->getHeader()) {
409  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
410  "inherently simplify during unrolling.");
411  if (Iteration == 0)
412  continue;
413 
414  // Push the incoming value from the backedge into the PHI used list
415  // if it is an in-loop instruction. We'll use this to populate the
416  // cost worklist for the next iteration (as we count backwards).
417  if (auto *OpI = dyn_cast<Instruction>(
418  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
419  if (L->contains(OpI))
420  PHIUsedList.push_back(OpI);
421  continue;
422  }
423 
424  // First accumulate the cost of this instruction.
425  if (!Cost.IsFree) {
426  UnrolledCost += TTI.getUserCost(I);
427  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
428  << Iteration << "): ");
429  LLVM_DEBUG(I->dump());
430  }
431 
432  // We must count the cost of every operand which is not free,
433  // recursively. If we reach a loop PHI node, simply add it to the set
434  // to be considered on the next iteration (backwards!).
435  for (Value *Op : I->operands()) {
436  // Check whether this operand is free due to being a constant or
437  // outside the loop.
438  auto *OpI = dyn_cast<Instruction>(Op);
439  if (!OpI || !L->contains(OpI))
440  continue;
441 
442  // Otherwise accumulate its cost.
443  CostWorklist.push_back(OpI);
444  }
445  } while (!CostWorklist.empty());
446 
447  if (PHIUsedList.empty())
448  // We've exhausted the search.
449  break;
450 
451  assert(Iteration > 0 &&
452  "Cannot track PHI-used values past the first iteration!");
453  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
454  PHIUsedList.clear();
455  }
456  };
457 
458  // Ensure that we don't violate the loop structure invariants relied on by
459  // this analysis.
460  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
461  assert(L->isLCSSAForm(DT) &&
462  "Must have loops in LCSSA form to track live-out values.");
463 
464  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
465 
466  // Simulate execution of each iteration of the loop counting instructions,
467  // which would be simplified.
468  // Since the same load will take different values on different iterations,
469  // we literally have to go through all loop's iterations.
470  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
471  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
472 
473  // Prepare for the iteration by collecting any simplified entry or backedge
474  // inputs.
475  for (Instruction &I : *L->getHeader()) {
476  auto *PHI = dyn_cast<PHINode>(&I);
477  if (!PHI)
478  break;
479 
480  // The loop header PHI nodes must have exactly two input: one from the
481  // loop preheader and one from the loop latch.
482  assert(
483  PHI->getNumIncomingValues() == 2 &&
484  "Must have an incoming value only for the preheader and the latch.");
485 
486  Value *V = PHI->getIncomingValueForBlock(
487  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
488  Constant *C = dyn_cast<Constant>(V);
489  if (Iteration != 0 && !C)
490  C = SimplifiedValues.lookup(V);
491  if (C)
492  SimplifiedInputValues.push_back({PHI, C});
493  }
494 
495  // Now clear and re-populate the map for the next iteration.
496  SimplifiedValues.clear();
497  while (!SimplifiedInputValues.empty())
498  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
499 
500  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
501 
502  BBWorklist.clear();
503  BBWorklist.insert(L->getHeader());
504  // Note that we *must not* cache the size, this loop grows the worklist.
505  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
506  BasicBlock *BB = BBWorklist[Idx];
507 
508  // Visit all instructions in the given basic block and try to simplify
509  // it. We don't change the actual IR, just count optimization
510  // opportunities.
511  for (Instruction &I : *BB) {
512  // These won't get into the final code - don't even try calculating the
513  // cost for them.
514  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
515  continue;
516 
517  // Track this instruction's expected baseline cost when executing the
518  // rolled loop form.
519  RolledDynamicCost += TTI.getUserCost(&I);
520 
521  // Visit the instruction to analyze its loop cost after unrolling,
522  // and if the visitor returns true, mark the instruction as free after
523  // unrolling and continue.
524  bool IsFree = Analyzer.visit(I);
525  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
526  (unsigned)IsFree,
527  /*IsCounted*/ false}).second;
528  (void)Inserted;
529  assert(Inserted && "Cannot have a state for an unvisited instruction!");
530 
531  if (IsFree)
532  continue;
533 
534  // Can't properly model a cost of a call.
535  // FIXME: With a proper cost model we should be able to do it.
536  if (auto *CI = dyn_cast<CallInst>(&I)) {
537  const Function *Callee = CI->getCalledFunction();
538  if (!Callee || TTI.isLoweredToCall(Callee)) {
539  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
540  return None;
541  }
542  }
543 
544  // If the instruction might have a side-effect recursively account for
545  // the cost of it and all the instructions leading up to it.
546  if (I.mayHaveSideEffects())
547  AddCostRecursively(I, Iteration);
548 
549  // If unrolled body turns out to be too big, bail out.
550  if (UnrolledCost > MaxUnrolledLoopSize) {
551  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
552  << " UnrolledCost: " << UnrolledCost
553  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
554  << "\n");
555  return None;
556  }
557  }
558 
559  Instruction *TI = BB->getTerminator();
560 
561  // Add in the live successors by first checking whether we have terminator
562  // that may be simplified based on the values simplified by this call.
563  BasicBlock *KnownSucc = nullptr;
564  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
565  if (BI->isConditional()) {
566  if (Constant *SimpleCond =
567  SimplifiedValues.lookup(BI->getCondition())) {
568  // Just take the first successor if condition is undef
569  if (isa<UndefValue>(SimpleCond))
570  KnownSucc = BI->getSuccessor(0);
571  else if (ConstantInt *SimpleCondVal =
572  dyn_cast<ConstantInt>(SimpleCond))
573  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
574  }
575  }
576  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
577  if (Constant *SimpleCond =
578  SimplifiedValues.lookup(SI->getCondition())) {
579  // Just take the first successor if condition is undef
580  if (isa<UndefValue>(SimpleCond))
581  KnownSucc = SI->getSuccessor(0);
582  else if (ConstantInt *SimpleCondVal =
583  dyn_cast<ConstantInt>(SimpleCond))
584  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
585  }
586  }
587  if (KnownSucc) {
588  if (L->contains(KnownSucc))
589  BBWorklist.insert(KnownSucc);
590  else
591  ExitWorklist.insert({BB, KnownSucc});
592  continue;
593  }
594 
595  // Add BB's successors to the worklist.
596  for (BasicBlock *Succ : successors(BB))
597  if (L->contains(Succ))
598  BBWorklist.insert(Succ);
599  else
600  ExitWorklist.insert({BB, Succ});
601  AddCostRecursively(*TI, Iteration);
602  }
603 
604  // If we found no optimization opportunities on the first iteration, we
605  // won't find them on later ones too.
606  if (UnrolledCost == RolledDynamicCost) {
607  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
608  << " UnrolledCost: " << UnrolledCost << "\n");
609  return None;
610  }
611  }
612 
613  while (!ExitWorklist.empty()) {
614  BasicBlock *ExitingBB, *ExitBB;
615  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
616 
617  for (Instruction &I : *ExitBB) {
618  auto *PN = dyn_cast<PHINode>(&I);
619  if (!PN)
620  break;
621 
622  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
623  if (auto *OpI = dyn_cast<Instruction>(Op))
624  if (L->contains(OpI))
625  AddCostRecursively(*OpI, TripCount - 1);
626  }
627  }
628 
629  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
630  << "UnrolledCost: " << UnrolledCost << ", "
631  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
632  return {{UnrolledCost, RolledDynamicCost}};
633 }
634 
635 /// ApproximateLoopSize - Approximate the size of the loop.
637  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
638  const TargetTransformInfo &TTI,
639  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
641  for (BasicBlock *BB : L->blocks())
642  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
643  NumCalls = Metrics.NumInlineCandidates;
644  NotDuplicatable = Metrics.notDuplicatable;
645  Convergent = Metrics.convergent;
646 
647  unsigned LoopSize = Metrics.NumInsts;
648 
649  // Don't allow an estimate of size zero. This would allows unrolling of loops
650  // with huge iteration counts, which is a compile time problem even if it's
651  // not a problem for code quality. Also, the code using this size may assume
652  // that each loop has at least three instructions (likely a conditional
653  // branch, a comparison feeding that branch, and some kind of loop increment
654  // feeding that comparison instruction).
655  LoopSize = std::max(LoopSize, BEInsns + 1);
656 
657  return LoopSize;
658 }
659 
660 // Returns the loop hint metadata node with the given name (for example,
661 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
662 // returned.
664  if (MDNode *LoopID = L->getLoopID())
665  return GetUnrollMetadata(LoopID, Name);
666  return nullptr;
667 }
668 
669 // Returns true if the loop has an unroll(full) pragma.
670 static bool HasUnrollFullPragma(const Loop *L) {
671  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
672 }
673 
674 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
675 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
676 static bool HasUnrollEnablePragma(const Loop *L) {
677  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
678 }
679 
680 // Returns true if the loop has an runtime unroll(disable) pragma.
681 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
682  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
683 }
684 
685 // If loop has an unroll_count pragma return the (necessarily
686 // positive) value from the pragma. Otherwise return 0.
687 static unsigned UnrollCountPragmaValue(const Loop *L) {
688  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
689  if (MD) {
690  assert(MD->getNumOperands() == 2 &&
691  "Unroll count hint metadata should have two operands.");
692  unsigned Count =
693  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
694  assert(Count >= 1 && "Unroll count must be positive.");
695  return Count;
696  }
697  return 0;
698 }
699 
700 // Computes the boosting factor for complete unrolling.
701 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
702 // be beneficial to fully unroll the loop even if unrolledcost is large. We
703 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
704 // the unroll threshold.
705 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
706  unsigned MaxPercentThresholdBoost) {
707  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
708  return 100;
709  else if (Cost.UnrolledCost != 0)
710  // The boosting factor is RolledDynamicCost / UnrolledCost
711  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
712  MaxPercentThresholdBoost);
713  else
714  return MaxPercentThresholdBoost;
715 }
716 
717 // Returns loop size estimation for unrolled loop.
718 static uint64_t getUnrolledLoopSize(
719  unsigned LoopSize,
721  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
722  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
723 }
724 
725 // Returns true if unroll count was set explicitly.
726 // Calculates unroll count and writes it to UP.Count.
727 // Unless IgnoreUser is true, will also use metadata and command-line options
728 // that are specific to to the LoopUnroll pass (which, for instance, are
729 // irrelevant for the LoopUnrollAndJam pass).
730 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
731 // many LoopUnroll-specific options. The shared functionality should be
732 // refactored into it own function.
734  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
735  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
736  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
737  unsigned &TripMultiple, unsigned LoopSize,
738  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
739 
740  // Check for explicit Count.
741  // 1st priority is unroll count set by "unroll-count" option.
742  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
743  if (UserUnrollCount) {
744  UP.Count = UnrollCount;
745  UP.AllowExpensiveTripCount = true;
746  UP.Force = true;
747  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
748  return true;
749  }
750 
751  // 2nd priority is unroll count set by pragma.
752  unsigned PragmaCount = UnrollCountPragmaValue(L);
753  if (PragmaCount > 0) {
754  UP.Count = PragmaCount;
755  UP.Runtime = true;
756  UP.AllowExpensiveTripCount = true;
757  UP.Force = true;
758  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
760  return true;
761  }
762  bool PragmaFullUnroll = HasUnrollFullPragma(L);
763  if (PragmaFullUnroll && TripCount != 0) {
764  UP.Count = TripCount;
765  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
766  return false;
767  }
768 
769  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
770  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
771  PragmaEnableUnroll || UserUnrollCount;
772 
773  if (ExplicitUnroll && TripCount != 0) {
774  // If the loop has an unrolling pragma, we want to be more aggressive with
775  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
776  // value which is larger than the default limits.
777  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
778  UP.PartialThreshold =
779  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
780  }
781 
782  // 3rd priority is full unroll count.
783  // Full unroll makes sense only when TripCount or its upper bound could be
784  // statically calculated.
785  // Also we need to check if we exceed FullUnrollMaxCount.
786  // If using the upper bound to unroll, TripMultiple should be set to 1 because
787  // we do not know when loop may exit.
788  // MaxTripCount and ExactTripCount cannot both be non zero since we only
789  // compute the former when the latter is zero.
790  unsigned ExactTripCount = TripCount;
791  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
792  "ExtractTripCount and MaxTripCount cannot both be non zero.");
793  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
794  UP.Count = FullUnrollTripCount;
795  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
796  // When computing the unrolled size, note that BEInsns are not replicated
797  // like the rest of the loop body.
798  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
799  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
800  TripCount = FullUnrollTripCount;
801  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
802  return ExplicitUnroll;
803  } else {
804  // The loop isn't that small, but we still can fully unroll it if that
805  // helps to remove a significant number of instructions.
806  // To check that, run additional analysis on the loop.
808  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
809  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
810  unsigned Boost =
812  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
813  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
814  TripCount = FullUnrollTripCount;
815  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
816  return ExplicitUnroll;
817  }
818  }
819  }
820  }
821 
822  // 4th priority is loop peeling.
823  computePeelCount(L, LoopSize, UP, TripCount, SE);
824  if (UP.PeelCount) {
825  UP.Runtime = false;
826  UP.Count = 1;
827  return ExplicitUnroll;
828  }
829 
830  // 5th priority is partial unrolling.
831  // Try partial unroll only when TripCount could be statically calculated.
832  if (TripCount) {
833  UP.Partial |= ExplicitUnroll;
834  if (!UP.Partial) {
835  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
836  << "-unroll-allow-partial not given\n");
837  UP.Count = 0;
838  return false;
839  }
840  if (UP.Count == 0)
841  UP.Count = TripCount;
842  if (UP.PartialThreshold != NoThreshold) {
843  // Reduce unroll count to be modulo of TripCount for partial unrolling.
844  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
845  UP.Count =
846  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
847  (LoopSize - UP.BEInsns);
848  if (UP.Count > UP.MaxCount)
849  UP.Count = UP.MaxCount;
850  while (UP.Count != 0 && TripCount % UP.Count != 0)
851  UP.Count--;
852  if (UP.AllowRemainder && UP.Count <= 1) {
853  // If there is no Count that is modulo of TripCount, set Count to
854  // largest power-of-two factor that satisfies the threshold limit.
855  // As we'll create fixup loop, do the type of unrolling only if
856  // remainder loop is allowed.
858  while (UP.Count != 0 &&
859  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
860  UP.Count >>= 1;
861  }
862  if (UP.Count < 2) {
863  if (PragmaEnableUnroll)
864  ORE->emit([&]() {
866  "UnrollAsDirectedTooLarge",
867  L->getStartLoc(), L->getHeader())
868  << "Unable to unroll loop as directed by unroll(enable) "
869  "pragma "
870  "because unrolled size is too large.";
871  });
872  UP.Count = 0;
873  }
874  } else {
875  UP.Count = TripCount;
876  }
877  if (UP.Count > UP.MaxCount)
878  UP.Count = UP.MaxCount;
879  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
880  UP.Count != TripCount)
881  ORE->emit([&]() {
882  return OptimizationRemarkMissed(DEBUG_TYPE,
883  "FullUnrollAsDirectedTooLarge",
884  L->getStartLoc(), L->getHeader())
885  << "Unable to fully unroll loop as directed by unroll pragma "
886  "because "
887  "unrolled size is too large.";
888  });
889  return ExplicitUnroll;
890  }
891  assert(TripCount == 0 &&
892  "All cases when TripCount is constant should be covered here.");
893  if (PragmaFullUnroll)
894  ORE->emit([&]() {
896  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
897  L->getStartLoc(), L->getHeader())
898  << "Unable to fully unroll loop as directed by unroll(full) "
899  "pragma "
900  "because loop has a runtime trip count.";
901  });
902 
903  // 6th priority is runtime unrolling.
904  // Don't unroll a runtime trip count loop when it is disabled.
906  UP.Count = 0;
907  return false;
908  }
909 
910  // Check if the runtime trip count is too small when profile is available.
911  if (L->getHeader()->getParent()->hasProfileData()) {
912  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
913  if (*ProfileTripCount < FlatLoopTripCountThreshold)
914  return false;
915  else
916  UP.AllowExpensiveTripCount = true;
917  }
918  }
919 
920  // Reduce count based on the type of unrolling and the threshold values.
921  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
922  if (!UP.Runtime) {
923  LLVM_DEBUG(
924  dbgs() << " will not try to unroll loop with runtime trip count "
925  << "-unroll-runtime not given\n");
926  UP.Count = 0;
927  return false;
928  }
929  if (UP.Count == 0)
931 
932  // Reduce unroll count to be the largest power-of-two factor of
933  // the original count which satisfies the threshold limit.
934  while (UP.Count != 0 &&
935  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
936  UP.Count >>= 1;
937 
938 #ifndef NDEBUG
939  unsigned OrigCount = UP.Count;
940 #endif
941 
942  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
943  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
944  UP.Count >>= 1;
945  LLVM_DEBUG(
946  dbgs() << "Remainder loop is restricted (that could architecture "
947  "specific or because the loop contains a convergent "
948  "instruction), so unroll count must divide the trip "
949  "multiple, "
950  << TripMultiple << ". Reducing unroll count from " << OrigCount
951  << " to " << UP.Count << ".\n");
952 
953  using namespace ore;
954 
955  if (PragmaCount > 0 && !UP.AllowRemainder)
956  ORE->emit([&]() {
958  "DifferentUnrollCountFromDirected",
959  L->getStartLoc(), L->getHeader())
960  << "Unable to unroll loop the number of times directed by "
961  "unroll_count pragma because remainder loop is restricted "
962  "(that could architecture specific or because the loop "
963  "contains a convergent instruction) and so must have an "
964  "unroll "
965  "count that divides the loop trip multiple of "
966  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
967  << NV("UnrollCount", UP.Count) << " time(s).";
968  });
969  }
970 
971  if (UP.Count > UP.MaxCount)
972  UP.Count = UP.MaxCount;
973  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
974  << "\n");
975  if (UP.Count < 2)
976  UP.Count = 0;
977  return ExplicitUnroll;
978 }
979 
981  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
982  const TargetTransformInfo &TTI, AssumptionCache &AC,
984  ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
985  bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
986  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
987  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
988  Optional<bool> ProvidedAllowPeeling,
989  Optional<bool> ProvidedAllowProfileBasedPeeling) {
990  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
991  << L->getHeader()->getParent()->getName() << "] Loop %"
992  << L->getHeader()->getName() << "\n");
994  if (TM & TM_Disable)
996  if (!L->isLoopSimplifyForm()) {
997  LLVM_DEBUG(
998  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
1000  }
1001 
1002  // When automtatic unrolling is disabled, do not unroll unless overridden for
1003  // this loop.
1004  if (OnlyWhenForced && !(TM & TM_Enable))
1006 
1007  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
1008  unsigned NumInlineCandidates;
1009  bool NotDuplicatable;
1010  bool Convergent;
1012  L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
1013  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1014  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
1015 
1016  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
1017  // as threshold later on.
1018  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
1019  !OptForSize)
1021 
1023  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1024 
1025  unsigned LoopSize =
1026  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1027  TTI, EphValues, UP.BEInsns);
1028  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1029  if (NotDuplicatable) {
1030  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1031  << " instructions.\n");
1033  }
1034 
1035  // When optimizing for size, use LoopSize as threshold, to (fully) unroll
1036  // loops, if it does not increase code size.
1037  if (OptForSize)
1038  UP.Threshold = std::max(UP.Threshold, LoopSize);
1039 
1040  if (NumInlineCandidates != 0) {
1041  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1043  }
1044 
1045  // Find trip count and trip multiple if count is not available
1046  unsigned TripCount = 0;
1047  unsigned MaxTripCount = 0;
1048  unsigned TripMultiple = 1;
1049  // If there are multiple exiting blocks but one of them is the latch, use the
1050  // latch for the trip count estimation. Otherwise insist on a single exiting
1051  // block for the trip count estimation.
1052  BasicBlock *ExitingBlock = L->getLoopLatch();
1053  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1054  ExitingBlock = L->getExitingBlock();
1055  if (ExitingBlock) {
1056  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1057  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1058  }
1059 
1060  // If the loop contains a convergent operation, the prelude we'd add
1061  // to do the first few instructions before we hit the unrolled loop
1062  // is unsafe -- it adds a control-flow dependency to the convergent
1063  // operation. Therefore restrict remainder loop (try unrollig without).
1064  //
1065  // TODO: This is quite conservative. In practice, convergent_op()
1066  // is likely to be called unconditionally in the loop. In this
1067  // case, the program would be ill-formed (on most architectures)
1068  // unless n were the same on all threads in a thread group.
1069  // Assuming n is the same on all threads, any kind of unrolling is
1070  // safe. But currently llvm's notion of convergence isn't powerful
1071  // enough to express this.
1072  if (Convergent)
1073  UP.AllowRemainder = false;
1074 
1075  // Try to find the trip count upper bound if we cannot find the exact trip
1076  // count.
1077  bool MaxOrZero = false;
1078  if (!TripCount) {
1079  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1080  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1081  // We can unroll by the upper bound amount if it's generally allowed or if
1082  // we know that the loop is executed either the upper bound or zero times.
1083  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1084  // loop tests remains the same compared to the non-unrolled version, whereas
1085  // the generic upper bound unrolling keeps all but the last loop test so the
1086  // number of loop tests goes up which may end up being worse on targets with
1087  // constrained branch predictor resources so is controlled by an option.)
1088  // In addition we only unroll small upper bounds.
1089  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1090  MaxTripCount = 0;
1091  }
1092  }
1093 
1094  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1095  // fully unroll the loop.
1096  bool UseUpperBound = false;
1097  bool IsCountSetExplicitly = computeUnrollCount(
1098  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1099  TripMultiple, LoopSize, UP, UseUpperBound);
1100  if (!UP.Count)
1102  // Unroll factor (Count) must be less or equal to TripCount.
1103  if (TripCount && UP.Count > TripCount)
1104  UP.Count = TripCount;
1105 
1106  // Save loop properties before it is transformed.
1107  MDNode *OrigLoopID = L->getLoopID();
1108 
1109  // Unroll the loop.
1110  Loop *RemainderLoop = nullptr;
1111  LoopUnrollResult UnrollResult = UnrollLoop(
1112  L,
1113  {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1114  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1115  ForgetAllSCEV},
1116  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1117  if (UnrollResult == LoopUnrollResult::Unmodified)
1119 
1120  if (RemainderLoop) {
1121  Optional<MDNode *> RemainderLoopID =
1124  if (RemainderLoopID.hasValue())
1125  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1126  }
1127 
1128  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1129  Optional<MDNode *> NewLoopID =
1132  if (NewLoopID.hasValue()) {
1133  L->setLoopID(NewLoopID.getValue());
1134 
1135  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1136  // explicitly.
1137  return UnrollResult;
1138  }
1139  }
1140 
1141  // If loop has an unroll count pragma or unrolled by explicitly set count
1142  // mark loop as unrolled to prevent unrolling beyond that requested.
1143  // If the loop was peeled, we already "used up" the profile information
1144  // we had, so we don't want to unroll or peel again.
1145  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1146  (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
1148 
1149  return UnrollResult;
1150 }
1151 
1152 namespace {
1153 
1154 class LoopUnroll : public LoopPass {
1155 public:
1156  static char ID; // Pass ID, replacement for typeid
1157 
1158  int OptLevel;
1159 
1160  /// If false, use a cost model to determine whether unrolling of a loop is
1161  /// profitable. If true, only loops that explicitly request unrolling via
1162  /// metadata are considered. All other loops are skipped.
1163  bool OnlyWhenForced;
1164 
1165  /// If false, when SCEV is invalidated, only forget everything in the
1166  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
1167  /// Otherwise, forgetAllLoops and rebuild when needed next.
1168  bool ForgetAllSCEV;
1169 
1170  Optional<unsigned> ProvidedCount;
1171  Optional<unsigned> ProvidedThreshold;
1172  Optional<bool> ProvidedAllowPartial;
1173  Optional<bool> ProvidedRuntime;
1174  Optional<bool> ProvidedUpperBound;
1175  Optional<bool> ProvidedAllowPeeling;
1176  Optional<bool> ProvidedAllowProfileBasedPeeling;
1177 
1178  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1179  bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
1180  Optional<unsigned> Count = None,
1181  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1182  Optional<bool> UpperBound = None,
1183  Optional<bool> AllowPeeling = None,
1184  Optional<bool> AllowProfileBasedPeeling = None)
1185  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1186  ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
1187  ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
1188  ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
1189  ProvidedAllowPeeling(AllowPeeling),
1190  ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling) {
1192  }
1193 
1194  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1195  if (skipLoop(L))
1196  return false;
1197 
1198  Function &F = *L->getHeader()->getParent();
1199 
1200  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1201  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1202  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1203  const TargetTransformInfo &TTI =
1204  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1205  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1206  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1207  // pass. Function analyses need to be preserved across loop transformations
1208  // but ORE cannot be preserved (see comment before the pass definition).
1209  OptimizationRemarkEmitter ORE(&F);
1210  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1211 
1213  L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
1214  OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
1215  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
1216  ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling);
1217 
1218  if (Result == LoopUnrollResult::FullyUnrolled)
1219  LPM.markLoopAsDeleted(*L);
1220 
1221  return Result != LoopUnrollResult::Unmodified;
1222  }
1223 
1224  /// This transformation requires natural loop information & requires that
1225  /// loop preheaders be inserted into the CFG...
1226  void getAnalysisUsage(AnalysisUsage &AU) const override {
1229  // FIXME: Loop passes are required to preserve domtree, and for now we just
1230  // recreate dom info if anything gets unrolled.
1232  }
1233 };
1234 
1235 } // end anonymous namespace
1236 
1237 char LoopUnroll::ID = 0;
1238 
1239 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1243 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1244 
1245 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1246  bool ForgetAllSCEV, int Threshold, int Count,
1247  int AllowPartial, int Runtime, int UpperBound,
1248  int AllowPeeling) {
1249  // TODO: It would make more sense for this function to take the optionals
1250  // directly, but that's dangerous since it would silently break out of tree
1251  // callers.
1252  return new LoopUnroll(
1253  OptLevel, OnlyWhenForced, ForgetAllSCEV,
1254  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1255  Count == -1 ? None : Optional<unsigned>(Count),
1256  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1257  Runtime == -1 ? None : Optional<bool>(Runtime),
1258  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1259  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1260 }
1261 
1262 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1263  bool ForgetAllSCEV) {
1264  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
1265  0, 0, 0, 0);
1266 }
1267 
1270  LPMUpdater &Updater) {
1271  const auto &FAM =
1272  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1273  Function *F = L.getHeader()->getParent();
1274 
1275  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1276  // FIXME: This should probably be optional rather than required.
1277  if (!ORE)
1279  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1280  "cached at a higher level");
1281 
1282  // Keep track of the previous loop structure so we can identify new loops
1283  // created by unrolling.
1284  Loop *ParentL = L.getParentLoop();
1285  SmallPtrSet<Loop *, 4> OldLoops;
1286  if (ParentL)
1287  OldLoops.insert(ParentL->begin(), ParentL->end());
1288  else
1289  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1290 
1291  std::string LoopName = L.getName();
1292 
1293  bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1294  /*BFI*/ nullptr, /*PSI*/ nullptr,
1295  /*PreserveLCSSA*/ true, OptLevel,
1296  OnlyWhenForced, ForgetSCEV, /*Count*/ None,
1297  /*Threshold*/ None, /*AllowPartial*/ false,
1298  /*Runtime*/ false, /*UpperBound*/ false,
1299  /*AllowPeeling*/ false,
1300  /*AllowProfileBasedPeeling*/ false) !=
1302  if (!Changed)
1303  return PreservedAnalyses::all();
1304 
1305  // The parent must not be damaged by unrolling!
1306 #ifndef NDEBUG
1307  if (ParentL)
1308  ParentL->verifyLoop();
1309 #endif
1310 
1311  // Unrolling can do several things to introduce new loops into a loop nest:
1312  // - Full unrolling clones child loops within the current loop but then
1313  // removes the current loop making all of the children appear to be new
1314  // sibling loops.
1315  //
1316  // When a new loop appears as a sibling loop after fully unrolling,
1317  // its nesting structure has fundamentally changed and we want to revisit
1318  // it to reflect that.
1319  //
1320  // When unrolling has removed the current loop, we need to tell the
1321  // infrastructure that it is gone.
1322  //
1323  // Finally, we support a debugging/testing mode where we revisit child loops
1324  // as well. These are not expected to require further optimizations as either
1325  // they or the loop they were cloned from have been directly visited already.
1326  // But the debugging mode allows us to check this assumption.
1327  bool IsCurrentLoopValid = false;
1328  SmallVector<Loop *, 4> SibLoops;
1329  if (ParentL)
1330  SibLoops.append(ParentL->begin(), ParentL->end());
1331  else
1332  SibLoops.append(AR.LI.begin(), AR.LI.end());
1333  erase_if(SibLoops, [&](Loop *SibLoop) {
1334  if (SibLoop == &L) {
1335  IsCurrentLoopValid = true;
1336  return true;
1337  }
1338 
1339  // Otherwise erase the loop from the list if it was in the old loops.
1340  return OldLoops.count(SibLoop) != 0;
1341  });
1342  Updater.addSiblingLoops(SibLoops);
1343 
1344  if (!IsCurrentLoopValid) {
1345  Updater.markLoopAsDeleted(L, LoopName);
1346  } else {
1347  // We can only walk child loops if the current loop remained valid.
1349  // Walk *all* of the child loops.
1350  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1351  Updater.addChildLoops(ChildLoops);
1352  }
1353  }
1354 
1356 }
1357 
1358 template <typename RangeT>
1360  SmallVector<Loop *, 8> Worklist;
1361  // We use an internal worklist to build up the preorder traversal without
1362  // recursion.
1363  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1364 
1365  for (Loop *RootL : Loops) {
1366  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1367  assert(PreOrderWorklist.empty() &&
1368  "Must start with an empty preorder walk worklist.");
1369  PreOrderWorklist.push_back(RootL);
1370  do {
1371  Loop *L = PreOrderWorklist.pop_back_val();
1372  PreOrderWorklist.append(L->begin(), L->end());
1373  PreOrderLoops.push_back(L);
1374  } while (!PreOrderWorklist.empty());
1375 
1376  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1377  PreOrderLoops.clear();
1378  }
1379  return Worklist;
1380 }
1381 
1384  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1385  auto &LI = AM.getResult<LoopAnalysis>(F);
1386  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1387  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1388  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1390 
1391  LoopAnalysisManager *LAM = nullptr;
1392  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1393  LAM = &LAMProxy->getManager();
1394 
1395  const ModuleAnalysisManager &MAM =
1397  ProfileSummaryInfo *PSI =
1399  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
1400  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
1401 
1402  bool Changed = false;
1403 
1404  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1405  // Since simplification may add new inner loops, it has to run before the
1406  // legality and profitability checks. This means running the loop unroller
1407  // will simplify all loops, regardless of whether anything end up being
1408  // unrolled.
1409  for (auto &L : LI) {
1410  Changed |=
1411  simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
1412  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1413  }
1414 
1416 
1417  while (!Worklist.empty()) {
1418  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1419  // from back to front so that we work forward across the CFG, which
1420  // for unrolling is only needed to get optimization remarks emitted in
1421  // a forward order.
1422  Loop &L = *Worklist.pop_back_val();
1423 #ifndef NDEBUG
1424  Loop *ParentL = L.getParentLoop();
1425 #endif
1426 
1427  // Check if the profile summary indicates that the profiled application
1428  // has a huge working set size, in which case we disable peeling to avoid
1429  // bloating it further.
1430  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1431  if (PSI && PSI->hasHugeWorkingSetSize())
1432  LocalAllowPeeling = false;
1433  std::string LoopName = L.getName();
1434  // The API here is quite complex to call and we allow to select some
1435  // flavors of unrolling during construction time (by setting UnrollOpts).
1437  &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
1438  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1439  UnrollOpts.ForgetSCEV, /*Count*/ None,
1440  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1441  UnrollOpts.AllowUpperBound, LocalAllowPeeling,
1442  UnrollOpts.AllowProfileBasedPeeling);
1443  Changed |= Result != LoopUnrollResult::Unmodified;
1444 
1445  // The parent must not be damaged by unrolling!
1446 #ifndef NDEBUG
1447  if (Result != LoopUnrollResult::Unmodified && ParentL)
1448  ParentL->verifyLoop();
1449 #endif
1450 
1451  // Clear any cached analysis results for L if we removed it completely.
1452  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1453  LAM->clear(L, LoopName);
1454  }
1455 
1456  if (!Changed)
1457  return PreservedAnalyses::all();
1458 
1460 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
uint64_t CallInst * C
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false)
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
cl::opt< bool > ForgetSCEVInLoopUnroll
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:47
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:622
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:444
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
void push_back(const T &Elt)
Definition: SmallVector.h:211
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:68
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:44
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:42
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4428
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Hexagon Hardware Loops
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
bool AllowPeeling
Allow peeling off loop iterations.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:105
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:228
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:509
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:44
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool hasProfileSummary()
Returns true if profile summary is available.
The loop was fully unrolled into straight-line code.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:208
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:41
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:904
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:521
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:467
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:237
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:675
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:308
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:607
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:41
The transformation should not be applied.
Definition: LoopUtils.h:231
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
iterator begin() const
Definition: LoopInfo.h:147
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1344
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:298
iterator begin() const
Definition: LoopInfo.h:903
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:387
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:958
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:460
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:485
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:827
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:276
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling, Optional< bool > ProvidedAllowProfileBasedPeeling)
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:387
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:137
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
iterator end() const
Definition: LoopInfo.h:148
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:222
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, bool ForgetAllSCEV=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
bool empty() const
Definition: LoopInfo.h:151
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:259
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getConstantMaxBackedgeTakenCo...
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:281
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53