LLVM  9.0.0svn
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements a simple loop unroller. It works best when loops have
10 // been canonicalized by the -indvars pass, allowing it to determine the trip
11 // counts of loops easily.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/LoopPass.h"
35 #include "llvm/IR/BasicBlock.h"
36 #include "llvm/IR/CFG.h"
37 #include "llvm/IR/Constant.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DiagnosticInfo.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Metadata.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/Debug.h"
53 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <limits>
63 #include <string>
64 #include <tuple>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "loop-unroll"
70 
71 static cl::opt<unsigned>
72  UnrollThreshold("unroll-threshold", cl::Hidden,
73  cl::desc("The cost threshold for loop unrolling"));
74 
76  "unroll-partial-threshold", cl::Hidden,
77  cl::desc("The cost threshold for partial loop unrolling"));
78 
80  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
81  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
82  "to the threshold when aggressively unrolling a loop due to the "
83  "dynamic cost savings. If completely unrolling a loop will reduce "
84  "the total runtime from X to Y, we boost the loop unroll "
85  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
86  "X/Y). This limit avoids excessive code bloat."));
87 
89  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
90  cl::desc("Don't allow loop unrolling to simulate more than this number of"
91  "iterations when checking full unroll profitability"));
92 
94  "unroll-count", cl::Hidden,
95  cl::desc("Use this unroll count for all loops including those with "
96  "unroll_count pragma values, for testing purposes"));
97 
99  "unroll-max-count", cl::Hidden,
100  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
101  "testing purposes"));
102 
104  "unroll-full-max-count", cl::Hidden,
105  cl::desc(
106  "Set the max unroll count for full unrolling, for testing purposes"));
107 
109  "unroll-peel-count", cl::Hidden,
110  cl::desc("Set the unroll peeling count, for testing purposes"));
111 
112 static cl::opt<bool>
113  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
114  cl::desc("Allows loops to be partially unrolled until "
115  "-unroll-threshold loop size is reached."));
116 
118  "unroll-allow-remainder", cl::Hidden,
119  cl::desc("Allow generation of a loop remainder (extra iterations) "
120  "when unrolling a loop."));
121 
122 static cl::opt<bool>
123  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
124  cl::desc("Unroll loops with run-time trip counts"));
125 
127  "unroll-max-upperbound", cl::init(8), cl::Hidden,
128  cl::desc(
129  "The max of trip count upper bound that is considered in unrolling"));
130 
132  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
133  cl::desc("Unrolled size limit for loops with an unroll(full) or "
134  "unroll_count pragma."));
135 
137  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
138  cl::desc("If the runtime tripcount for the loop is lower than the "
139  "threshold, the loop is considered as flat and will be less "
140  "aggressively unrolled."));
141 
142 static cl::opt<bool>
143  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
144  cl::desc("Allows loops to be peeled when the dynamic "
145  "trip count is known to be low."));
146 
148  "unroll-remainder", cl::Hidden,
149  cl::desc("Allow the loop remainder to be unrolled."));
150 
151 // This option isn't ever intended to be enabled, it serves to allow
152 // experiments to check the assumptions about when this kind of revisit is
153 // necessary.
155  "unroll-revisit-child-loops", cl::Hidden,
156  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
157  "This shouldn't typically be needed as child loops (or their "
158  "clones) were already visited."));
159 
160 /// A magic value for use with the Threshold parameter to indicate
161 /// that the loop unroll should be performed regardless of how much
162 /// code expansion would result.
164 
165 /// Gather the various unrolling parameters based on the defaults, compiler
166 /// flags, TTI overrides and user specified parameters.
168  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
169  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
170  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
171  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
173 
174  // Set up the defaults
175  UP.Threshold = OptLevel > 2 ? 300 : 150;
176  UP.MaxPercentThresholdBoost = 400;
177  UP.OptSizeThreshold = 0;
178  UP.PartialThreshold = 150;
180  UP.Count = 0;
181  UP.PeelCount = 0;
185  UP.BEInsns = 2;
186  UP.Partial = false;
187  UP.Runtime = false;
188  UP.AllowRemainder = true;
189  UP.UnrollRemainder = false;
190  UP.AllowExpensiveTripCount = false;
191  UP.Force = false;
192  UP.UpperBound = false;
193  UP.AllowPeeling = true;
194  UP.UnrollAndJam = false;
196 
197  // Override with any target specific settings
198  TTI.getUnrollingPreferences(L, SE, UP);
199 
200  // Apply size attributes
201  if (L->getHeader()->getParent()->optForSize()) {
202  UP.Threshold = UP.OptSizeThreshold;
204  }
205 
206  // Apply any user values specified by cl::opt
207  if (UnrollThreshold.getNumOccurrences() > 0)
209  if (UnrollPartialThreshold.getNumOccurrences() > 0)
211  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
213  if (UnrollMaxCount.getNumOccurrences() > 0)
215  if (UnrollFullMaxCount.getNumOccurrences() > 0)
217  if (UnrollPeelCount.getNumOccurrences() > 0)
219  if (UnrollAllowPartial.getNumOccurrences() > 0)
221  if (UnrollAllowRemainder.getNumOccurrences() > 0)
223  if (UnrollRuntime.getNumOccurrences() > 0)
224  UP.Runtime = UnrollRuntime;
225  if (UnrollMaxUpperBound == 0)
226  UP.UpperBound = false;
227  if (UnrollAllowPeeling.getNumOccurrences() > 0)
229  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
231 
232  // Apply user values provided by argument
233  if (UserThreshold.hasValue()) {
234  UP.Threshold = *UserThreshold;
235  UP.PartialThreshold = *UserThreshold;
236  }
237  if (UserCount.hasValue())
238  UP.Count = *UserCount;
239  if (UserAllowPartial.hasValue())
240  UP.Partial = *UserAllowPartial;
241  if (UserRuntime.hasValue())
242  UP.Runtime = *UserRuntime;
243  if (UserUpperBound.hasValue())
244  UP.UpperBound = *UserUpperBound;
245  if (UserAllowPeeling.hasValue())
246  UP.AllowPeeling = *UserAllowPeeling;
247 
248  return UP;
249 }
250 
251 namespace {
252 
253 /// A struct to densely store the state of an instruction after unrolling at
254 /// each iteration.
255 ///
256 /// This is designed to work like a tuple of <Instruction *, int> for the
257 /// purposes of hashing and lookup, but to be able to associate two boolean
258 /// states with each key.
259 struct UnrolledInstState {
260  Instruction *I;
261  int Iteration : 30;
262  unsigned IsFree : 1;
263  unsigned IsCounted : 1;
264 };
265 
266 /// Hashing and equality testing for a set of the instruction states.
267 struct UnrolledInstStateKeyInfo {
268  using PtrInfo = DenseMapInfo<Instruction *>;
270 
271  static inline UnrolledInstState getEmptyKey() {
272  return {PtrInfo::getEmptyKey(), 0, 0, 0};
273  }
274 
275  static inline UnrolledInstState getTombstoneKey() {
276  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
277  }
278 
279  static inline unsigned getHashValue(const UnrolledInstState &S) {
280  return PairInfo::getHashValue({S.I, S.Iteration});
281  }
282 
283  static inline bool isEqual(const UnrolledInstState &LHS,
284  const UnrolledInstState &RHS) {
285  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
286  }
287 };
288 
289 struct EstimatedUnrollCost {
290  /// The estimated cost after unrolling.
291  unsigned UnrolledCost;
292 
293  /// The estimated dynamic cost of executing the instructions in the
294  /// rolled form.
295  unsigned RolledDynamicCost;
296 };
297 
298 } // end anonymous namespace
299 
300 /// Figure out if the loop is worth full unrolling.
301 ///
302 /// Complete loop unrolling can make some loads constant, and we need to know
303 /// if that would expose any further optimization opportunities. This routine
304 /// estimates this optimization. It computes cost of unrolled loop
305 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
306 /// dynamic cost we mean that we won't count costs of blocks that are known not
307 /// to be executed (i.e. if we have a branch in the loop and we know that at the
308 /// given iteration its condition would be resolved to true, we won't add up the
309 /// cost of the 'false'-block).
310 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
311 /// the analysis failed (no benefits expected from the unrolling, or the loop is
312 /// too big to analyze), the returned value is None.
314  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
315  const SmallPtrSetImpl<const Value *> &EphValues,
316  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
317  // We want to be able to scale offsets by the trip count and add more offsets
318  // to them without checking for overflows, and we already don't want to
319  // analyze *massive* trip counts, so we force the max to be reasonably small.
321  (unsigned)(std::numeric_limits<int>::max() / 2) &&
322  "The unroll iterations max is too large!");
323 
324  // Only analyze inner loops. We can't properly estimate cost of nested loops
325  // and we won't visit inner loops again anyway.
326  if (!L->empty())
327  return None;
328 
329  // Don't simulate loops with a big or unknown tripcount
330  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
332  return None;
333 
336  DenseMap<Value *, Constant *> SimplifiedValues;
337  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
338 
339  // The estimated cost of the unrolled form of the loop. We try to estimate
340  // this by simplifying as much as we can while computing the estimate.
341  unsigned UnrolledCost = 0;
342 
343  // We also track the estimated dynamic (that is, actually executed) cost in
344  // the rolled form. This helps identify cases when the savings from unrolling
345  // aren't just exposing dead control flows, but actual reduced dynamic
346  // instructions due to the simplifications which we expect to occur after
347  // unrolling.
348  unsigned RolledDynamicCost = 0;
349 
350  // We track the simplification of each instruction in each iteration. We use
351  // this to recursively merge costs into the unrolled cost on-demand so that
352  // we don't count the cost of any dead code. This is essentially a map from
353  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
355 
356  // A small worklist used to accumulate cost of instructions from each
357  // observable and reached root in the loop.
358  SmallVector<Instruction *, 16> CostWorklist;
359 
360  // PHI-used worklist used between iterations while accumulating cost.
361  SmallVector<Instruction *, 4> PHIUsedList;
362 
363  // Helper function to accumulate cost for instructions in the loop.
364  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
365  assert(Iteration >= 0 && "Cannot have a negative iteration!");
366  assert(CostWorklist.empty() && "Must start with an empty cost list");
367  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
368  CostWorklist.push_back(&RootI);
369  for (;; --Iteration) {
370  do {
371  Instruction *I = CostWorklist.pop_back_val();
372 
373  // InstCostMap only uses I and Iteration as a key, the other two values
374  // don't matter here.
375  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
376  if (CostIter == InstCostMap.end())
377  // If an input to a PHI node comes from a dead path through the loop
378  // we may have no cost data for it here. What that actually means is
379  // that it is free.
380  continue;
381  auto &Cost = *CostIter;
382  if (Cost.IsCounted)
383  // Already counted this instruction.
384  continue;
385 
386  // Mark that we are counting the cost of this instruction now.
387  Cost.IsCounted = true;
388 
389  // If this is a PHI node in the loop header, just add it to the PHI set.
390  if (auto *PhiI = dyn_cast<PHINode>(I))
391  if (PhiI->getParent() == L->getHeader()) {
392  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
393  "inherently simplify during unrolling.");
394  if (Iteration == 0)
395  continue;
396 
397  // Push the incoming value from the backedge into the PHI used list
398  // if it is an in-loop instruction. We'll use this to populate the
399  // cost worklist for the next iteration (as we count backwards).
400  if (auto *OpI = dyn_cast<Instruction>(
401  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
402  if (L->contains(OpI))
403  PHIUsedList.push_back(OpI);
404  continue;
405  }
406 
407  // First accumulate the cost of this instruction.
408  if (!Cost.IsFree) {
409  UnrolledCost += TTI.getUserCost(I);
410  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
411  << Iteration << "): ");
412  LLVM_DEBUG(I->dump());
413  }
414 
415  // We must count the cost of every operand which is not free,
416  // recursively. If we reach a loop PHI node, simply add it to the set
417  // to be considered on the next iteration (backwards!).
418  for (Value *Op : I->operands()) {
419  // Check whether this operand is free due to being a constant or
420  // outside the loop.
421  auto *OpI = dyn_cast<Instruction>(Op);
422  if (!OpI || !L->contains(OpI))
423  continue;
424 
425  // Otherwise accumulate its cost.
426  CostWorklist.push_back(OpI);
427  }
428  } while (!CostWorklist.empty());
429 
430  if (PHIUsedList.empty())
431  // We've exhausted the search.
432  break;
433 
434  assert(Iteration > 0 &&
435  "Cannot track PHI-used values past the first iteration!");
436  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
437  PHIUsedList.clear();
438  }
439  };
440 
441  // Ensure that we don't violate the loop structure invariants relied on by
442  // this analysis.
443  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
444  assert(L->isLCSSAForm(DT) &&
445  "Must have loops in LCSSA form to track live-out values.");
446 
447  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
448 
449  // Simulate execution of each iteration of the loop counting instructions,
450  // which would be simplified.
451  // Since the same load will take different values on different iterations,
452  // we literally have to go through all loop's iterations.
453  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
454  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
455 
456  // Prepare for the iteration by collecting any simplified entry or backedge
457  // inputs.
458  for (Instruction &I : *L->getHeader()) {
459  auto *PHI = dyn_cast<PHINode>(&I);
460  if (!PHI)
461  break;
462 
463  // The loop header PHI nodes must have exactly two input: one from the
464  // loop preheader and one from the loop latch.
465  assert(
466  PHI->getNumIncomingValues() == 2 &&
467  "Must have an incoming value only for the preheader and the latch.");
468 
469  Value *V = PHI->getIncomingValueForBlock(
470  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
471  Constant *C = dyn_cast<Constant>(V);
472  if (Iteration != 0 && !C)
473  C = SimplifiedValues.lookup(V);
474  if (C)
475  SimplifiedInputValues.push_back({PHI, C});
476  }
477 
478  // Now clear and re-populate the map for the next iteration.
479  SimplifiedValues.clear();
480  while (!SimplifiedInputValues.empty())
481  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
482 
483  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
484 
485  BBWorklist.clear();
486  BBWorklist.insert(L->getHeader());
487  // Note that we *must not* cache the size, this loop grows the worklist.
488  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
489  BasicBlock *BB = BBWorklist[Idx];
490 
491  // Visit all instructions in the given basic block and try to simplify
492  // it. We don't change the actual IR, just count optimization
493  // opportunities.
494  for (Instruction &I : *BB) {
495  // These won't get into the final code - don't even try calculating the
496  // cost for them.
497  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
498  continue;
499 
500  // Track this instruction's expected baseline cost when executing the
501  // rolled loop form.
502  RolledDynamicCost += TTI.getUserCost(&I);
503 
504  // Visit the instruction to analyze its loop cost after unrolling,
505  // and if the visitor returns true, mark the instruction as free after
506  // unrolling and continue.
507  bool IsFree = Analyzer.visit(I);
508  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
509  (unsigned)IsFree,
510  /*IsCounted*/ false}).second;
511  (void)Inserted;
512  assert(Inserted && "Cannot have a state for an unvisited instruction!");
513 
514  if (IsFree)
515  continue;
516 
517  // Can't properly model a cost of a call.
518  // FIXME: With a proper cost model we should be able to do it.
519  if (auto *CI = dyn_cast<CallInst>(&I)) {
520  const Function *Callee = CI->getCalledFunction();
521  if (!Callee || TTI.isLoweredToCall(Callee)) {
522  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
523  return None;
524  }
525  }
526 
527  // If the instruction might have a side-effect recursively account for
528  // the cost of it and all the instructions leading up to it.
529  if (I.mayHaveSideEffects())
530  AddCostRecursively(I, Iteration);
531 
532  // If unrolled body turns out to be too big, bail out.
533  if (UnrolledCost > MaxUnrolledLoopSize) {
534  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
535  << " UnrolledCost: " << UnrolledCost
536  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
537  << "\n");
538  return None;
539  }
540  }
541 
542  Instruction *TI = BB->getTerminator();
543 
544  // Add in the live successors by first checking whether we have terminator
545  // that may be simplified based on the values simplified by this call.
546  BasicBlock *KnownSucc = nullptr;
547  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
548  if (BI->isConditional()) {
549  if (Constant *SimpleCond =
550  SimplifiedValues.lookup(BI->getCondition())) {
551  // Just take the first successor if condition is undef
552  if (isa<UndefValue>(SimpleCond))
553  KnownSucc = BI->getSuccessor(0);
554  else if (ConstantInt *SimpleCondVal =
555  dyn_cast<ConstantInt>(SimpleCond))
556  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
557  }
558  }
559  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
560  if (Constant *SimpleCond =
561  SimplifiedValues.lookup(SI->getCondition())) {
562  // Just take the first successor if condition is undef
563  if (isa<UndefValue>(SimpleCond))
564  KnownSucc = SI->getSuccessor(0);
565  else if (ConstantInt *SimpleCondVal =
566  dyn_cast<ConstantInt>(SimpleCond))
567  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
568  }
569  }
570  if (KnownSucc) {
571  if (L->contains(KnownSucc))
572  BBWorklist.insert(KnownSucc);
573  else
574  ExitWorklist.insert({BB, KnownSucc});
575  continue;
576  }
577 
578  // Add BB's successors to the worklist.
579  for (BasicBlock *Succ : successors(BB))
580  if (L->contains(Succ))
581  BBWorklist.insert(Succ);
582  else
583  ExitWorklist.insert({BB, Succ});
584  AddCostRecursively(*TI, Iteration);
585  }
586 
587  // If we found no optimization opportunities on the first iteration, we
588  // won't find them on later ones too.
589  if (UnrolledCost == RolledDynamicCost) {
590  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
591  << " UnrolledCost: " << UnrolledCost << "\n");
592  return None;
593  }
594  }
595 
596  while (!ExitWorklist.empty()) {
597  BasicBlock *ExitingBB, *ExitBB;
598  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
599 
600  for (Instruction &I : *ExitBB) {
601  auto *PN = dyn_cast<PHINode>(&I);
602  if (!PN)
603  break;
604 
605  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
606  if (auto *OpI = dyn_cast<Instruction>(Op))
607  if (L->contains(OpI))
608  AddCostRecursively(*OpI, TripCount - 1);
609  }
610  }
611 
612  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
613  << "UnrolledCost: " << UnrolledCost << ", "
614  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
615  return {{UnrolledCost, RolledDynamicCost}};
616 }
617 
618 /// ApproximateLoopSize - Approximate the size of the loop.
620  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
621  const TargetTransformInfo &TTI,
622  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
624  for (BasicBlock *BB : L->blocks())
625  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
626  NumCalls = Metrics.NumInlineCandidates;
627  NotDuplicatable = Metrics.notDuplicatable;
628  Convergent = Metrics.convergent;
629 
630  unsigned LoopSize = Metrics.NumInsts;
631 
632  // Don't allow an estimate of size zero. This would allows unrolling of loops
633  // with huge iteration counts, which is a compile time problem even if it's
634  // not a problem for code quality. Also, the code using this size may assume
635  // that each loop has at least three instructions (likely a conditional
636  // branch, a comparison feeding that branch, and some kind of loop increment
637  // feeding that comparison instruction).
638  LoopSize = std::max(LoopSize, BEInsns + 1);
639 
640  return LoopSize;
641 }
642 
643 // Returns the loop hint metadata node with the given name (for example,
644 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
645 // returned.
647  if (MDNode *LoopID = L->getLoopID())
648  return GetUnrollMetadata(LoopID, Name);
649  return nullptr;
650 }
651 
652 // Returns true if the loop has an unroll(full) pragma.
653 static bool HasUnrollFullPragma(const Loop *L) {
654  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
655 }
656 
657 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
658 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
659 static bool HasUnrollEnablePragma(const Loop *L) {
660  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
661 }
662 
663 // Returns true if the loop has an runtime unroll(disable) pragma.
664 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
665  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
666 }
667 
668 // If loop has an unroll_count pragma return the (necessarily
669 // positive) value from the pragma. Otherwise return 0.
670 static unsigned UnrollCountPragmaValue(const Loop *L) {
671  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
672  if (MD) {
673  assert(MD->getNumOperands() == 2 &&
674  "Unroll count hint metadata should have two operands.");
675  unsigned Count =
676  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
677  assert(Count >= 1 && "Unroll count must be positive.");
678  return Count;
679  }
680  return 0;
681 }
682 
683 // Computes the boosting factor for complete unrolling.
684 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
685 // be beneficial to fully unroll the loop even if unrolledcost is large. We
686 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
687 // the unroll threshold.
688 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
689  unsigned MaxPercentThresholdBoost) {
690  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
691  return 100;
692  else if (Cost.UnrolledCost != 0)
693  // The boosting factor is RolledDynamicCost / UnrolledCost
694  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
695  MaxPercentThresholdBoost);
696  else
697  return MaxPercentThresholdBoost;
698 }
699 
700 // Returns loop size estimation for unrolled loop.
701 static uint64_t getUnrolledLoopSize(
702  unsigned LoopSize,
704  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
705  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
706 }
707 
708 // Returns true if unroll count was set explicitly.
709 // Calculates unroll count and writes it to UP.Count.
710 // Unless IgnoreUser is true, will also use metadata and command-line options
711 // that are specific to to the LoopUnroll pass (which, for instance, are
712 // irrelevant for the LoopUnrollAndJam pass).
713 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
714 // many LoopUnroll-specific options. The shared functionality should be
715 // refactored into it own function.
717  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
718  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
719  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
720  unsigned &TripMultiple, unsigned LoopSize,
721  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
722 
723  // Check for explicit Count.
724  // 1st priority is unroll count set by "unroll-count" option.
725  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
726  if (UserUnrollCount) {
727  UP.Count = UnrollCount;
728  UP.AllowExpensiveTripCount = true;
729  UP.Force = true;
730  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
731  return true;
732  }
733 
734  // 2nd priority is unroll count set by pragma.
735  unsigned PragmaCount = UnrollCountPragmaValue(L);
736  if (PragmaCount > 0) {
737  UP.Count = PragmaCount;
738  UP.Runtime = true;
739  UP.AllowExpensiveTripCount = true;
740  UP.Force = true;
741  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
743  return true;
744  }
745  bool PragmaFullUnroll = HasUnrollFullPragma(L);
746  if (PragmaFullUnroll && TripCount != 0) {
747  UP.Count = TripCount;
748  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
749  return false;
750  }
751 
752  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
753  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
754  PragmaEnableUnroll || UserUnrollCount;
755 
756  if (ExplicitUnroll && TripCount != 0) {
757  // If the loop has an unrolling pragma, we want to be more aggressive with
758  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
759  // value which is larger than the default limits.
760  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
761  UP.PartialThreshold =
762  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
763  }
764 
765  // 3rd priority is full unroll count.
766  // Full unroll makes sense only when TripCount or its upper bound could be
767  // statically calculated.
768  // Also we need to check if we exceed FullUnrollMaxCount.
769  // If using the upper bound to unroll, TripMultiple should be set to 1 because
770  // we do not know when loop may exit.
771  // MaxTripCount and ExactTripCount cannot both be non zero since we only
772  // compute the former when the latter is zero.
773  unsigned ExactTripCount = TripCount;
774  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
775  "ExtractTripCount and MaxTripCount cannot both be non zero.");
776  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
777  UP.Count = FullUnrollTripCount;
778  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
779  // When computing the unrolled size, note that BEInsns are not replicated
780  // like the rest of the loop body.
781  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
782  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
783  TripCount = FullUnrollTripCount;
784  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
785  return ExplicitUnroll;
786  } else {
787  // The loop isn't that small, but we still can fully unroll it if that
788  // helps to remove a significant number of instructions.
789  // To check that, run additional analysis on the loop.
791  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
792  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
793  unsigned Boost =
795  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
796  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
797  TripCount = FullUnrollTripCount;
798  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
799  return ExplicitUnroll;
800  }
801  }
802  }
803  }
804 
805  // 4th priority is loop peeling.
806  computePeelCount(L, LoopSize, UP, TripCount, SE);
807  if (UP.PeelCount) {
808  UP.Runtime = false;
809  UP.Count = 1;
810  return ExplicitUnroll;
811  }
812 
813  // 5th priority is partial unrolling.
814  // Try partial unroll only when TripCount could be statically calculated.
815  if (TripCount) {
816  UP.Partial |= ExplicitUnroll;
817  if (!UP.Partial) {
818  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
819  << "-unroll-allow-partial not given\n");
820  UP.Count = 0;
821  return false;
822  }
823  if (UP.Count == 0)
824  UP.Count = TripCount;
825  if (UP.PartialThreshold != NoThreshold) {
826  // Reduce unroll count to be modulo of TripCount for partial unrolling.
827  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
828  UP.Count =
829  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
830  (LoopSize - UP.BEInsns);
831  if (UP.Count > UP.MaxCount)
832  UP.Count = UP.MaxCount;
833  while (UP.Count != 0 && TripCount % UP.Count != 0)
834  UP.Count--;
835  if (UP.AllowRemainder && UP.Count <= 1) {
836  // If there is no Count that is modulo of TripCount, set Count to
837  // largest power-of-two factor that satisfies the threshold limit.
838  // As we'll create fixup loop, do the type of unrolling only if
839  // remainder loop is allowed.
841  while (UP.Count != 0 &&
842  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
843  UP.Count >>= 1;
844  }
845  if (UP.Count < 2) {
846  if (PragmaEnableUnroll)
847  ORE->emit([&]() {
849  "UnrollAsDirectedTooLarge",
850  L->getStartLoc(), L->getHeader())
851  << "Unable to unroll loop as directed by unroll(enable) "
852  "pragma "
853  "because unrolled size is too large.";
854  });
855  UP.Count = 0;
856  }
857  } else {
858  UP.Count = TripCount;
859  }
860  if (UP.Count > UP.MaxCount)
861  UP.Count = UP.MaxCount;
862  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
863  UP.Count != TripCount)
864  ORE->emit([&]() {
865  return OptimizationRemarkMissed(DEBUG_TYPE,
866  "FullUnrollAsDirectedTooLarge",
867  L->getStartLoc(), L->getHeader())
868  << "Unable to fully unroll loop as directed by unroll pragma "
869  "because "
870  "unrolled size is too large.";
871  });
872  return ExplicitUnroll;
873  }
874  assert(TripCount == 0 &&
875  "All cases when TripCount is constant should be covered here.");
876  if (PragmaFullUnroll)
877  ORE->emit([&]() {
879  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
880  L->getStartLoc(), L->getHeader())
881  << "Unable to fully unroll loop as directed by unroll(full) "
882  "pragma "
883  "because loop has a runtime trip count.";
884  });
885 
886  // 6th priority is runtime unrolling.
887  // Don't unroll a runtime trip count loop when it is disabled.
889  UP.Count = 0;
890  return false;
891  }
892 
893  // Check if the runtime trip count is too small when profile is available.
894  if (L->getHeader()->getParent()->hasProfileData()) {
895  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
896  if (*ProfileTripCount < FlatLoopTripCountThreshold)
897  return false;
898  else
899  UP.AllowExpensiveTripCount = true;
900  }
901  }
902 
903  // Reduce count based on the type of unrolling and the threshold values.
904  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
905  if (!UP.Runtime) {
906  LLVM_DEBUG(
907  dbgs() << " will not try to unroll loop with runtime trip count "
908  << "-unroll-runtime not given\n");
909  UP.Count = 0;
910  return false;
911  }
912  if (UP.Count == 0)
914 
915  // Reduce unroll count to be the largest power-of-two factor of
916  // the original count which satisfies the threshold limit.
917  while (UP.Count != 0 &&
918  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
919  UP.Count >>= 1;
920 
921 #ifndef NDEBUG
922  unsigned OrigCount = UP.Count;
923 #endif
924 
925  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
926  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
927  UP.Count >>= 1;
928  LLVM_DEBUG(
929  dbgs() << "Remainder loop is restricted (that could architecture "
930  "specific or because the loop contains a convergent "
931  "instruction), so unroll count must divide the trip "
932  "multiple, "
933  << TripMultiple << ". Reducing unroll count from " << OrigCount
934  << " to " << UP.Count << ".\n");
935 
936  using namespace ore;
937 
938  if (PragmaCount > 0 && !UP.AllowRemainder)
939  ORE->emit([&]() {
941  "DifferentUnrollCountFromDirected",
942  L->getStartLoc(), L->getHeader())
943  << "Unable to unroll loop the number of times directed by "
944  "unroll_count pragma because remainder loop is restricted "
945  "(that could architecture specific or because the loop "
946  "contains a convergent instruction) and so must have an "
947  "unroll "
948  "count that divides the loop trip multiple of "
949  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
950  << NV("UnrollCount", UP.Count) << " time(s).";
951  });
952  }
953 
954  if (UP.Count > UP.MaxCount)
955  UP.Count = UP.MaxCount;
956  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
957  << "\n");
958  if (UP.Count < 2)
959  UP.Count = 0;
960  return ExplicitUnroll;
961 }
962 
964  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
965  const TargetTransformInfo &TTI, AssumptionCache &AC,
966  OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
967  bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
968  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
969  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
970  Optional<bool> ProvidedAllowPeeling) {
971  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
972  << L->getHeader()->getParent()->getName() << "] Loop %"
973  << L->getHeader()->getName() << "\n");
975  if (TM & TM_Disable)
977  if (!L->isLoopSimplifyForm()) {
978  LLVM_DEBUG(
979  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
981  }
982 
983  // When automtatic unrolling is disabled, do not unroll unless overridden for
984  // this loop.
985  if (OnlyWhenForced && !(TM & TM_Enable))
987 
988  unsigned NumInlineCandidates;
989  bool NotDuplicatable;
990  bool Convergent;
992  L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
993  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
994  ProvidedAllowPeeling);
995  // Exit early if unrolling is disabled.
996  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
998 
1000  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1001 
1002  unsigned LoopSize =
1003  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1004  TTI, EphValues, UP.BEInsns);
1005  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1006  if (NotDuplicatable) {
1007  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1008  << " instructions.\n");
1010  }
1011  if (NumInlineCandidates != 0) {
1012  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1014  }
1015 
1016  // Find trip count and trip multiple if count is not available
1017  unsigned TripCount = 0;
1018  unsigned MaxTripCount = 0;
1019  unsigned TripMultiple = 1;
1020  // If there are multiple exiting blocks but one of them is the latch, use the
1021  // latch for the trip count estimation. Otherwise insist on a single exiting
1022  // block for the trip count estimation.
1023  BasicBlock *ExitingBlock = L->getLoopLatch();
1024  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1025  ExitingBlock = L->getExitingBlock();
1026  if (ExitingBlock) {
1027  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1028  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1029  }
1030 
1031  // If the loop contains a convergent operation, the prelude we'd add
1032  // to do the first few instructions before we hit the unrolled loop
1033  // is unsafe -- it adds a control-flow dependency to the convergent
1034  // operation. Therefore restrict remainder loop (try unrollig without).
1035  //
1036  // TODO: This is quite conservative. In practice, convergent_op()
1037  // is likely to be called unconditionally in the loop. In this
1038  // case, the program would be ill-formed (on most architectures)
1039  // unless n were the same on all threads in a thread group.
1040  // Assuming n is the same on all threads, any kind of unrolling is
1041  // safe. But currently llvm's notion of convergence isn't powerful
1042  // enough to express this.
1043  if (Convergent)
1044  UP.AllowRemainder = false;
1045 
1046  // Try to find the trip count upper bound if we cannot find the exact trip
1047  // count.
1048  bool MaxOrZero = false;
1049  if (!TripCount) {
1050  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1051  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1052  // We can unroll by the upper bound amount if it's generally allowed or if
1053  // we know that the loop is executed either the upper bound or zero times.
1054  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1055  // loop tests remains the same compared to the non-unrolled version, whereas
1056  // the generic upper bound unrolling keeps all but the last loop test so the
1057  // number of loop tests goes up which may end up being worse on targets with
1058  // constrained branch predictor resources so is controlled by an option.)
1059  // In addition we only unroll small upper bounds.
1060  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1061  MaxTripCount = 0;
1062  }
1063  }
1064 
1065  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1066  // fully unroll the loop.
1067  bool UseUpperBound = false;
1068  bool IsCountSetExplicitly = computeUnrollCount(
1069  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1070  TripMultiple, LoopSize, UP, UseUpperBound);
1071  if (!UP.Count)
1073  // Unroll factor (Count) must be less or equal to TripCount.
1074  if (TripCount && UP.Count > TripCount)
1075  UP.Count = TripCount;
1076 
1077  // Save loop properties before it is transformed.
1078  MDNode *OrigLoopID = L->getLoopID();
1079 
1080  // Unroll the loop.
1081  Loop *RemainderLoop = nullptr;
1082  LoopUnrollResult UnrollResult = UnrollLoop(
1083  L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1084  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1085  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1086  if (UnrollResult == LoopUnrollResult::Unmodified)
1088 
1089  if (RemainderLoop) {
1090  Optional<MDNode *> RemainderLoopID =
1093  if (RemainderLoopID.hasValue())
1094  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1095  }
1096 
1097  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1098  Optional<MDNode *> NewLoopID =
1101  if (NewLoopID.hasValue()) {
1102  L->setLoopID(NewLoopID.getValue());
1103 
1104  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1105  // explicitly.
1106  return UnrollResult;
1107  }
1108  }
1109 
1110  // If loop has an unroll count pragma or unrolled by explicitly set count
1111  // mark loop as unrolled to prevent unrolling beyond that requested.
1112  // If the loop was peeled, we already "used up" the profile information
1113  // we had, so we don't want to unroll or peel again.
1114  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1115  (IsCountSetExplicitly || UP.PeelCount))
1117 
1118  return UnrollResult;
1119 }
1120 
1121 namespace {
1122 
1123 class LoopUnroll : public LoopPass {
1124 public:
1125  static char ID; // Pass ID, replacement for typeid
1126 
1127  int OptLevel;
1128 
1129  /// If false, use a cost model to determine whether unrolling of a loop is
1130  /// profitable. If true, only loops that explicitly request unrolling via
1131  /// metadata are considered. All other loops are skipped.
1132  bool OnlyWhenForced;
1133 
1134  Optional<unsigned> ProvidedCount;
1135  Optional<unsigned> ProvidedThreshold;
1136  Optional<bool> ProvidedAllowPartial;
1137  Optional<bool> ProvidedRuntime;
1138  Optional<bool> ProvidedUpperBound;
1139  Optional<bool> ProvidedAllowPeeling;
1140 
1141  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1143  Optional<unsigned> Count = None,
1144  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1145  Optional<bool> UpperBound = None,
1146  Optional<bool> AllowPeeling = None)
1147  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1148  ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
1149  ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
1150  ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
1152  }
1153 
1154  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1155  if (skipLoop(L))
1156  return false;
1157 
1158  Function &F = *L->getHeader()->getParent();
1159 
1160  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1161  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1162  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1163  const TargetTransformInfo &TTI =
1164  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1165  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1166  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1167  // pass. Function analyses need to be preserved across loop transformations
1168  // but ORE cannot be preserved (see comment before the pass definition).
1169  OptimizationRemarkEmitter ORE(&F);
1170  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1171 
1173  L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
1174  ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1175  ProvidedUpperBound, ProvidedAllowPeeling);
1176 
1177  if (Result == LoopUnrollResult::FullyUnrolled)
1178  LPM.markLoopAsDeleted(*L);
1179 
1180  return Result != LoopUnrollResult::Unmodified;
1181  }
1182 
1183  /// This transformation requires natural loop information & requires that
1184  /// loop preheaders be inserted into the CFG...
1185  void getAnalysisUsage(AnalysisUsage &AU) const override {
1188  // FIXME: Loop passes are required to preserve domtree, and for now we just
1189  // recreate dom info if anything gets unrolled.
1191  }
1192 };
1193 
1194 } // end anonymous namespace
1195 
1196 char LoopUnroll::ID = 0;
1197 
1198 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1202 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1203 
1204 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1205  int Threshold, int Count, int AllowPartial,
1206  int Runtime, int UpperBound,
1207  int AllowPeeling) {
1208  // TODO: It would make more sense for this function to take the optionals
1209  // directly, but that's dangerous since it would silently break out of tree
1210  // callers.
1211  return new LoopUnroll(
1212  OptLevel, OnlyWhenForced,
1213  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1214  Count == -1 ? None : Optional<unsigned>(Count),
1215  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1216  Runtime == -1 ? None : Optional<bool>(Runtime),
1217  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1218  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1219 }
1220 
1221 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
1222  return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
1223 }
1224 
1227  LPMUpdater &Updater) {
1228  const auto &FAM =
1229  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1230  Function *F = L.getHeader()->getParent();
1231 
1232  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1233  // FIXME: This should probably be optional rather than required.
1234  if (!ORE)
1236  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1237  "cached at a higher level");
1238 
1239  // Keep track of the previous loop structure so we can identify new loops
1240  // created by unrolling.
1241  Loop *ParentL = L.getParentLoop();
1242  SmallPtrSet<Loop *, 4> OldLoops;
1243  if (ParentL)
1244  OldLoops.insert(ParentL->begin(), ParentL->end());
1245  else
1246  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1247 
1248  std::string LoopName = L.getName();
1249 
1250  bool Changed =
1251  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1252  /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
1253  /*Count*/ None,
1254  /*Threshold*/ None, /*AllowPartial*/ false,
1255  /*Runtime*/ false, /*UpperBound*/ false,
1256  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1257  if (!Changed)
1258  return PreservedAnalyses::all();
1259 
1260  // The parent must not be damaged by unrolling!
1261 #ifndef NDEBUG
1262  if (ParentL)
1263  ParentL->verifyLoop();
1264 #endif
1265 
1266  // Unrolling can do several things to introduce new loops into a loop nest:
1267  // - Full unrolling clones child loops within the current loop but then
1268  // removes the current loop making all of the children appear to be new
1269  // sibling loops.
1270  //
1271  // When a new loop appears as a sibling loop after fully unrolling,
1272  // its nesting structure has fundamentally changed and we want to revisit
1273  // it to reflect that.
1274  //
1275  // When unrolling has removed the current loop, we need to tell the
1276  // infrastructure that it is gone.
1277  //
1278  // Finally, we support a debugging/testing mode where we revisit child loops
1279  // as well. These are not expected to require further optimizations as either
1280  // they or the loop they were cloned from have been directly visited already.
1281  // But the debugging mode allows us to check this assumption.
1282  bool IsCurrentLoopValid = false;
1283  SmallVector<Loop *, 4> SibLoops;
1284  if (ParentL)
1285  SibLoops.append(ParentL->begin(), ParentL->end());
1286  else
1287  SibLoops.append(AR.LI.begin(), AR.LI.end());
1288  erase_if(SibLoops, [&](Loop *SibLoop) {
1289  if (SibLoop == &L) {
1290  IsCurrentLoopValid = true;
1291  return true;
1292  }
1293 
1294  // Otherwise erase the loop from the list if it was in the old loops.
1295  return OldLoops.count(SibLoop) != 0;
1296  });
1297  Updater.addSiblingLoops(SibLoops);
1298 
1299  if (!IsCurrentLoopValid) {
1300  Updater.markLoopAsDeleted(L, LoopName);
1301  } else {
1302  // We can only walk child loops if the current loop remained valid.
1304  // Walk *all* of the child loops.
1305  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1306  Updater.addChildLoops(ChildLoops);
1307  }
1308  }
1309 
1311 }
1312 
1313 template <typename RangeT>
1315  SmallVector<Loop *, 8> Worklist;
1316  // We use an internal worklist to build up the preorder traversal without
1317  // recursion.
1318  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1319 
1320  for (Loop *RootL : Loops) {
1321  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1322  assert(PreOrderWorklist.empty() &&
1323  "Must start with an empty preorder walk worklist.");
1324  PreOrderWorklist.push_back(RootL);
1325  do {
1326  Loop *L = PreOrderWorklist.pop_back_val();
1327  PreOrderWorklist.append(L->begin(), L->end());
1328  PreOrderLoops.push_back(L);
1329  } while (!PreOrderWorklist.empty());
1330 
1331  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1332  PreOrderLoops.clear();
1333  }
1334  return Worklist;
1335 }
1336 
1339  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1340  auto &LI = AM.getResult<LoopAnalysis>(F);
1341  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1342  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1343  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1345 
1346  LoopAnalysisManager *LAM = nullptr;
1347  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1348  LAM = &LAMProxy->getManager();
1349 
1350  const ModuleAnalysisManager &MAM =
1352  ProfileSummaryInfo *PSI =
1354 
1355  bool Changed = false;
1356 
1357  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1358  // Since simplification may add new inner loops, it has to run before the
1359  // legality and profitability checks. This means running the loop unroller
1360  // will simplify all loops, regardless of whether anything end up being
1361  // unrolled.
1362  for (auto &L : LI) {
1363  Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1364  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1365  }
1366 
1368 
1369  while (!Worklist.empty()) {
1370  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1371  // from back to front so that we work forward across the CFG, which
1372  // for unrolling is only needed to get optimization remarks emitted in
1373  // a forward order.
1374  Loop &L = *Worklist.pop_back_val();
1375 #ifndef NDEBUG
1376  Loop *ParentL = L.getParentLoop();
1377 #endif
1378 
1379  // Check if the profile summary indicates that the profiled application
1380  // has a huge working set size, in which case we disable peeling to avoid
1381  // bloating it further.
1382  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1383  if (PSI && PSI->hasHugeWorkingSetSize())
1384  LocalAllowPeeling = false;
1385  std::string LoopName = L.getName();
1386  // The API here is quite complex to call and we allow to select some
1387  // flavors of unrolling during construction time (by setting UnrollOpts).
1389  &L, DT, &LI, SE, TTI, AC, ORE,
1390  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1391  /*Count*/ None,
1392  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1393  UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1394  Changed |= Result != LoopUnrollResult::Unmodified;
1395 
1396  // The parent must not be damaged by unrolling!
1397 #ifndef NDEBUG
1398  if (Result != LoopUnrollResult::Unmodified && ParentL)
1399  ParentL->verifyLoop();
1400 #endif
1401 
1402  // Clear any cached analysis results for L if we removed it completely.
1403  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1404  LAM->clear(L, LoopName);
1405  }
1406 
1407  if (!Changed)
1408  return PreservedAnalyses::all();
1409 
1411 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:47
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:176
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:68
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:44
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:334
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:40
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:383
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4342
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Hexagon Hardware Loops
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:944
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:99
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:217
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:89
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:238
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:207
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:42
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:202
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:41
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:665
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:256
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:463
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:597
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:237
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:618
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:342
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:39
The transformation should not be applied.
Definition: LoopUtils.h:220
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1153
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
iterator begin() const
Definition: LoopInfo.h:141
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:32
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:373
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:142
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1329
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:248
iterator begin() const
Definition: LoopInfo.h:664
amdgpu Simplify well known AMD library false FunctionCallee Callee
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:386
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:895
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:211
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:192
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:214
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Definition: LoopInfo.h:588
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:333
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:134
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:788
iterator end() const
Definition: LoopInfo.h:142
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:211
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool empty() const
Definition: LoopInfo.h:145
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
succ_range successors(Instruction *I)
Definition: CFG.h:259
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:294
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:53
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:155
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:307
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1037
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:51