LLVM  16.0.0git
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1 //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an unroll and jam pass. Most of the work is done by
10 // Utils/UnrollLoopAndJam.cpp.
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/LoopPass.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Metadata.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/InitializePasses.h"
38 #include "llvm/Pass.h"
39 #include "llvm/PassRegistry.h"
40 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Scalar.h"
50 #include <cassert>
51 #include <cstdint>
52 
53 namespace llvm {
54 class Instruction;
55 class Value;
56 } // namespace llvm
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "loop-unroll-and-jam"
61 
62 /// @{
63 /// Metadata attribute names
64 static const char *const LLVMLoopUnrollAndJamFollowupAll =
65  "llvm.loop.unroll_and_jam.followup_all";
66 static const char *const LLVMLoopUnrollAndJamFollowupInner =
67  "llvm.loop.unroll_and_jam.followup_inner";
68 static const char *const LLVMLoopUnrollAndJamFollowupOuter =
69  "llvm.loop.unroll_and_jam.followup_outer";
71  "llvm.loop.unroll_and_jam.followup_remainder_inner";
73  "llvm.loop.unroll_and_jam.followup_remainder_outer";
74 /// @}
75 
76 static cl::opt<bool>
77  AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
78  cl::desc("Allows loops to be unroll-and-jammed."));
79 
81  "unroll-and-jam-count", cl::Hidden,
82  cl::desc("Use this unroll count for all loops including those with "
83  "unroll_and_jam_count pragma values, for testing purposes"));
84 
86  "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
87  cl::desc("Threshold to use for inner loop when doing unroll and jam."));
88 
90  "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
91  cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
92  "unroll_count pragma."));
93 
94 // Returns the loop hint metadata node with the given name (for example,
95 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
96 // returned.
98  if (MDNode *LoopID = L->getLoopID())
99  return GetUnrollMetadata(LoopID, Name);
100  return nullptr;
101 }
102 
103 // Returns true if the loop has any metadata starting with Prefix. For example a
104 // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
105 static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
106  if (MDNode *LoopID = L->getLoopID()) {
107  // First operand should refer to the loop id itself.
108  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
109  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
110 
111  for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
112  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
113  if (!MD)
114  continue;
115 
116  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
117  if (!S)
118  continue;
119 
120  if (S->getString().startswith(Prefix))
121  return true;
122  }
123  }
124  return false;
125 }
126 
127 // Returns true if the loop has an unroll_and_jam(enable) pragma.
128 static bool hasUnrollAndJamEnablePragma(const Loop *L) {
129  return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
130 }
131 
132 // If loop has an unroll_and_jam_count pragma return the (necessarily
133 // positive) value from the pragma. Otherwise return 0.
134 static unsigned unrollAndJamCountPragmaValue(const Loop *L) {
135  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
136  if (MD) {
137  assert(MD->getNumOperands() == 2 &&
138  "Unroll count hint metadata should have two operands.");
139  unsigned Count =
140  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
141  assert(Count >= 1 && "Unroll count must be positive.");
142  return Count;
143  }
144  return 0;
145 }
146 
147 // Returns loop size estimation for unrolled loop.
148 static uint64_t
149 getUnrollAndJammedLoopSize(unsigned LoopSize,
151  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
152  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
153 }
154 
155 // Calculates unroll and jam count and writes it to UP.Count. Returns true if
156 // unroll count was set explicitly.
158  Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
160  const SmallPtrSetImpl<const Value *> &EphValues,
161  OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
162  unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
163  unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
165  // First up use computeUnrollCount from the loop unroller to get a count
166  // for unrolling the outer loop, plus any loops requiring explicit
167  // unrolling we leave to the unroller. This uses UP.Threshold /
168  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
169  // We have already checked that the loop has no unroll.* pragmas.
170  unsigned MaxTripCount = 0;
171  bool UseUpperBound = false;
172  bool ExplicitUnroll = computeUnrollCount(
173  L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
174  /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
175  UseUpperBound);
176  if (ExplicitUnroll || UseUpperBound) {
177  // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
178  // for the unroller instead.
179  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
180  "computeUnrollCount\n");
181  UP.Count = 0;
182  return false;
183  }
184 
185  // Override with any explicit Count from the "unroll-and-jam-count" option.
186  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
187  if (UserUnrollCount) {
189  UP.Force = true;
190  if (UP.AllowRemainder &&
191  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
192  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
194  return true;
195  }
196 
197  // Check for unroll_and_jam pragmas
198  unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
199  if (PragmaCount > 0) {
200  UP.Count = PragmaCount;
201  UP.Runtime = true;
202  UP.Force = true;
203  if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
204  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
205  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
207  return true;
208  }
209 
210  bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
211  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
212  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
213 
214  // If the loop has an unrolling pragma, we want to be more aggressive with
215  // unrolling limits.
216  if (ExplicitUnrollAndJam)
218 
219  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
221  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
222  "inner loop too large\n");
223  UP.Count = 0;
224  return false;
225  }
226 
227  // We have a sensible limit for the outer loop, now adjust it for the inner
228  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
229  // explicitly, we want to stick to it.
230  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
231  while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
233  UP.Count--;
234  }
235 
236  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
237  // number of extra performance heuristics to check.
238  if (ExplicitUnrollAndJam)
239  return true;
240 
241  // If the inner loop count is known and small, leave the entire loop nest to
242  // be the unroller
243  if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
244  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
245  "being left for the unroller\n");
246  UP.Count = 0;
247  return false;
248  }
249 
250  // Check for situations where UnJ is likely to be unprofitable. Including
251  // subloops with more than 1 block.
252  if (SubLoop->getBlocks().size() != 1) {
253  LLVM_DEBUG(
254  dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
255  UP.Count = 0;
256  return false;
257  }
258 
259  // Limit to loops where there is something to gain from unrolling and
260  // jamming the loop. In this case, look for loads that are invariant in the
261  // outer loop and can become shared.
262  unsigned NumInvariant = 0;
263  for (BasicBlock *BB : SubLoop->getBlocks()) {
264  for (Instruction &I : *BB) {
265  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
266  Value *V = Ld->getPointerOperand();
267  const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
268  if (SE.isLoopInvariant(LSCEV, L))
269  NumInvariant++;
270  }
271  }
272  }
273  if (NumInvariant == 0) {
274  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
275  UP.Count = 0;
276  return false;
277  }
278 
279  return false;
280 }
281 
282 static LoopUnrollResult
286  OptimizationRemarkEmitter &ORE, int OptLevel) {
288  gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
289  None, None, None, None, None, None);
292 
294  if (EnableMode & TM_Disable)
296  if (EnableMode & TM_ForcedByUser)
297  UP.UnrollAndJam = true;
298 
301  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
303  // Exit early if unrolling is disabled.
304  if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
306 
307  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
308  << L->getHeader()->getParent()->getName() << "] Loop %"
309  << L->getHeader()->getName() << "\n");
310 
311  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
312  // the unroller, so long as it does not explicitly have unroll_and_jam
313  // metadata. This means #pragma nounroll will disable unroll and jam as well
314  // as unrolling
315  if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
316  !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
317  LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
319  }
320 
321  if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
322  LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
324  }
325 
326  // Approximate the loop size and collect useful info
327  unsigned NumInlineCandidates;
328  bool NotDuplicatable;
329  bool Convergent;
331  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
332  Loop *SubLoop = L->getSubLoops()[0];
333  InstructionCost InnerLoopSizeIC =
334  ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
335  Convergent, TTI, EphValues, UP.BEInsns);
336  InstructionCost OuterLoopSizeIC =
337  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
338  TTI, EphValues, UP.BEInsns);
339  LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
340  LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
341 
342  if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
343  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
344  << " with invalid cost.\n");
346  }
347  unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
348  unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
349 
350  if (NotDuplicatable) {
351  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
352  "instructions.\n");
354  }
355  if (NumInlineCandidates != 0) {
356  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
358  }
359  if (Convergent) {
360  LLVM_DEBUG(
361  dbgs() << " Not unrolling loop with convergent instructions.\n");
363  }
364 
365  // Save original loop IDs for after the transformation.
366  MDNode *OrigOuterLoopID = L->getLoopID();
367  MDNode *OrigSubLoopID = SubLoop->getLoopID();
368 
369  // To assign the loop id of the epilogue, assign it before unrolling it so it
370  // is applied to every inner loop of the epilogue. We later apply the loop ID
371  // for the jammed inner loop.
372  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
373  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
375  if (NewInnerEpilogueLoopID)
376  SubLoop->setLoopID(NewInnerEpilogueLoopID.value());
377 
378  // Find trip count and trip multiple
379  BasicBlock *Latch = L->getLoopLatch();
380  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
381  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
382  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
383  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
384 
385  // Decide if, and by how much, to unroll
386  bool IsCountSetExplicitly = computeUnrollAndJamCount(
387  L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
388  OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
389  if (UP.Count <= 1)
391  // Unroll factor (Count) must be less or equal to TripCount.
392  if (OuterTripCount && UP.Count > OuterTripCount)
393  UP.Count = OuterTripCount;
394 
395  Loop *EpilogueOuterLoop = nullptr;
396  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
397  L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
398  &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);
399 
400  // Assign new loop attributes.
401  if (EpilogueOuterLoop) {
402  Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
403  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
405  if (NewOuterEpilogueLoopID)
406  EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.value());
407  }
408 
409  Optional<MDNode *> NewInnerLoopID =
412  if (NewInnerLoopID)
413  SubLoop->setLoopID(NewInnerLoopID.value());
414  else
415  SubLoop->setLoopID(OrigSubLoopID);
416 
417  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
418  Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
419  OrigOuterLoopID,
421  if (NewOuterLoopID) {
422  L->setLoopID(NewOuterLoopID.value());
423 
424  // Do not setLoopAlreadyUnrolled if a followup was given.
425  return UnrollResult;
426  }
427  }
428 
429  // If loop has an unroll count pragma or unrolled by explicitly set count
430  // mark loop as unrolled to prevent unrolling beyond that requested.
431  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
433 
434  return UnrollResult;
435 }
436 
438  ScalarEvolution &SE,
439  const TargetTransformInfo &TTI,
441  OptimizationRemarkEmitter &ORE, int OptLevel,
442  LPMUpdater &U) {
443  bool DidSomething = false;
445  Loop *OutmostLoop = &LN.getOutermostLoop();
446 
447  // Add the loop nests in the reverse order of LN. See method
448  // declaration.
450  appendLoopsToWorklist(Loops, Worklist);
451  while (!Worklist.empty()) {
452  Loop *L = Worklist.pop_back_val();
453  std::string LoopName = std::string(L->getName());
454  LoopUnrollResult Result =
455  tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
456  if (Result != LoopUnrollResult::Unmodified)
457  DidSomething = true;
458  if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)
459  U.markLoopAsDeleted(*L, LoopName);
460  }
461 
462  return DidSomething;
463 }
464 
465 namespace {
466 
467 class LoopUnrollAndJam : public LoopPass {
468 public:
469  static char ID; // Pass ID, replacement for typeid
470  unsigned OptLevel;
471 
472  LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
474  }
475 
476  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
477  if (skipLoop(L))
478  return false;
479 
480  auto *F = L->getHeader()->getParent();
481  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
482  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
483  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
484  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
485  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
486  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
487  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
488 
490  tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
491 
492  if (Result == LoopUnrollResult::FullyUnrolled)
493  LPM.markLoopAsDeleted(*L);
494 
496  }
497 
498  /// This transformation requires natural loop information & requires that
499  /// loop preheaders be inserted into the CFG...
500  void getAnalysisUsage(AnalysisUsage &AU) const override {
509  }
510 };
511 
512 } // end anonymous namespace
513 
514 char LoopUnrollAndJam::ID = 0;
515 
516 INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
517  "Unroll and Jam loops", false, false)
521 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
522 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
528 INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
529  "Unroll and Jam loops", false, false)
530 
532  return new LoopUnrollAndJam(OptLevel);
533 }
534 
538  LPMUpdater &U) {
539  Function &F = *LN.getParent();
540 
541  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
543 
544  if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,
545  OptLevel, U))
546  return PreservedAnalyses::all();
547 
548  auto PA = getLoopPassPreservedAnalyses();
549  PA.preserve<LoopNestAnalysis>();
550  return PA;
551 }
llvm::InstructionCost
Definition: InstructionCost.h:30
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:467
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:474
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:53
Optional.h
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
Metadata.h
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
hasUnrollAndJamEnablePragma
static bool hasUnrollAndJamEnablePragma(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:128
UnrollAndJamCount
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:161
Scalar.h
llvm::DependenceAnalysisWrapperPass
Legacy pass manager pass to access dependence information.
Definition: DependenceAnalysis.h:1001
llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:8172
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
StringRef.h
Pass.h
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:374
llvm::UnrollAndJamLoop
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
Definition: LoopUnrollAndJam.cpp:223
LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
Definition: LoopUnrollAndJamPass.cpp:70
LLVMLoopUnrollAndJamFollowupOuter
static const char *const LLVMLoopUnrollAndJamFollowupOuter
Definition: LoopUnrollAndJamPass.cpp:68
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::makeFollowupLoopID
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:264
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:493
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:486
llvm::getLoopAnalysisUsage
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:142
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:451
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
ScalarEvolution.h
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Definition: LoopAnalysisManager.h:51
llvm::Optional
Definition: APInt.h:33
LoopUnrollAndJamPass.h
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:184
AllowUnrollAndJam
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:54
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
PassRegistry.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollAndJamPass.cpp:97
loop
Analysis the ScalarEvolution expression for r is< loop > Outside the loop
Definition: README.txt:8
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
LoopAnalysisManager.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
PriorityWorklist.h
CommandLine.h
CodeMetrics.h
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:160
llvm::ApproximateLoopSize
InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:537
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:852
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupInner
Definition: LoopUnrollAndJamPass.cpp:66
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:482
computeUnrollAndJamCount
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
Definition: LoopUnrollAndJamPass.cpp:157
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
false
Definition: StackSlotColoring.cpp:141
llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:488
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
tryToUnrollAndJamLoop
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Definition: LoopUnrollAndJamPass.cpp:283
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
LoopUtils.h
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2188
llvm::LPPassManager
Definition: LoopPass.h:76
SmallPtrSet.h
llvm::Loop::getName
StringRef getName() const
Definition: LoopInfo.h:891
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::LoopNestAnalysis
This analysis provides information for a loop nest.
Definition: LoopNestAnalysis.h:202
loops
loop unroll and Unroll and Jam loops
Definition: LoopUnrollAndJamPass.cpp:529
llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8016
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138
BasicBlock.h
llvm::cl::opt< bool >
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:291
uint64_t
llvm::LoopPass
Definition: LoopPass.h:28
llvm::hasUnrollAndJamTransformation
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:374
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2642
llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:262
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:255
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:417
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:890
llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:54
ArrayRef.h
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:67
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
getUnrollAndJammedLoopSize
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollAndJamPass.cpp:149
llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:277
llvm::initializeLoopUnrollAndJamPass
void initializeLoopUnrollAndJamPass(PassRegistry &)
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LoopNestAnalysis.h
llvm::InstructionCost::getValue
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:88
llvm::LPMUpdater::markLoopAsDeleted
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
Definition: LoopPassManager.h:282
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
LoopPassManager.h
llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1108
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g.
Definition: LoopUtils.h:285
LoopPass.h
llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1525
UnrollAndJamThreshold
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Compiler.h
llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:55
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:80
llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176
LLVMLoopUnrollAndJamFollowupRemainderOuter
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
Definition: LoopUnrollAndJamPass.cpp:72
hasAnyUnrollPragma
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
Definition: LoopUnrollAndJamPass.cpp:105
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::isSafeToUnrollAndJam
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
Definition: LoopUnrollAndJam.cpp:865
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13637
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::LPPassManager::markLoopAsDeleted
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:110
llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:58
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:476
Casting.h
Function.h
LLVMLoopUnrollAndJamFollowupAll
static const char *const LLVMLoopUnrollAndJamFollowupAll
Definition: LoopUnrollAndJamPass.cpp:64
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::LoopUnrollAndJamPass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollAndJamPass.cpp:535
PassManager.h
PragmaUnrollAndJamThreshold
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:56
llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:52
llvm::PriorityWorklist< T, SmallVector< T, N >, SmallDenseMap< T, ptrdiff_t > >::pop_back_val
T pop_back_val()
Definition: PriorityWorklist.h:153
llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
jam
loop unroll and jam
Definition: LoopUnrollAndJamPass.cpp:528
Dominators.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:425
UnrollLoop.h
TargetTransformInfo.h
unrollAndJamCountPragmaValue
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:134
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::SmallPtrSetImpl< const Value * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
DependenceAnalysis.h
llvm::cl::desc
Definition: CommandLine.h:413
llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
raw_ostream.h
LoopPeel.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
InitializePasses.h
llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9621
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:268
Debug.h
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:796
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
llvm::createLoopUnrollAndJamPass
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
Definition: LoopUnrollAndJamPass.cpp:531