LLVM  10.0.0svn
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1 //===- LoopUnrollAndJamPass.cpp - Loop unroll and jam pass ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements an unroll and jam pass. Most of the work is done by
10 // Utils/UnrollLoopAndJam.cpp.
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/None.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/LoopPass.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Metadata.h"
37 #include "llvm/IR/PassManager.h"
38 #include "llvm/Pass.h"
39 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Debug.h"
44 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Utils.h"
49 #include <algorithm>
50 #include <cassert>
51 #include <cstdint>
52 #include <string>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "loop-unroll-and-jam"
57 
58 /// @{
59 /// Metadata attribute names
// These strings name the followup attributes that this file hands to
// makeFollowupLoopID() when it builds loop IDs for the loops that result from
// the transformation.
// NOTE(review): the declaration lines that introduce the last two string
// literals (the "remainder_inner" and "remainder_outer" names) are missing
// from this listing.
60 static const char *const LLVMLoopUnrollAndJamFollowupAll =
61  "llvm.loop.unroll_and_jam.followup_all";
62 static const char *const LLVMLoopUnrollAndJamFollowupInner =
63  "llvm.loop.unroll_and_jam.followup_inner";
64 static const char *const LLVMLoopUnrollAndJamFollowupOuter =
65  "llvm.loop.unroll_and_jam.followup_outer";
67  "llvm.loop.unroll_and_jam.followup_remainder_inner";
69  "llvm.loop.unroll_and_jam.followup_remainder_outer";
70 /// @}
71 
// Hidden command-line options of the pass.
// NOTE(review): for the last three options the line that declares the option
// variable is missing from this listing; only the argument lists survive.

// -allow-unroll-and-jam: whether this option was given on the command line is
// queried via getNumOccurrences() in tryToUnrollAndJamLoop.
72 static cl::opt<bool>
73  AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
74  cl::desc("Allows loops to be unroll-and-jammed."));
75 
// -unroll-and-jam-count: explicit count for testing; its presence is checked
// in computeUnrollAndJamCount.
77  "unroll-and-jam-count", cl::Hidden,
78  cl::desc("Use this unroll count for all loops including those with "
79  "unroll_and_jam_count pragma values, for testing purposes"));
80 
// -unroll-and-jam-threshold: inner-loop size threshold, initialised to 60.
82  "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
83  cl::desc("Threshold to use for inner loop when doing unroll and jam."));
84 
// -pragma-unroll-and-jam-threshold: size limit used for loops carrying an
// unroll-and-jam pragma, initialised to 1024.
86  "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
87  cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
88  "unroll_count pragma."));
89 
90 // Returns the loop hint metadata node with the given name (for example,
91 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
92 // returned.
// A loop without any loop ID also yields nullptr; otherwise the lookup is
// delegated to GetUnrollMetadata() on the loop's ID node.
// NOTE(review): the signature line of this function is missing from this
// listing; callers in this file pass a `const Loop *` and a metadata name.
94  if (MDNode *LoopID = L->getLoopID())
95  return GetUnrollMetadata(LoopID, Name);
96  return nullptr;
97 }
98 
99 // Returns true if the loop has any metadata starting with Prefix. For example a
100 // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
101 static bool HasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
102  if (MDNode *LoopID = L->getLoopID()) {
103  // First operand should refer to the loop id itself.
104  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
105  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
106 
107  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
108  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
109  if (!MD)
110  continue;
111 
112  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
113  if (!S)
114  continue;
115 
116  if (S->getString().startswith(Prefix))
117  return true;
118  }
119  }
120  return false;
121 }
122 
123 // Returns true if the loop has an unroll_and_jam(enable) pragma.
124 static bool HasUnrollAndJamEnablePragma(const Loop *L) {
125  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
126 }
127 
128 // If loop has an unroll_and_jam_count pragma return the (necessarily
129 // positive) value from the pragma. Otherwise return 0.
130 static unsigned UnrollAndJamCountPragmaValue(const Loop *L) {
131  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
132  if (MD) {
133  assert(MD->getNumOperands() == 2 &&
134  "Unroll count hint metadata should have two operands.");
135  unsigned Count =
136  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
137  assert(Count >= 1 && "Unroll count must be positive.");
138  return Count;
139  }
140  return 0;
141 }
142 
143 // Returns loop size estimation for unrolled loop.
// The estimate is (LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns: the
// BEInsns part is counted once, the rest of the size once per unrolled copy.
// The difference is widened to 64 bits before the multiplication.
// NOTE(review): the line declaring the second parameter (UP) is missing from
// this listing.
144 static uint64_t
145 getUnrollAndJammedLoopSize(unsigned LoopSize,
147  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
148  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
149 }
150 
151 // Calculates unroll and jam count and writes it to UP.Count. Returns true if
152 // unroll count was set explicitly.
// Every path that rejects the loop sets UP.Count to 0 and returns false.
// NOTE(review): this listing omits several lines of the function, including
// the line carrying its return type and name, the assignment of the
// command-line count, and the right-hand sides of the inner-loop threshold
// comparisons. The text below is therefore not compilable as shown.
154  Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
155  LoopInfo *LI, ScalarEvolution &SE,
156  const SmallPtrSetImpl<const Value *> &EphValues,
157  OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
158  unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
159  unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
160  // First up use computeUnrollCount from the loop unroller to get a count
161  // for unrolling the outer loop, plus any loops requiring explicit
162  // unrolling we leave to the unroller. This uses UP.Threshold /
163  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
164  // We have already checked that the loop has no unroll.* pragmas.
165  unsigned MaxTripCount = 0;
166  bool UseUpperBound = false;
167  bool ExplicitUnroll = computeUnrollCount(
168  L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
169  /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
170  if (ExplicitUnroll || UseUpperBound) {
171  // If the user explicitly set the loop as unrolled, don't UnJ it. Leave it
172  // for the unroller instead.
173  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
174  "computeUnrollCount\n");
175  UP.Count = 0;
176  return false;
177  }
178 
179  // Override with any explicit Count from the "unroll-and-jam-count" option.
180  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
181  if (UserUnrollCount) {
183  UP.Force = true;
184  if (UP.AllowRemainder &&
185  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
186  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
188  return true;
189  }
190 
191  // Check for unroll_and_jam pragmas
192  unsigned PragmaCount = UnrollAndJamCountPragmaValue(L);
193  if (PragmaCount > 0) {
194  UP.Count = PragmaCount;
195  UP.Runtime = true;
196  UP.Force = true;
197  if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
198  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
199  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
201  return true;
202  }
203 
204  bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L);
205  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
206  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
207 
208  // If the loop has an unrolling pragma, we want to be more aggressive with
209  // unrolling limits.
210  if (ExplicitUnrollAndJam)
212 
// If no remainder loop may be created, the loop below that lowers the count
// is skipped, so an inner loop that is too large means giving up entirely.
213  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
215  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
216  "inner loop too large\n");
217  UP.Count = 0;
218  return false;
219  }
220 
221  // We have a sensible limit for the outer loop, now adjust it for the inner
222  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
223  // explicitly, we want to stick to it.
224  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
225  while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
227  UP.Count--;
228  }
229 
230  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
231  // number of extra performance heuristics to check.
232  if (ExplicitUnrollAndJam)
233  return true;
234 
235  // If the inner loop count is known and small, leave the entire loop nest to
236  // the unroller
237  if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
238  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
239  "being left for the unroller\n");
240  UP.Count = 0;
241  return false;
242  }
243 
244  // Check for situations where UnJ is likely to be unprofitable. Including
245  // subloops with more than 1 block.
246  if (SubLoop->getBlocks().size() != 1) {
247  LLVM_DEBUG(
248  dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
249  UP.Count = 0;
250  return false;
251  }
252 
253  // Limit to loops where there is something to gain from unrolling and
254  // jamming the loop. In this case, look for loads that are invariant in the
255  // outer loop and can become shared.
256  unsigned NumInvariant = 0;
257  for (BasicBlock *BB : SubLoop->getBlocks()) {
258  for (Instruction &I : *BB) {
259  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
260  Value *V = Ld->getPointerOperand();
261  const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
262  if (SE.isLoopInvariant(LSCEV, L))
263  NumInvariant++;
264  }
265  }
266  }
267  if (NumInvariant == 0) {
268  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
269  UP.Count = 0;
270  return false;
271  }
272 
// All heuristics passed. UP.Count keeps the value computed above; the return
// value is false because the count was not set explicitly.
273  return false;
274 }
275 
// Tries to unroll-and-jam the outer loop L, which must have exactly one
// sub-loop. The function checks the loop form, pragmas, safety and size,
// asks computeUnrollAndJamCount() for a count, calls UnrollAndJamLoop(), and
// finally assigns followup loop IDs to the resulting loops. The result of
// UnrollAndJamLoop() is returned on the paths visible here.
// NOTE(review): this listing omits many lines of the function (the line with
// its name, the early-return statements, the declarations of UP, EnableMode
// and EphValues, among others), so the text below is not compilable as shown.
276 static LoopUnrollResult
278  ScalarEvolution &SE, const TargetTransformInfo &TTI,
280  OptimizationRemarkEmitter &ORE, int OptLevel) {
281  // Quick checks of the correct loop form
282  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
284  Loop *SubLoop = L->getSubLoops()[0];
285  if (!SubLoop->isLoopSimplifyForm())
287 
288  BasicBlock *Latch = L->getLoopLatch();
289  BasicBlock *Exit = L->getExitingBlock();
290  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
291  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
292 
// Both loops must exit from their latch block.
293  if (Latch != Exit || SubLoopLatch != SubLoopExit)
295 
297  gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
298  None, None, None, None, None, None, None);
299  if (AllowUnrollAndJam.getNumOccurrences() > 0)
301  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
303  // Exit early if unrolling is disabled.
306 
307  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
308  << L->getHeader()->getParent()->getName() << "] Loop %"
309  << L->getHeader()->getName() << "\n");
310 
312  if (EnableMode & TM_Disable)
314 
315  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
316  // the unroller, so long as it does not explicitly have unroll_and_jam
317  // metadata. This means #pragma nounroll will disable unroll and jam as well
318  // as unrolling
319  if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
320  !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
321  LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
323  }
324 
325  if (!isSafeToUnrollAndJam(L, SE, DT, DI)) {
326  LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
328  }
329 
330  // Approximate the loop size and collect useful info
331  unsigned NumInlineCandidates;
332  bool NotDuplicatable;
333  bool Convergent;
335  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
// The three out-parameters are written by both calls below; the checks that
// follow therefore see the values produced by the second call (outer loop L).
336  unsigned InnerLoopSize =
337  ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
338  Convergent, TTI, EphValues, UP.BEInsns);
339  unsigned OuterLoopSize =
340  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
341  TTI, EphValues, UP.BEInsns);
342  LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
343  LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
344  if (NotDuplicatable) {
345  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
346  "instructions.\n");
348  }
349  if (NumInlineCandidates != 0) {
350  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
352  }
353  if (Convergent) {
354  LLVM_DEBUG(
355  dbgs() << " Not unrolling loop with convergent instructions.\n");
357  }
358 
359  // Save original loop IDs for after the transformation.
360  MDNode *OrigOuterLoopID = L->getLoopID();
361  MDNode *OrigSubLoopID = SubLoop->getLoopID();
362 
363  // To assign the loop id of the epilogue, assign it before unrolling it so it
364  // is applied to every inner loop of the epilogue. We later apply the loop ID
365  // for the jammed inner loop.
366  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
367  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
368  LLVMLoopUnrollAndJamFollowupRemainderInner});
369  if (NewInnerEpilogueLoopID.hasValue())
370  SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
371 
372  // Find trip count and trip multiple
373  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
374  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
375  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
376 
377  // Decide if, and by how much, to unroll
378  bool IsCountSetExplicitly = computeUnrollAndJamCount(
379  L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
380  OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
381  if (UP.Count <= 1)
383  // Unroll factor (Count) must be less than or equal to TripCount.
384  if (OuterTripCount && UP.Count > OuterTripCount)
385  UP.Count = OuterTripCount;
386 
387  Loop *EpilogueOuterLoop = nullptr;
388  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
389  L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
390  &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
391 
392  // Assign new loop attributes.
393  if (EpilogueOuterLoop) {
394  Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
395  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
396  LLVMLoopUnrollAndJamFollowupRemainderOuter});
397  if (NewOuterEpilogueLoopID.hasValue())
398  EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
399  }
400 
// The inner loop gets its followup ID if one was requested; otherwise the ID
// saved before the transformation is restored.
401  Optional<MDNode *> NewInnerLoopID =
403  LLVMLoopUnrollAndJamFollowupInner});
404  if (NewInnerLoopID.hasValue())
405  SubLoop->setLoopID(NewInnerLoopID.getValue());
406  else
407  SubLoop->setLoopID(OrigSubLoopID);
408 
409  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
410  Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
411  OrigOuterLoopID,
412  {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
413  if (NewOuterLoopID.hasValue()) {
414  L->setLoopID(NewOuterLoopID.getValue());
415 
416  // Do not setLoopAlreadyUnrolled if a followup was given.
417  return UnrollResult;
418  }
419  }
420 
421  // If loop has an unroll count pragma or unrolled by explicitly set count
422  // mark loop as unrolled to prevent unrolling beyond that requested.
423  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
425 
426  return UnrollResult;
427 }
428 
429 namespace {
430 
// Legacy pass manager wrapper: a LoopPass whose runOnLoop() fetches the
// required analyses and forwards to tryToUnrollAndJamLoop().
// NOTE(review): this listing omits the constructor body, the construction of
// ORE in runOnLoop() and the body of getAnalysisUsage().
431 class LoopUnrollAndJam : public LoopPass {
432 public:
433  static char ID; // Pass ID, replacement for typeid
434  unsigned OptLevel;
435 
436  LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
438  }
439 
// Runs the transformation on L. Returns true when the loop was modified; a
// fully unrolled loop is additionally reported to LPM as deleted.
440  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
441  if (skipLoop(L))
442  return false;
443 
444  Function &F = *L->getHeader()->getParent();
445 
446  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
447  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
448  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
449  const TargetTransformInfo &TTI =
450  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
451  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
452  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
453  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
454  // pass. Function analyses need to be preserved across loop transformations
455  // but ORE cannot be preserved (see comment before the pass definition).
457 
458  LoopUnrollResult Result =
459  tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
460 
461  if (Result == LoopUnrollResult::FullyUnrolled)
462  LPM.markLoopAsDeleted(*L);
463 
464  return Result != LoopUnrollResult::Unmodified;
465  }
466 
467  /// This transformation requires natural loop information & requires that
468  /// loop preheaders be inserted into the CFG...
469  void getAnalysisUsage(AnalysisUsage &AU) const override {
474  }
475 };
476 
477 } // end anonymous namespace
478 
// Out-of-class definition of the static pass ID declared in LoopUnrollAndJam.
479 char LoopUnrollAndJam::ID = 0;
480 
// Registers the legacy pass under the command-line name "loop-unroll-and-jam".
// NOTE(review): the lines between the BEGIN and END macros are missing from
// this listing; presumably they are INITIALIZE_PASS_DEPENDENCY entries --
// confirm against the upstream file.
481 INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
482  "Unroll and Jam loops", false, false)
487 INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
488  "Unroll and Jam loops", false, false)
489 
// Factory body: allocates a new legacy LoopUnrollAndJam pass for the given
// optimization level and returns it to the caller.
// NOTE(review): the signature line of this function is missing from this
// listing.
491  return new LoopUnrollAndJam(OptLevel);
492 }
493 
// New pass manager entry point. It requires a cached
// OptimizationRemarkEmitterAnalysis result for the enclosing function, builds
// a DependenceInfo from the standard loop analyses and runs the
// transformation with the analyses taken from AR. All analyses are reported
// as preserved when the loop was left unmodified.
// NOTE(review): this listing omits the first line of the signature, the call
// made when ORE is null, the declaration of Result and the final return
// statement.
496  LPMUpdater &) {
497  const auto &FAM =
498  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
499  Function *F = L.getHeader()->getParent();
500 
501  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
502  // FIXME: This should probably be optional rather than required.
503  if (!ORE)
505  "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
506  "a higher level");
507 
508  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
509 
511  &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
512 
513  if (Result == LoopUnrollResult::Unmodified)
514  return PreservedAnalyses::all();
515 
517 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:70
const NoneType None
Definition: None.h:23
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:777
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
static bool HasUnrollAndJamEnablePragma(const Loop *L)
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
Legacy pass manager pass to access dependence information.
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:270
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
A cache of @llvm.assume calls within a function.
static const char *const LLVMLoopUnrollAndJamFollowupOuter
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Metadata node.
Definition: Metadata.h:863
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
DependenceInfo - This class is the main dependence-analysis driver.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
TransformationMode hasUnrollAndJamTransformation(Loop *L)
Definition: LoopUtils.cpp:412
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
static const char *const LLVMLoopUnrollAndJamFollowupInner
BlockT * getHeader() const
Definition: LoopInfo.h:105
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:513
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI)
This header provides classes for managing per-loop analyses.
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
StringRef getString() const
Definition: Metadata.cpp:463
loop unroll and Unroll and Jam loops
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:525
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP)
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Represent the analysis usage information of a pass.
constexpr double e
Definition: MathExtras.h:57
static unsigned UnrollAndJamCountPragmaValue(const Loop *L)
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
The transformation should not be applied.
Definition: LoopUtils.h:231
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1161
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:302
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:962
loop unroll and jam
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:136
static const char *const LLVMLoopUnrollAndJamFollowupAll
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:259
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:464
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:489
unsigned Threshold
The cost threshold for the unrolled loop.
This class represents an analyzed expression in the program.
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
Parameters that control the generic loop unrolling transformation.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:138
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:222
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:74
The loop was not modified.
void initializeLoopUnrollAndJamPass(PassRegistry &)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A single uniqued string.
Definition: Metadata.h:603
A container for analyses that lazily runs them and caches their results.
static bool HasAnyUnrollPragma(const Loop *L, StringRef Prefix)
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
The optimization diagnostic interface.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:53
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)