LLVM  4.0.0
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a loop-aware load elimination pass.
11 //
12 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
13 // distance of one between stores and loads. These form the candidates for the
14 // transformation. The source value of each store is then propagated to the
15 // users of the corresponding load. This makes the load dead.
16 //
17 // The pass can also version the loop and add memchecks in order to prove that
18 // may-aliasing stores can't change the value in memory before it's read by the
19 // load.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/Module.h"
40 #include "llvm/IR/Type.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
48 #include <forward_list>
49 #include <cassert>
50 #include <algorithm>
51 #include <set>
52 #include <tuple>
53 #include <utility>
54 
55 #define LLE_OPTION "loop-load-elim"
56 #define DEBUG_TYPE LLE_OPTION
57 
58 using namespace llvm;
59 
61  "runtime-check-per-loop-load-elim", cl::Hidden,
62  cl::desc("Max number of memchecks allowed per eliminated load on average"),
63  cl::init(1));
64 
66  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
67  cl::desc("The maximum number of SCEV checks allowed for Loop "
68  "Load Elimination"));
69 
70 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
71 
72 namespace {
73 
74 /// \brief Represent a store-to-forwarding candidate.
75 struct StoreToLoadForwardingCandidate {
76  LoadInst *Load;
78 
79  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
80  : Load(Load), Store(Store) {}
81 
82  /// \brief Return true if the dependence from the store to the load has a
83  /// distance of one. E.g. A[i+1] = A[i]
84  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
85  Loop *L) const {
86  Value *LoadPtr = Load->getPointerOperand();
87  Value *StorePtr = Store->getPointerOperand();
88  Type *LoadPtrType = LoadPtr->getType();
89  Type *LoadType = LoadPtrType->getPointerElementType();
90 
91  assert(LoadPtrType->getPointerAddressSpace() ==
92  StorePtr->getType()->getPointerAddressSpace() &&
93  LoadType == StorePtr->getType()->getPointerElementType() &&
94  "Should be a known dependence");
95 
96  // Currently we only support accesses with unit stride. FIXME: we should be
97  // able to handle non unit stirde as well as long as the stride is equal to
98  // the dependence distance.
99  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
100  getPtrStride(PSE, StorePtr, L) != 1)
101  return false;
102 
103  auto &DL = Load->getParent()->getModule()->getDataLayout();
104  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
105 
106  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
107  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
108 
109  // We don't need to check non-wrapping here because forward/backward
110  // dependence wouldn't be valid if these weren't monotonic accesses.
111  auto *Dist = cast<SCEVConstant>(
112  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
113  const APInt &Val = Dist->getAPInt();
114  return Val == TypeByteSize;
115  }
116 
117  Value *getLoadPtr() const { return Load->getPointerOperand(); }
118 
119 #ifndef NDEBUG
120  friend raw_ostream &operator<<(raw_ostream &OS,
121  const StoreToLoadForwardingCandidate &Cand) {
122  OS << *Cand.Store << " -->\n";
123  OS.indent(2) << *Cand.Load << "\n";
124  return OS;
125  }
126 #endif
127 };
128 
129 /// \brief Check if the store dominates all latches, so as long as there is no
130 /// intervening store this value will be loaded in the next iteration.
131 bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
132  DominatorTree *DT) {
134  L->getLoopLatches(Latches);
135  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
136  return DT->dominates(StoreBlock, Latch);
137  });
138 }
139 
140 /// \brief Return true if the load is not executed on all paths in the loop.
141 static bool isLoadConditional(LoadInst *Load, Loop *L) {
142  return Load->getParent() != L->getHeader();
143 }
144 
145 /// \brief The per-loop class that does most of the work.
146 class LoadEliminationForLoop {
147 public:
148  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
149  DominatorTree *DT)
150  : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
151 
152  /// \brief Look through the loop-carried and loop-independent dependences in
153  /// this loop and find store->load dependences.
154  ///
155  /// Note that no candidate is returned if LAA has failed to analyze the loop
156  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
157  std::forward_list<StoreToLoadForwardingCandidate>
158  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
159  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
160 
161  const auto *Deps = LAI.getDepChecker().getDependences();
162  if (!Deps)
163  return Candidates;
164 
165  // Find store->load dependences (consequently true dep). Both lexically
166  // forward and backward dependences qualify. Disqualify loads that have
167  // other unknown dependences.
168 
169  SmallSet<Instruction *, 4> LoadsWithUnknownDepedence;
170 
171  for (const auto &Dep : *Deps) {
172  Instruction *Source = Dep.getSource(LAI);
173  Instruction *Destination = Dep.getDestination(LAI);
174 
175  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
176  if (isa<LoadInst>(Source))
177  LoadsWithUnknownDepedence.insert(Source);
178  if (isa<LoadInst>(Destination))
179  LoadsWithUnknownDepedence.insert(Destination);
180  continue;
181  }
182 
183  if (Dep.isBackward())
184  // Note that the designations source and destination follow the program
185  // order, i.e. source is always first. (The direction is given by the
186  // DepType.)
187  std::swap(Source, Destination);
188  else
189  assert(Dep.isForward() && "Needs to be a forward dependence");
190 
191  auto *Store = dyn_cast<StoreInst>(Source);
192  if (!Store)
193  continue;
194  auto *Load = dyn_cast<LoadInst>(Destination);
195  if (!Load)
196  continue;
197 
198  // Only progagate the value if they are of the same type.
199  if (Store->getPointerOperand()->getType() !=
200  Load->getPointerOperand()->getType())
201  continue;
202 
203  Candidates.emplace_front(Load, Store);
204  }
205 
206  if (!LoadsWithUnknownDepedence.empty())
207  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
208  return LoadsWithUnknownDepedence.count(C.Load);
209  });
210 
211  return Candidates;
212  }
213 
214  /// \brief Return the index of the instruction according to program order.
215  unsigned getInstrIndex(Instruction *Inst) {
216  auto I = InstOrder.find(Inst);
217  assert(I != InstOrder.end() && "No index for instruction");
218  return I->second;
219  }
220 
221  /// \brief If a load has multiple candidates associated (i.e. different
222  /// stores), it means that it could be forwarding from multiple stores
223  /// depending on control flow. Remove these candidates.
224  ///
225  /// Here, we rely on LAA to include the relevant loop-independent dependences.
226  /// LAA is known to omit these in the very simple case when the read and the
227  /// write within an alias set always takes place using the *same* pointer.
228  ///
229  /// However, we know that this is not the case here, i.e. we can rely on LAA
230  /// to provide us with loop-independent dependences for the cases we're
231  /// interested. Consider the case for example where a loop-independent
232  /// dependece S1->S2 invalidates the forwarding S3->S2.
233  ///
234  /// A[i] = ... (S1)
235  /// ... = A[i] (S2)
236  /// A[i+1] = ... (S3)
237  ///
238  /// LAA will perform dependence analysis here because there are two
239  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
240  void removeDependencesFromMultipleStores(
241  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
242  // If Store is nullptr it means that we have multiple stores forwarding to
243  // this store.
245  LoadToSingleCandT;
246  LoadToSingleCandT LoadToSingleCand;
247 
248  for (const auto &Cand : Candidates) {
249  bool NewElt;
250  LoadToSingleCandT::iterator Iter;
251 
252  std::tie(Iter, NewElt) =
253  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
254  if (!NewElt) {
255  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
256  // Already multiple stores forward to this load.
257  if (OtherCand == nullptr)
258  continue;
259 
260  // Handle the very basic case when the two stores are in the same block
261  // so deciding which one forwards is easy. The later one forwards as
262  // long as they both have a dependence distance of one to the load.
263  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
264  Cand.isDependenceDistanceOfOne(PSE, L) &&
265  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
266  // They are in the same block, the later one will forward to the load.
267  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
268  OtherCand = &Cand;
269  } else
270  OtherCand = nullptr;
271  }
272  }
273 
274  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
275  if (LoadToSingleCand[Cand.Load] != &Cand) {
276  DEBUG(dbgs() << "Removing from candidates: \n" << Cand
277  << " The load may have multiple stores forwarding to "
278  << "it\n");
279  return true;
280  }
281  return false;
282  });
283  }
284 
285  /// \brief Given two pointers operations by their RuntimePointerChecking
286  /// indices, return true if they require an alias check.
287  ///
288  /// We need a check if one is a pointer for a candidate load and the other is
289  /// a pointer for a possibly intervening store.
290  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
291  const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
292  const std::set<Value *> &CandLoadPtrs) {
293  Value *Ptr1 =
295  Value *Ptr2 =
297  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
298  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
299  }
300 
301  /// \brief Return pointers that are possibly written to on the path from a
302  /// forwarding store to a load.
303  ///
304  /// These pointers need to be alias-checked against the forwarding candidates.
305  SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
307  // From FirstStore to LastLoad neither of the elimination candidate loads
308  // should overlap with any of the stores.
309  //
310  // E.g.:
311  //
312  // st1 C[i]
313  // ld1 B[i] <-------,
314  // ld0 A[i] <----, | * LastLoad
315  // ... | |
316  // st2 E[i] | |
317  // st3 B[i+1] -- | -' * FirstStore
318  // st0 A[i+1] ---'
319  // st4 D[i]
320  //
321  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
322  // ld0.
323 
324  LoadInst *LastLoad =
325  std::max_element(Candidates.begin(), Candidates.end(),
326  [&](const StoreToLoadForwardingCandidate &A,
327  const StoreToLoadForwardingCandidate &B) {
328  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
329  })
330  ->Load;
331  StoreInst *FirstStore =
332  std::min_element(Candidates.begin(), Candidates.end(),
333  [&](const StoreToLoadForwardingCandidate &A,
334  const StoreToLoadForwardingCandidate &B) {
335  return getInstrIndex(A.Store) <
336  getInstrIndex(B.Store);
337  })
338  ->Store;
339 
340  // We're looking for stores after the first forwarding store until the end
341  // of the loop, then from the beginning of the loop until the last
342  // forwarded-to load. Collect the pointer for the stores.
343  SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
344 
345  auto InsertStorePtr = [&](Instruction *I) {
346  if (auto *S = dyn_cast<StoreInst>(I))
347  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
348  };
349  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
350  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
351  MemInstrs.end(), InsertStorePtr);
352  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
353  InsertStorePtr);
354 
355  return PtrsWrittenOnFwdingPath;
356  }
357 
358  /// \brief Determine the pointer alias checks to prove that there are no
359  /// intervening stores.
362 
363  SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
364  findPointersWrittenOnForwardingPath(Candidates);
365 
366  // Collect the pointers of the candidate loads.
367  // FIXME: SmallSet does not work with std::inserter.
368  std::set<Value *> CandLoadPtrs;
369  transform(Candidates,
370  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
371  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
372 
373  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
375 
376  std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
378  for (auto PtrIdx1 : Check.first->Members)
379  for (auto PtrIdx2 : Check.second->Members)
380  if (needsChecking(PtrIdx1, PtrIdx2,
381  PtrsWrittenOnFwdingPath, CandLoadPtrs))
382  return true;
383  return false;
384  });
385 
386  DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
387  DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
388 
389  return Checks;
390  }
391 
392  /// \brief Perform the transformation for a candidate.
393  void
394  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
395  SCEVExpander &SEE) {
396  //
397  // loop:
398  // %x = load %gep_i
399  // = ... %x
400  // store %y, %gep_i_plus_1
401  //
402  // =>
403  //
404  // ph:
405  // %x.initial = load %gep_0
406  // loop:
407  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
408  // %x = load %gep_i <---- now dead
409  // = ... %x.storeforward
410  // store %y, %gep_i_plus_1
411 
412  Value *Ptr = Cand.Load->getPointerOperand();
413  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
414  auto *PH = L->getLoopPreheader();
415  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
416  PH->getTerminator());
417  Value *Initial =
418  new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
419  Cand.Load->getAlignment(), PH->getTerminator());
420 
421  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
422  &L->getHeader()->front());
423  PHI->addIncoming(Initial, PH);
424  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
425 
426  Cand.Load->replaceAllUsesWith(PHI);
427  }
428 
429  /// \brief Top-level driver for each loop: find store->load forwarding
430  /// candidates, add run-time checks and perform transformation.
431  bool processLoop() {
432  DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
433  << "\" checking " << *L << "\n");
434  // Look for store-to-load forwarding cases across the
435  // backedge. E.g.:
436  //
437  // loop:
438  // %x = load %gep_i
439  // = ... %x
440  // store %y, %gep_i_plus_1
441  //
442  // =>
443  //
444  // ph:
445  // %x.initial = load %gep_0
446  // loop:
447  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
448  // %x = load %gep_i <---- now dead
449  // = ... %x.storeforward
450  // store %y, %gep_i_plus_1
451 
452  // First start with store->load dependences.
453  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
454  if (StoreToLoadDependences.empty())
455  return false;
456 
457  // Generate an index for each load and store according to the original
458  // program order. This will be used later.
459  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
460 
461  // To keep things simple for now, remove those where the load is potentially
462  // fed by multiple stores.
463  removeDependencesFromMultipleStores(StoreToLoadDependences);
464  if (StoreToLoadDependences.empty())
465  return false;
466 
467  // Filter the candidates further.
469  unsigned NumForwarding = 0;
470  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
471  DEBUG(dbgs() << "Candidate " << Cand);
472 
473  // Make sure that the stored values is available everywhere in the loop in
474  // the next iteration.
475  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
476  continue;
477 
478  // If the load is conditional we can't hoist its 0-iteration instance to
479  // the preheader because that would make it unconditional. Thus we would
480  // access a memory location that the original loop did not access.
481  if (isLoadConditional(Cand.Load, L))
482  continue;
483 
484  // Check whether the SCEV difference is the same as the induction step,
485  // thus we load the value in the next iteration.
486  if (!Cand.isDependenceDistanceOfOne(PSE, L))
487  continue;
488 
489  ++NumForwarding;
490  DEBUG(dbgs()
491  << NumForwarding
492  << ". Valid store-to-load forwarding across the loop backedge\n");
493  Candidates.push_back(Cand);
494  }
495  if (Candidates.empty())
496  return false;
497 
498  // Check intervening may-alias stores. These need runtime checks for alias
499  // disambiguation.
501  collectMemchecks(Candidates);
502 
503  // Too many checks are likely to outweigh the benefits of forwarding.
504  if (Checks.size() > Candidates.size() * CheckPerElim) {
505  DEBUG(dbgs() << "Too many run-time checks needed.\n");
506  return false;
507  }
508 
509  if (LAI.getPSE().getUnionPredicate().getComplexity() >
511  DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
512  return false;
513  }
514 
515  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
516  if (L->getHeader()->getParent()->optForSize()) {
517  DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
518  "for size.\n");
519  return false;
520  }
521 
522  if (!L->isLoopSimplifyForm()) {
523  DEBUG(dbgs() << "Loop is not is loop-simplify form");
524  return false;
525  }
526 
527  // Point of no-return, start the transformation. First, version the loop
528  // if necessary.
529 
530  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
531  LV.setAliasChecks(std::move(Checks));
532  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
533  LV.versionLoop();
534  }
535 
536  // Next, propagate the value stored by the store to the users of the load.
537  // Also for the first iteration, generate the initial value of the load.
538  SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
539  "storeforward");
540  for (const auto &Cand : Candidates)
541  propagateStoredValueToLoadUsers(Cand, SEE);
542  NumLoopLoadEliminted += NumForwarding;
543 
544  return true;
545  }
546 
547 private:
548  Loop *L;
549 
550  /// \brief Maps the load/store instructions to their index according to
551  /// program order.
553 
554  // Analyses used.
555  LoopInfo *LI;
556  const LoopAccessInfo &LAI;
557  DominatorTree *DT;
559 };
560 
561 /// \brief The pass. Most of the work is delegated to the per-loop
562 /// LoadEliminationForLoop class.
563 class LoopLoadElimination : public FunctionPass {
564 public:
565  LoopLoadElimination() : FunctionPass(ID) {
566  initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
567  }
568 
569  bool runOnFunction(Function &F) override {
570  if (skipFunction(F))
571  return false;
572 
573  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
574  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
575  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
576 
577  // Build up a worklist of inner-loops to vectorize. This is necessary as the
578  // act of distributing a loop creates new loops and can invalidate iterators
579  // across the loops.
580  SmallVector<Loop *, 8> Worklist;
581 
582  for (Loop *TopLevelLoop : *LI)
583  for (Loop *L : depth_first(TopLevelLoop))
584  // We only handle inner-most loops.
585  if (L->empty())
586  Worklist.push_back(L);
587 
588  // Now walk the identified inner loops.
589  bool Changed = false;
590  for (Loop *L : Worklist) {
591  const LoopAccessInfo &LAI = LAA->getInfo(L);
592  // The actual work is performed by LoadEliminationForLoop.
593  LoadEliminationForLoop LEL(L, LI, LAI, DT);
594  Changed |= LEL.processLoop();
595  }
596 
597  // Process each loop nest in the function.
598  return Changed;
599  }
600 
601  void getAnalysisUsage(AnalysisUsage &AU) const override {
610  }
611 
612  static char ID;
613 };
614 
615 } // end anonymous namespace
616 
618 static const char LLE_name[] = "Loop Load Elimination";
619 
620 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
625 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
626 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
627 
628 namespace llvm {
629 
631  return new LoopLoadElimination();
632 }
633 
634 } // end namespace llvm
Legacy wrapper pass to provide the GlobalsAAResult object.
MachineLoop * L
static bool Check(DecodeStatus &Out, DecodeStatus In)
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
STATISTIC(NumFunctions,"Total number of functions")
This is the interface for a simple mod/ref and alias analysis over globals.
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
void getLoopLatches(SmallVectorImpl< BlockT * > &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:250
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:84
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
An instruction for reading from memory.
Definition: Instructions.h:164
BlockT * getHeader() const
Definition: LoopInfo.h:102
Type * getPointerElementType() const
Definition: Type.h:358
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:55
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static const char LLE_name[]
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:190
This file implements a class to represent arbitrary precision integral constant values and operations...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
#define F(x, y, z)
Definition: MD5.cpp:51
const RuntimePointerChecking * getRuntimePointerChecking() const
Function Alias Analysis false
void initializeLoopLoadEliminationPass(PassRegistry &)
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
An instruction for storing to memory.
Definition: Instructions.h:300
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:96
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:109
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
Represent the analysis usage information of a pass.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
Value * getPointerOperand()
Definition: Instructions.h:270
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:149
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
char & LoopSimplifyID
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:218
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:64
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:289
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:50
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
Class for arbitrary precision integers.
Definition: APInt.h:77
#define SEE(c)
Definition: regcomp.c:126
This class uses information about analyze scalars to rewrite expressions in canonical form...
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
const SCEVUnionPredicate & getUnionPredicate() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:368
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:1726
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:807
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
LLVM Value Representation.
Definition: Value.h:71
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
bool empty() const
Definition: LoopInfo.h:136
#define DEBUG(X)
Definition: Debug.h:100
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:831
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
int * Ptr
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:479
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
const BasicBlock * getParent() const
Definition: Instruction.h:62
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop ""Load Elimination"))