LLVM  6.0.0svn
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implement a loop-aware load elimination pass.
11 //
12 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
13 // distance of one between stores and loads. These form the candidates for the
14 // transformation. The source value of each store then propagated to the user
15 // of the corresponding load. This makes the load dead.
16 //
17 // The pass can also version the loop and add memchecks in order to prove that
18 // may-aliasing stores can't change the value in memory before it's read by the
19 // load.
20 //
21 //===----------------------------------------------------------------------===//
22 
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
36 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <forward_list>
59 #include <set>
60 #include <tuple>
61 #include <utility>
62 
63 using namespace llvm;
64 
65 #define LLE_OPTION "loop-load-elim"
66 #define DEBUG_TYPE LLE_OPTION
67 
69  "runtime-check-per-loop-load-elim", cl::Hidden,
70  cl::desc("Max number of memchecks allowed per eliminated load on average"),
71  cl::init(1));
72 
74  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
75  cl::desc("The maximum number of SCEV checks allowed for Loop "
76  "Load Elimination"));
77 
78 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
79 
80 namespace {
81 
82 /// \brief Represent a store-to-forwarding candidate.
83 struct StoreToLoadForwardingCandidate {
84  LoadInst *Load;
86 
87  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
88  : Load(Load), Store(Store) {}
89 
90  /// \brief Return true if the dependence from the store to the load has a
91  /// distance of one. E.g. A[i+1] = A[i]
92  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
93  Loop *L) const {
94  Value *LoadPtr = Load->getPointerOperand();
95  Value *StorePtr = Store->getPointerOperand();
96  Type *LoadPtrType = LoadPtr->getType();
97  Type *LoadType = LoadPtrType->getPointerElementType();
98 
99  assert(LoadPtrType->getPointerAddressSpace() ==
100  StorePtr->getType()->getPointerAddressSpace() &&
101  LoadType == StorePtr->getType()->getPointerElementType() &&
102  "Should be a known dependence");
103 
104  // Currently we only support accesses with unit stride. FIXME: we should be
105  // able to handle non unit stirde as well as long as the stride is equal to
106  // the dependence distance.
107  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
108  getPtrStride(PSE, StorePtr, L) != 1)
109  return false;
110 
111  auto &DL = Load->getParent()->getModule()->getDataLayout();
112  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
113 
114  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
115  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
116 
117  // We don't need to check non-wrapping here because forward/backward
118  // dependence wouldn't be valid if these weren't monotonic accesses.
119  auto *Dist = cast<SCEVConstant>(
120  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
121  const APInt &Val = Dist->getAPInt();
122  return Val == TypeByteSize;
123  }
124 
125  Value *getLoadPtr() const { return Load->getPointerOperand(); }
126 
127 #ifndef NDEBUG
128  friend raw_ostream &operator<<(raw_ostream &OS,
129  const StoreToLoadForwardingCandidate &Cand) {
130  OS << *Cand.Store << " -->\n";
131  OS.indent(2) << *Cand.Load << "\n";
132  return OS;
133  }
134 #endif
135 };
136 
137 } // end anonymous namespace
138 
139 /// \brief Check if the store dominates all latches, so as long as there is no
140 /// intervening store this value will be loaded in the next iteration.
141 static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
142  DominatorTree *DT) {
144  L->getLoopLatches(Latches);
145  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
146  return DT->dominates(StoreBlock, Latch);
147  });
148 }
149 
150 /// \brief Return true if the load is not executed on all paths in the loop.
151 static bool isLoadConditional(LoadInst *Load, Loop *L) {
152  return Load->getParent() != L->getHeader();
153 }
154 
155 namespace {
156 
157 /// \brief The per-loop class that does most of the work.
158 class LoadEliminationForLoop {
159 public:
160  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
161  DominatorTree *DT)
162  : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
163 
164  /// \brief Look through the loop-carried and loop-independent dependences in
165  /// this loop and find store->load dependences.
166  ///
167  /// Note that no candidate is returned if LAA has failed to analyze the loop
168  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
169  std::forward_list<StoreToLoadForwardingCandidate>
170  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
171  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
172 
173  const auto *Deps = LAI.getDepChecker().getDependences();
174  if (!Deps)
175  return Candidates;
176 
177  // Find store->load dependences (consequently true dep). Both lexically
178  // forward and backward dependences qualify. Disqualify loads that have
179  // other unknown dependences.
180 
181  SmallSet<Instruction *, 4> LoadsWithUnknownDepedence;
182 
183  for (const auto &Dep : *Deps) {
184  Instruction *Source = Dep.getSource(LAI);
185  Instruction *Destination = Dep.getDestination(LAI);
186 
187  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
188  if (isa<LoadInst>(Source))
189  LoadsWithUnknownDepedence.insert(Source);
190  if (isa<LoadInst>(Destination))
191  LoadsWithUnknownDepedence.insert(Destination);
192  continue;
193  }
194 
195  if (Dep.isBackward())
196  // Note that the designations source and destination follow the program
197  // order, i.e. source is always first. (The direction is given by the
198  // DepType.)
199  std::swap(Source, Destination);
200  else
201  assert(Dep.isForward() && "Needs to be a forward dependence");
202 
203  auto *Store = dyn_cast<StoreInst>(Source);
204  if (!Store)
205  continue;
206  auto *Load = dyn_cast<LoadInst>(Destination);
207  if (!Load)
208  continue;
209 
210  // Only progagate the value if they are of the same type.
211  if (Store->getPointerOperandType() != Load->getPointerOperandType())
212  continue;
213 
214  Candidates.emplace_front(Load, Store);
215  }
216 
217  if (!LoadsWithUnknownDepedence.empty())
218  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
219  return LoadsWithUnknownDepedence.count(C.Load);
220  });
221 
222  return Candidates;
223  }
224 
225  /// \brief Return the index of the instruction according to program order.
226  unsigned getInstrIndex(Instruction *Inst) {
227  auto I = InstOrder.find(Inst);
228  assert(I != InstOrder.end() && "No index for instruction");
229  return I->second;
230  }
231 
232  /// \brief If a load has multiple candidates associated (i.e. different
233  /// stores), it means that it could be forwarding from multiple stores
234  /// depending on control flow. Remove these candidates.
235  ///
236  /// Here, we rely on LAA to include the relevant loop-independent dependences.
237  /// LAA is known to omit these in the very simple case when the read and the
238  /// write within an alias set always takes place using the *same* pointer.
239  ///
240  /// However, we know that this is not the case here, i.e. we can rely on LAA
241  /// to provide us with loop-independent dependences for the cases we're
242  /// interested. Consider the case for example where a loop-independent
243  /// dependece S1->S2 invalidates the forwarding S3->S2.
244  ///
245  /// A[i] = ... (S1)
246  /// ... = A[i] (S2)
247  /// A[i+1] = ... (S3)
248  ///
249  /// LAA will perform dependence analysis here because there are two
250  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
251  void removeDependencesFromMultipleStores(
252  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
253  // If Store is nullptr it means that we have multiple stores forwarding to
254  // this store.
255  using LoadToSingleCandT =
257  LoadToSingleCandT LoadToSingleCand;
258 
259  for (const auto &Cand : Candidates) {
260  bool NewElt;
261  LoadToSingleCandT::iterator Iter;
262 
263  std::tie(Iter, NewElt) =
264  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
265  if (!NewElt) {
266  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
267  // Already multiple stores forward to this load.
268  if (OtherCand == nullptr)
269  continue;
270 
271  // Handle the very basic case when the two stores are in the same block
272  // so deciding which one forwards is easy. The later one forwards as
273  // long as they both have a dependence distance of one to the load.
274  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
275  Cand.isDependenceDistanceOfOne(PSE, L) &&
276  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
277  // They are in the same block, the later one will forward to the load.
278  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
279  OtherCand = &Cand;
280  } else
281  OtherCand = nullptr;
282  }
283  }
284 
285  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
286  if (LoadToSingleCand[Cand.Load] != &Cand) {
287  DEBUG(dbgs() << "Removing from candidates: \n" << Cand
288  << " The load may have multiple stores forwarding to "
289  << "it\n");
290  return true;
291  }
292  return false;
293  });
294  }
295 
296  /// \brief Given two pointers operations by their RuntimePointerChecking
297  /// indices, return true if they require an alias check.
298  ///
299  /// We need a check if one is a pointer for a candidate load and the other is
300  /// a pointer for a possibly intervening store.
301  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
302  const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
303  const std::set<Value *> &CandLoadPtrs) {
304  Value *Ptr1 =
306  Value *Ptr2 =
308  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
309  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
310  }
311 
312  /// \brief Return pointers that are possibly written to on the path from a
313  /// forwarding store to a load.
314  ///
315  /// These pointers need to be alias-checked against the forwarding candidates.
316  SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
318  // From FirstStore to LastLoad neither of the elimination candidate loads
319  // should overlap with any of the stores.
320  //
321  // E.g.:
322  //
323  // st1 C[i]
324  // ld1 B[i] <-------,
325  // ld0 A[i] <----, | * LastLoad
326  // ... | |
327  // st2 E[i] | |
328  // st3 B[i+1] -- | -' * FirstStore
329  // st0 A[i+1] ---'
330  // st4 D[i]
331  //
332  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
333  // ld0.
334 
335  LoadInst *LastLoad =
336  std::max_element(Candidates.begin(), Candidates.end(),
337  [&](const StoreToLoadForwardingCandidate &A,
338  const StoreToLoadForwardingCandidate &B) {
339  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
340  })
341  ->Load;
342  StoreInst *FirstStore =
343  std::min_element(Candidates.begin(), Candidates.end(),
344  [&](const StoreToLoadForwardingCandidate &A,
345  const StoreToLoadForwardingCandidate &B) {
346  return getInstrIndex(A.Store) <
347  getInstrIndex(B.Store);
348  })
349  ->Store;
350 
351  // We're looking for stores after the first forwarding store until the end
352  // of the loop, then from the beginning of the loop until the last
353  // forwarded-to load. Collect the pointer for the stores.
354  SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
355 
356  auto InsertStorePtr = [&](Instruction *I) {
357  if (auto *S = dyn_cast<StoreInst>(I))
358  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
359  };
360  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
361  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
362  MemInstrs.end(), InsertStorePtr);
363  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
364  InsertStorePtr);
365 
366  return PtrsWrittenOnFwdingPath;
367  }
368 
369  /// \brief Determine the pointer alias checks to prove that there are no
370  /// intervening stores.
373 
374  SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
375  findPointersWrittenOnForwardingPath(Candidates);
376 
377  // Collect the pointers of the candidate loads.
378  // FIXME: SmallSet does not work with std::inserter.
379  std::set<Value *> CandLoadPtrs;
380  transform(Candidates,
381  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
382  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
383 
384  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
386 
387  copy_if(AllChecks, std::back_inserter(Checks),
389  for (auto PtrIdx1 : Check.first->Members)
390  for (auto PtrIdx2 : Check.second->Members)
391  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
392  CandLoadPtrs))
393  return true;
394  return false;
395  });
396 
397  DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
398  DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
399 
400  return Checks;
401  }
402 
403  /// \brief Perform the transformation for a candidate.
404  void
405  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
406  SCEVExpander &SEE) {
407  // loop:
408  // %x = load %gep_i
409  // = ... %x
410  // store %y, %gep_i_plus_1
411  //
412  // =>
413  //
414  // ph:
415  // %x.initial = load %gep_0
416  // loop:
417  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
418  // %x = load %gep_i <---- now dead
419  // = ... %x.storeforward
420  // store %y, %gep_i_plus_1
421 
422  Value *Ptr = Cand.Load->getPointerOperand();
423  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
424  auto *PH = L->getLoopPreheader();
425  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
426  PH->getTerminator());
427  Value *Initial =
428  new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
429  Cand.Load->getAlignment(), PH->getTerminator());
430 
431  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
432  &L->getHeader()->front());
433  PHI->addIncoming(Initial, PH);
434  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
435 
436  Cand.Load->replaceAllUsesWith(PHI);
437  }
438 
439  /// \brief Top-level driver for each loop: find store->load forwarding
440  /// candidates, add run-time checks and perform transformation.
441  bool processLoop() {
442  DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
443  << "\" checking " << *L << "\n");
444 
445  // Look for store-to-load forwarding cases across the
446  // backedge. E.g.:
447  //
448  // loop:
449  // %x = load %gep_i
450  // = ... %x
451  // store %y, %gep_i_plus_1
452  //
453  // =>
454  //
455  // ph:
456  // %x.initial = load %gep_0
457  // loop:
458  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
459  // %x = load %gep_i <---- now dead
460  // = ... %x.storeforward
461  // store %y, %gep_i_plus_1
462 
463  // First start with store->load dependences.
464  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
465  if (StoreToLoadDependences.empty())
466  return false;
467 
468  // Generate an index for each load and store according to the original
469  // program order. This will be used later.
470  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
471 
472  // To keep things simple for now, remove those where the load is potentially
473  // fed by multiple stores.
474  removeDependencesFromMultipleStores(StoreToLoadDependences);
475  if (StoreToLoadDependences.empty())
476  return false;
477 
478  // Filter the candidates further.
480  unsigned NumForwarding = 0;
481  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
482  DEBUG(dbgs() << "Candidate " << Cand);
483 
484  // Make sure that the stored values is available everywhere in the loop in
485  // the next iteration.
486  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
487  continue;
488 
489  // If the load is conditional we can't hoist its 0-iteration instance to
490  // the preheader because that would make it unconditional. Thus we would
491  // access a memory location that the original loop did not access.
492  if (isLoadConditional(Cand.Load, L))
493  continue;
494 
495  // Check whether the SCEV difference is the same as the induction step,
496  // thus we load the value in the next iteration.
497  if (!Cand.isDependenceDistanceOfOne(PSE, L))
498  continue;
499 
500  ++NumForwarding;
501  DEBUG(dbgs()
502  << NumForwarding
503  << ". Valid store-to-load forwarding across the loop backedge\n");
504  Candidates.push_back(Cand);
505  }
506  if (Candidates.empty())
507  return false;
508 
509  // Check intervening may-alias stores. These need runtime checks for alias
510  // disambiguation.
512  collectMemchecks(Candidates);
513 
514  // Too many checks are likely to outweigh the benefits of forwarding.
515  if (Checks.size() > Candidates.size() * CheckPerElim) {
516  DEBUG(dbgs() << "Too many run-time checks needed.\n");
517  return false;
518  }
519 
520  if (LAI.getPSE().getUnionPredicate().getComplexity() >
522  DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
523  return false;
524  }
525 
526  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
527  if (L->getHeader()->getParent()->optForSize()) {
528  DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
529  "for size.\n");
530  return false;
531  }
532 
533  if (!L->isLoopSimplifyForm()) {
534  DEBUG(dbgs() << "Loop is not is loop-simplify form");
535  return false;
536  }
537 
538  // Point of no-return, start the transformation. First, version the loop
539  // if necessary.
540 
541  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
542  LV.setAliasChecks(std::move(Checks));
543  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
544  LV.versionLoop();
545  }
546 
547  // Next, propagate the value stored by the store to the users of the load.
548  // Also for the first iteration, generate the initial value of the load.
550  "storeforward");
551  for (const auto &Cand : Candidates)
552  propagateStoredValueToLoadUsers(Cand, SEE);
553  NumLoopLoadEliminted += NumForwarding;
554 
555  return true;
556  }
557 
558 private:
559  Loop *L;
560 
561  /// \brief Maps the load/store instructions to their index according to
562  /// program order.
564 
565  // Analyses used.
566  LoopInfo *LI;
567  const LoopAccessInfo &LAI;
568  DominatorTree *DT;
570 };
571 
572 } // end anonymous namespace
573 
574 static bool
576  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
577  // Build up a worklist of inner-loops to transform to avoid iterator
578  // invalidation.
579  // FIXME: This logic comes from other passes that actually change the loop
580  // nest structure. It isn't clear this is necessary (or useful) for a pass
581  // which merely optimizes the use of loads in a loop.
582  SmallVector<Loop *, 8> Worklist;
583 
584  for (Loop *TopLevelLoop : LI)
585  for (Loop *L : depth_first(TopLevelLoop))
586  // We only handle inner-most loops.
587  if (L->empty())
588  Worklist.push_back(L);
589 
590  // Now walk the identified inner loops.
591  bool Changed = false;
592  for (Loop *L : Worklist) {
593  // The actual work is performed by LoadEliminationForLoop.
594  LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
595  Changed |= LEL.processLoop();
596  }
597  return Changed;
598 }
599 
600 namespace {
601 
602 /// \brief The pass. Most of the work is delegated to the per-loop
603 /// LoadEliminationForLoop class.
604 class LoopLoadElimination : public FunctionPass {
605 public:
606  static char ID;
607 
608  LoopLoadElimination() : FunctionPass(ID) {
610  }
611 
612  bool runOnFunction(Function &F) override {
613  if (skipFunction(F))
614  return false;
615 
616  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
617  auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
618  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
619 
620  // Process each loop nest in the function.
622  F, LI, DT,
623  [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
624  }
625 
626  void getAnalysisUsage(AnalysisUsage &AU) const override {
635  }
636 };
637 
638 } // end anonymous namespace
639 
641 
642 static const char LLE_name[] = "Loop Load Elimination";
643 
644 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
649 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
650 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
651 
653  return new LoopLoadElimination();
654 }
655 
658  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
659  auto &LI = AM.getResult<LoopAnalysis>(F);
660  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
661  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
662  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
663  auto &AA = AM.getResult<AAManager>(F);
664  auto &AC = AM.getResult<AssumptionAnalysis>(F);
665 
666  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
667  bool Changed = eliminateLoadsAcrossLoops(
668  F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
669  LoopStandardAnalysisResults AR = {AA, AC, DT, LI,
670  SE, TLI, TTI, nullptr};
671  return LAM.getResult<LoopAccessAnalysis>(L, AR);
672  });
673 
674  if (!Changed)
675  return PreservedAnalyses::all();
676 
678  return PA;
679 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
uint64_t CallInst * C
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, function_ref< const LoopAccessInfo &(Loop &)> GetLAI)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:157
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:687
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This is the interface for a simple mod/ref and alias analysis over globals.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:860
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
const RuntimePointerChecking * getRuntimePointerChecking() const
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:106
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
Analysis pass providing the TargetTransformInfo.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:813
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:238
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:191
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr it the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:56
Type * getPointerElementType() const
Definition: Type.h:373
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
static const char LLE_name[]
void getLoopLatches(SmallVectorImpl< BlockT *> &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:284
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:933
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This header provides classes for managing per-loop analyses.
void initializeLoopLoadEliminationPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:306
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:140
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
const Instruction & front() const
Definition: BasicBlock.h:264
A manager for alias analyses.
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:530
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:270
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:81
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
A function analysis which provides an AssumptionCache.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
This header defines the LoopLoadEliminationPass object.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:298
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:239
Module.h This file contains the declarations for the Module class.
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
Class for arbitrary precision integers.
Definition: APInt.h:69
#define SEE(c)
Definition: regcomp.c:258
This class uses information about analyze scalars to rewrite expressions in canonical form...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:403
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:191
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
Type * getPointerOperandType() const
Definition: Instructions.h:273
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:890
bool empty() const
Definition: LoopInfo.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
LLVM Value Representation.
Definition: Value.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:958
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
This pass exposes codegen information to IR-level passes.
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:806
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
Value * getPointerOperand()
Definition: Instructions.h:398
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:946
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:65