LLVM  9.0.0svn
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implement a loop-aware load elimination pass.
10 //
11 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
12 // distance of one between stores and loads. These form the candidates for the
13 // transformation. The source value of each store then propagated to the user
14 // of the corresponding load. This makes the load dead.
15 //
16 // The pass can also version the loop and add memchecks in order to prove that
17 // may-aliasing stores can't change the value in memory before it's read by the
18 // load.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
37 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/Dominators.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/PassManager.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Debug.h"
57 #include "llvm/Transforms/Scalar.h"
58 #include "llvm/Transforms/Utils.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <forward_list>
64 #include <set>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define LLE_OPTION "loop-load-elim"
71 #define DEBUG_TYPE LLE_OPTION
72 
74  "runtime-check-per-loop-load-elim", cl::Hidden,
75  cl::desc("Max number of memchecks allowed per eliminated load on average"),
76  cl::init(1));
77 
79  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
80  cl::desc("The maximum number of SCEV checks allowed for Loop "
81  "Load Elimination"));
82 
83 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
84 
85 namespace {
86 
87 /// Represent a store-to-forwarding candidate.
88 struct StoreToLoadForwardingCandidate {
89  LoadInst *Load;
91 
92  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
93  : Load(Load), Store(Store) {}
94 
95  /// Return true if the dependence from the store to the load has a
96  /// distance of one. E.g. A[i+1] = A[i]
97  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
98  Loop *L) const {
99  Value *LoadPtr = Load->getPointerOperand();
100  Value *StorePtr = Store->getPointerOperand();
101  Type *LoadPtrType = LoadPtr->getType();
102  Type *LoadType = LoadPtrType->getPointerElementType();
103 
104  assert(LoadPtrType->getPointerAddressSpace() ==
105  StorePtr->getType()->getPointerAddressSpace() &&
106  LoadType == StorePtr->getType()->getPointerElementType() &&
107  "Should be a known dependence");
108 
109  // Currently we only support accesses with unit stride. FIXME: we should be
110  // able to handle non unit stirde as well as long as the stride is equal to
111  // the dependence distance.
112  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
113  getPtrStride(PSE, StorePtr, L) != 1)
114  return false;
115 
116  auto &DL = Load->getParent()->getModule()->getDataLayout();
117  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
118 
119  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
120  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
121 
122  // We don't need to check non-wrapping here because forward/backward
123  // dependence wouldn't be valid if these weren't monotonic accesses.
124  auto *Dist = cast<SCEVConstant>(
125  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
126  const APInt &Val = Dist->getAPInt();
127  return Val == TypeByteSize;
128  }
129 
130  Value *getLoadPtr() const { return Load->getPointerOperand(); }
131 
132 #ifndef NDEBUG
133  friend raw_ostream &operator<<(raw_ostream &OS,
134  const StoreToLoadForwardingCandidate &Cand) {
135  OS << *Cand.Store << " -->\n";
136  OS.indent(2) << *Cand.Load << "\n";
137  return OS;
138  }
139 #endif
140 };
141 
142 } // end anonymous namespace
143 
144 /// Check if the store dominates all latches, so as long as there is no
145 /// intervening store this value will be loaded in the next iteration.
146 static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
147  DominatorTree *DT) {
149  L->getLoopLatches(Latches);
150  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
151  return DT->dominates(StoreBlock, Latch);
152  });
153 }
154 
155 /// Return true if the load is not executed on all paths in the loop.
156 static bool isLoadConditional(LoadInst *Load, Loop *L) {
157  return Load->getParent() != L->getHeader();
158 }
159 
160 namespace {
161 
162 /// The per-loop class that does most of the work.
163 class LoadEliminationForLoop {
164 public:
165  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
167  ProfileSummaryInfo* PSI)
168  : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
169 
170  /// Look through the loop-carried and loop-independent dependences in
171  /// this loop and find store->load dependences.
172  ///
173  /// Note that no candidate is returned if LAA has failed to analyze the loop
174  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
175  std::forward_list<StoreToLoadForwardingCandidate>
176  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
177  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
178 
179  const auto *Deps = LAI.getDepChecker().getDependences();
180  if (!Deps)
181  return Candidates;
182 
183  // Find store->load dependences (consequently true dep). Both lexically
184  // forward and backward dependences qualify. Disqualify loads that have
185  // other unknown dependences.
186 
187  SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
188 
189  for (const auto &Dep : *Deps) {
190  Instruction *Source = Dep.getSource(LAI);
191  Instruction *Destination = Dep.getDestination(LAI);
192 
193  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
194  if (isa<LoadInst>(Source))
195  LoadsWithUnknownDepedence.insert(Source);
196  if (isa<LoadInst>(Destination))
197  LoadsWithUnknownDepedence.insert(Destination);
198  continue;
199  }
200 
201  if (Dep.isBackward())
202  // Note that the designations source and destination follow the program
203  // order, i.e. source is always first. (The direction is given by the
204  // DepType.)
205  std::swap(Source, Destination);
206  else
207  assert(Dep.isForward() && "Needs to be a forward dependence");
208 
209  auto *Store = dyn_cast<StoreInst>(Source);
210  if (!Store)
211  continue;
212  auto *Load = dyn_cast<LoadInst>(Destination);
213  if (!Load)
214  continue;
215 
216  // Only progagate the value if they are of the same type.
217  if (Store->getPointerOperandType() != Load->getPointerOperandType())
218  continue;
219 
220  Candidates.emplace_front(Load, Store);
221  }
222 
223  if (!LoadsWithUnknownDepedence.empty())
224  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
225  return LoadsWithUnknownDepedence.count(C.Load);
226  });
227 
228  return Candidates;
229  }
230 
231  /// Return the index of the instruction according to program order.
232  unsigned getInstrIndex(Instruction *Inst) {
233  auto I = InstOrder.find(Inst);
234  assert(I != InstOrder.end() && "No index for instruction");
235  return I->second;
236  }
237 
238  /// If a load has multiple candidates associated (i.e. different
239  /// stores), it means that it could be forwarding from multiple stores
240  /// depending on control flow. Remove these candidates.
241  ///
242  /// Here, we rely on LAA to include the relevant loop-independent dependences.
243  /// LAA is known to omit these in the very simple case when the read and the
244  /// write within an alias set always takes place using the *same* pointer.
245  ///
246  /// However, we know that this is not the case here, i.e. we can rely on LAA
247  /// to provide us with loop-independent dependences for the cases we're
248  /// interested. Consider the case for example where a loop-independent
249  /// dependece S1->S2 invalidates the forwarding S3->S2.
250  ///
251  /// A[i] = ... (S1)
252  /// ... = A[i] (S2)
253  /// A[i+1] = ... (S3)
254  ///
255  /// LAA will perform dependence analysis here because there are two
256  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
257  void removeDependencesFromMultipleStores(
258  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
259  // If Store is nullptr it means that we have multiple stores forwarding to
260  // this store.
261  using LoadToSingleCandT =
263  LoadToSingleCandT LoadToSingleCand;
264 
265  for (const auto &Cand : Candidates) {
266  bool NewElt;
267  LoadToSingleCandT::iterator Iter;
268 
269  std::tie(Iter, NewElt) =
270  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
271  if (!NewElt) {
272  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
273  // Already multiple stores forward to this load.
274  if (OtherCand == nullptr)
275  continue;
276 
277  // Handle the very basic case when the two stores are in the same block
278  // so deciding which one forwards is easy. The later one forwards as
279  // long as they both have a dependence distance of one to the load.
280  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
281  Cand.isDependenceDistanceOfOne(PSE, L) &&
282  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
283  // They are in the same block, the later one will forward to the load.
284  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
285  OtherCand = &Cand;
286  } else
287  OtherCand = nullptr;
288  }
289  }
290 
291  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
292  if (LoadToSingleCand[Cand.Load] != &Cand) {
293  LLVM_DEBUG(
294  dbgs() << "Removing from candidates: \n"
295  << Cand
296  << " The load may have multiple stores forwarding to "
297  << "it\n");
298  return true;
299  }
300  return false;
301  });
302  }
303 
304  /// Given two pointers operations by their RuntimePointerChecking
305  /// indices, return true if they require an alias check.
306  ///
307  /// We need a check if one is a pointer for a candidate load and the other is
308  /// a pointer for a possibly intervening store.
309  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
310  const SmallPtrSet<Value *, 4> &PtrsWrittenOnFwdingPath,
311  const std::set<Value *> &CandLoadPtrs) {
312  Value *Ptr1 =
314  Value *Ptr2 =
316  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
317  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
318  }
319 
320  /// Return pointers that are possibly written to on the path from a
321  /// forwarding store to a load.
322  ///
323  /// These pointers need to be alias-checked against the forwarding candidates.
324  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
326  // From FirstStore to LastLoad neither of the elimination candidate loads
327  // should overlap with any of the stores.
328  //
329  // E.g.:
330  //
331  // st1 C[i]
332  // ld1 B[i] <-------,
333  // ld0 A[i] <----, | * LastLoad
334  // ... | |
335  // st2 E[i] | |
336  // st3 B[i+1] -- | -' * FirstStore
337  // st0 A[i+1] ---'
338  // st4 D[i]
339  //
340  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
341  // ld0.
342 
343  LoadInst *LastLoad =
344  std::max_element(Candidates.begin(), Candidates.end(),
345  [&](const StoreToLoadForwardingCandidate &A,
346  const StoreToLoadForwardingCandidate &B) {
347  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
348  })
349  ->Load;
350  StoreInst *FirstStore =
351  std::min_element(Candidates.begin(), Candidates.end(),
352  [&](const StoreToLoadForwardingCandidate &A,
353  const StoreToLoadForwardingCandidate &B) {
354  return getInstrIndex(A.Store) <
355  getInstrIndex(B.Store);
356  })
357  ->Store;
358 
359  // We're looking for stores after the first forwarding store until the end
360  // of the loop, then from the beginning of the loop until the last
361  // forwarded-to load. Collect the pointer for the stores.
362  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
363 
364  auto InsertStorePtr = [&](Instruction *I) {
365  if (auto *S = dyn_cast<StoreInst>(I))
366  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
367  };
368  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
369  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
370  MemInstrs.end(), InsertStorePtr);
371  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
372  InsertStorePtr);
373 
374  return PtrsWrittenOnFwdingPath;
375  }
376 
377  /// Determine the pointer alias checks to prove that there are no
378  /// intervening stores.
381 
382  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
383  findPointersWrittenOnForwardingPath(Candidates);
384 
385  // Collect the pointers of the candidate loads.
386  // FIXME: SmallPtrSet does not work with std::inserter.
387  std::set<Value *> CandLoadPtrs;
388  transform(Candidates,
389  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
390  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
391 
392  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
394 
395  copy_if(AllChecks, std::back_inserter(Checks),
397  for (auto PtrIdx1 : Check.first->Members)
398  for (auto PtrIdx2 : Check.second->Members)
399  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
400  CandLoadPtrs))
401  return true;
402  return false;
403  });
404 
405  LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
406  << "):\n");
408 
409  return Checks;
410  }
411 
412  /// Perform the transformation for a candidate.
413  void
414  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
415  SCEVExpander &SEE) {
416  // loop:
417  // %x = load %gep_i
418  // = ... %x
419  // store %y, %gep_i_plus_1
420  //
421  // =>
422  //
423  // ph:
424  // %x.initial = load %gep_0
425  // loop:
426  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
427  // %x = load %gep_i <---- now dead
428  // = ... %x.storeforward
429  // store %y, %gep_i_plus_1
430 
431  Value *Ptr = Cand.Load->getPointerOperand();
432  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
433  auto *PH = L->getLoopPreheader();
434  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
435  PH->getTerminator());
436  Value *Initial = new LoadInst(
437  Cand.Load->getType(), InitialPtr, "load_initial",
438  /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator());
439 
440  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
441  &L->getHeader()->front());
442  PHI->addIncoming(Initial, PH);
443  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
444 
445  Cand.Load->replaceAllUsesWith(PHI);
446  }
447 
448  /// Top-level driver for each loop: find store->load forwarding
449  /// candidates, add run-time checks and perform transformation.
450  bool processLoop() {
451  LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
452  << "\" checking " << *L << "\n");
453 
454  // Look for store-to-load forwarding cases across the
455  // backedge. E.g.:
456  //
457  // loop:
458  // %x = load %gep_i
459  // = ... %x
460  // store %y, %gep_i_plus_1
461  //
462  // =>
463  //
464  // ph:
465  // %x.initial = load %gep_0
466  // loop:
467  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
468  // %x = load %gep_i <---- now dead
469  // = ... %x.storeforward
470  // store %y, %gep_i_plus_1
471 
472  // First start with store->load dependences.
473  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
474  if (StoreToLoadDependences.empty())
475  return false;
476 
477  // Generate an index for each load and store according to the original
478  // program order. This will be used later.
479  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
480 
481  // To keep things simple for now, remove those where the load is potentially
482  // fed by multiple stores.
483  removeDependencesFromMultipleStores(StoreToLoadDependences);
484  if (StoreToLoadDependences.empty())
485  return false;
486 
487  // Filter the candidates further.
489  unsigned NumForwarding = 0;
490  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
491  LLVM_DEBUG(dbgs() << "Candidate " << Cand);
492 
493  // Make sure that the stored values is available everywhere in the loop in
494  // the next iteration.
495  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
496  continue;
497 
498  // If the load is conditional we can't hoist its 0-iteration instance to
499  // the preheader because that would make it unconditional. Thus we would
500  // access a memory location that the original loop did not access.
501  if (isLoadConditional(Cand.Load, L))
502  continue;
503 
504  // Check whether the SCEV difference is the same as the induction step,
505  // thus we load the value in the next iteration.
506  if (!Cand.isDependenceDistanceOfOne(PSE, L))
507  continue;
508 
509  ++NumForwarding;
510  LLVM_DEBUG(
511  dbgs()
512  << NumForwarding
513  << ". Valid store-to-load forwarding across the loop backedge\n");
514  Candidates.push_back(Cand);
515  }
516  if (Candidates.empty())
517  return false;
518 
519  // Check intervening may-alias stores. These need runtime checks for alias
520  // disambiguation.
522  collectMemchecks(Candidates);
523 
524  // Too many checks are likely to outweigh the benefits of forwarding.
525  if (Checks.size() > Candidates.size() * CheckPerElim) {
526  LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
527  return false;
528  }
529 
530  if (LAI.getPSE().getUnionPredicate().getComplexity() >
532  LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
533  return false;
534  }
535 
536  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
537  if (LAI.hasConvergentOp()) {
538  LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
539  "convergent calls\n");
540  return false;
541  }
542 
543  auto *HeaderBB = L->getHeader();
544  auto *F = HeaderBB->getParent();
545  bool OptForSize = F->hasOptSize() ||
546  llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
547  if (OptForSize) {
548  LLVM_DEBUG(
549  dbgs() << "Versioning is needed but not allowed when optimizing "
550  "for size.\n");
551  return false;
552  }
553 
554  if (!L->isLoopSimplifyForm()) {
555  LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
556  return false;
557  }
558 
559  // Point of no-return, start the transformation. First, version the loop
560  // if necessary.
561 
562  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
563  LV.setAliasChecks(std::move(Checks));
564  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
565  LV.versionLoop();
566  }
567 
568  // Next, propagate the value stored by the store to the users of the load.
569  // Also for the first iteration, generate the initial value of the load.
571  "storeforward");
572  for (const auto &Cand : Candidates)
573  propagateStoredValueToLoadUsers(Cand, SEE);
574  NumLoopLoadEliminted += NumForwarding;
575 
576  return true;
577  }
578 
579 private:
580  Loop *L;
581 
582  /// Maps the load/store instructions to their index according to
583  /// program order.
585 
586  // Analyses used.
587  LoopInfo *LI;
588  const LoopAccessInfo &LAI;
589  DominatorTree *DT;
591  ProfileSummaryInfo *PSI;
593 };
594 
595 } // end anonymous namespace
596 
597 static bool
600  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
601  // Build up a worklist of inner-loops to transform to avoid iterator
602  // invalidation.
603  // FIXME: This logic comes from other passes that actually change the loop
604  // nest structure. It isn't clear this is necessary (or useful) for a pass
605  // which merely optimizes the use of loads in a loop.
606  SmallVector<Loop *, 8> Worklist;
607 
608  for (Loop *TopLevelLoop : LI)
609  for (Loop *L : depth_first(TopLevelLoop))
610  // We only handle inner-most loops.
611  if (L->empty())
612  Worklist.push_back(L);
613 
614  // Now walk the identified inner loops.
615  bool Changed = false;
616  for (Loop *L : Worklist) {
617  // The actual work is performed by LoadEliminationForLoop.
618  LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
619  Changed |= LEL.processLoop();
620  }
621  return Changed;
622 }
623 
624 namespace {
625 
626 /// The pass. Most of the work is delegated to the per-loop
627 /// LoadEliminationForLoop class.
628 class LoopLoadElimination : public FunctionPass {
629 public:
630  static char ID;
631 
632  LoopLoadElimination() : FunctionPass(ID) {
634  }
635 
636  bool runOnFunction(Function &F) override {
637  if (skipFunction(F))
638  return false;
639 
640  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
641  auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
642  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
643  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
644  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
645  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
646  nullptr;
647 
648  // Process each loop nest in the function.
650  F, LI, DT, BFI, PSI,
651  [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
652  }
653 
654  void getAnalysisUsage(AnalysisUsage &AU) const override {
665  }
666 };
667 
668 } // end anonymous namespace
669 
671 
672 static const char LLE_name[] = "Loop Load Elimination";
673 
674 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
679 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
682 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
683 
685  return new LoopLoadElimination();
686 }
687 
690  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
691  auto &LI = AM.getResult<LoopAnalysis>(F);
692  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
693  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
694  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
695  auto &AA = AM.getResult<AAManager>(F);
696  auto &AC = AM.getResult<AssumptionAnalysis>(F);
697  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
698  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
699  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
700  &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
702  ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
703  : nullptr;
704 
705  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
706  bool Changed = eliminateLoadsAcrossLoops(
707  F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
708  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
709  return LAM.getResult<LoopAccessAnalysis>(L, AR);
710  });
711 
712  if (!Changed)
713  return PreservedAnalyses::all();
714 
716  return PA;
717 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
uint64_t CallInst * C
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:211
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1239
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
const RuntimePointerChecking * getRuntimePointerChecking() const
Analysis providing profile information.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:116
Analysis pass providing the TargetTransformInfo.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:580
An instruction for reading from memory.
Definition: Instructions.h:167
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if function F is suggested to be size-optimized base on the profile. ...
Definition: SizeOpts.cpp:23
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
static const char LLE_name[]
void getLoopLatches(SmallVectorImpl< BlockT *> &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:311
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1152
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:102
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
This header provides classes for managing per-loop analyses.
void initializeLoopLoadEliminationPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:320
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
bool hasProfileSummary()
Returns true if profile summary is available.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
This analysis provides dependence information for the memory accesses of a loop.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
const Instruction & front() const
Definition: BasicBlock.h:280
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Value * getPointerOperand()
Definition: Instructions.h:284
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
A function analysis which provides an AssumptionCache.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
This header defines the LoopLoadEliminationPass object.
Analysis pass which computes BlockFrequencyInfo.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:924
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Class for arbitrary precision integers.
Definition: APInt.h:69
#define SEE(c)
Definition: regcomp.c:253
This class uses information about analyze scalars to rewrite expressions in canonical form...
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref< const LoopAccessInfo &(Loop &)> GetLAI)
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:469
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:425
This analysis provides dependence information for the memory accesses of a loop.
Type * getPointerOperandType() const
Definition: Instructions.h:287
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:506
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2038
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1274
bool empty() const
Definition: LoopInfo.h:148
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1177
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This pass exposes codegen information to IR-level passes.
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
Value * getPointerOperand()
Definition: Instructions.h:412
const BasicBlock * getParent() const
Definition: Instruction.h:66
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1044