LLVM 18.0.0git
LoopLoadElimination.cpp
Go to the documentation of this file.
1//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implement a loop-aware load elimination pass.
10//
11// It uses LoopAccessAnalysis to identify loop-carried dependences with a
12// distance of one between stores and loads. These form the candidates for the
13// transformation. The source value of each store then propagated to the user
14// of the corresponding load. This makes the load dead.
15//
16// The pass can also version the loop and add memchecks in order to prove that
17// may-aliasing stores can't change the value in memory before it's read by the
18// load.
19//
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/Statistic.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/Dominators.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/PassManager.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/Value.h"
51#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <forward_list>
61#include <tuple>
62#include <utility>
63
64using namespace llvm;
65
66#define LLE_OPTION "loop-load-elim"
67#define DEBUG_TYPE LLE_OPTION
68
70 "runtime-check-per-loop-load-elim", cl::Hidden,
71 cl::desc("Max number of memchecks allowed per eliminated load on average"),
72 cl::init(1));
73
75 "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
76 cl::desc("The maximum number of SCEV checks allowed for Loop "
77 "Load Elimination"));
78
79STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
80
81namespace {
82
83/// Represent a store-to-forwarding candidate.
84struct StoreToLoadForwardingCandidate {
85 LoadInst *Load;
86 StoreInst *Store;
87
88 StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
89 : Load(Load), Store(Store) {}
90
91 /// Return true if the dependence from the store to the load has an
92 /// absolute distance of one.
93 /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
94 bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
95 Loop *L) const {
96 Value *LoadPtr = Load->getPointerOperand();
97 Value *StorePtr = Store->getPointerOperand();
98 Type *LoadType = getLoadStoreType(Load);
99 auto &DL = Load->getParent()->getModule()->getDataLayout();
100
101 assert(LoadPtr->getType()->getPointerAddressSpace() ==
102 StorePtr->getType()->getPointerAddressSpace() &&
103 DL.getTypeSizeInBits(LoadType) ==
104 DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
105 "Should be a known dependence");
106
107 int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
108 int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
109 if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
110 return false;
111
112 // TODO: This check for stride values other than 1 and -1 can be eliminated.
113 // However, doing so may cause the LoopAccessAnalysis to overcompensate,
114 // generating numerous non-wrap runtime checks that may undermine the
115 // benefits of load elimination. To safely implement support for non-unit
116 // strides, we would need to ensure either that the processed case does not
117 // require these additional checks, or improve the LAA to handle them more
118 // efficiently, or potentially both.
119 if (std::abs(StrideLoad) != 1)
120 return false;
121
122 unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
123
124 auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
125 auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
126
127 // We don't need to check non-wrapping here because forward/backward
128 // dependence wouldn't be valid if these weren't monotonic accesses.
129 auto *Dist = cast<SCEVConstant>(
130 PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
131 const APInt &Val = Dist->getAPInt();
132 return Val == TypeByteSize * StrideLoad;
133 }
134
135 Value *getLoadPtr() const { return Load->getPointerOperand(); }
136
137#ifndef NDEBUG
139 const StoreToLoadForwardingCandidate &Cand) {
140 OS << *Cand.Store << " -->\n";
141 OS.indent(2) << *Cand.Load << "\n";
142 return OS;
143 }
144#endif
145};
146
147} // end anonymous namespace
148
149/// Check if the store dominates all latches, so as long as there is no
150/// intervening store this value will be loaded in the next iteration.
151static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
152 DominatorTree *DT) {
154 L->getLoopLatches(Latches);
155 return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
156 return DT->dominates(StoreBlock, Latch);
157 });
158}
159
160/// Return true if the load is not executed on all paths in the loop.
161static bool isLoadConditional(LoadInst *Load, Loop *L) {
162 return Load->getParent() != L->getHeader();
163}
164
165namespace {
166
167/// The per-loop class that does most of the work.
168class LoadEliminationForLoop {
169public:
170 LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
173 : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
174
175 /// Look through the loop-carried and loop-independent dependences in
176 /// this loop and find store->load dependences.
177 ///
178 /// Note that no candidate is returned if LAA has failed to analyze the loop
179 /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
180 std::forward_list<StoreToLoadForwardingCandidate>
181 findStoreToLoadDependences(const LoopAccessInfo &LAI) {
182 std::forward_list<StoreToLoadForwardingCandidate> Candidates;
183
184 const auto *Deps = LAI.getDepChecker().getDependences();
185 if (!Deps)
186 return Candidates;
187
188 // Find store->load dependences (consequently true dep). Both lexically
189 // forward and backward dependences qualify. Disqualify loads that have
190 // other unknown dependences.
191
192 SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
193
194 for (const auto &Dep : *Deps) {
195 Instruction *Source = Dep.getSource(LAI);
196 Instruction *Destination = Dep.getDestination(LAI);
197
200 if (isa<LoadInst>(Source))
201 LoadsWithUnknownDepedence.insert(Source);
202 if (isa<LoadInst>(Destination))
203 LoadsWithUnknownDepedence.insert(Destination);
204 continue;
205 }
206
207 if (Dep.isBackward())
208 // Note that the designations source and destination follow the program
209 // order, i.e. source is always first. (The direction is given by the
210 // DepType.)
211 std::swap(Source, Destination);
212 else
213 assert(Dep.isForward() && "Needs to be a forward dependence");
214
215 auto *Store = dyn_cast<StoreInst>(Source);
216 if (!Store)
217 continue;
218 auto *Load = dyn_cast<LoadInst>(Destination);
219 if (!Load)
220 continue;
221
222 // Only propagate if the stored values are bit/pointer castable.
225 Store->getParent()->getModule()->getDataLayout()))
226 continue;
227
228 Candidates.emplace_front(Load, Store);
229 }
230
231 if (!LoadsWithUnknownDepedence.empty())
232 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
233 return LoadsWithUnknownDepedence.count(C.Load);
234 });
235
236 return Candidates;
237 }
238
239 /// Return the index of the instruction according to program order.
240 unsigned getInstrIndex(Instruction *Inst) {
241 auto I = InstOrder.find(Inst);
242 assert(I != InstOrder.end() && "No index for instruction");
243 return I->second;
244 }
245
246 /// If a load has multiple candidates associated (i.e. different
247 /// stores), it means that it could be forwarding from multiple stores
248 /// depending on control flow. Remove these candidates.
249 ///
250 /// Here, we rely on LAA to include the relevant loop-independent dependences.
251 /// LAA is known to omit these in the very simple case when the read and the
252 /// write within an alias set always takes place using the *same* pointer.
253 ///
254 /// However, we know that this is not the case here, i.e. we can rely on LAA
255 /// to provide us with loop-independent dependences for the cases we're
256 /// interested. Consider the case for example where a loop-independent
257 /// dependece S1->S2 invalidates the forwarding S3->S2.
258 ///
259 /// A[i] = ... (S1)
260 /// ... = A[i] (S2)
261 /// A[i+1] = ... (S3)
262 ///
263 /// LAA will perform dependence analysis here because there are two
264 /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
265 void removeDependencesFromMultipleStores(
266 std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
267 // If Store is nullptr it means that we have multiple stores forwarding to
268 // this store.
269 using LoadToSingleCandT =
271 LoadToSingleCandT LoadToSingleCand;
272
273 for (const auto &Cand : Candidates) {
274 bool NewElt;
275 LoadToSingleCandT::iterator Iter;
276
277 std::tie(Iter, NewElt) =
278 LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
279 if (!NewElt) {
280 const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
281 // Already multiple stores forward to this load.
282 if (OtherCand == nullptr)
283 continue;
284
285 // Handle the very basic case when the two stores are in the same block
286 // so deciding which one forwards is easy. The later one forwards as
287 // long as they both have a dependence distance of one to the load.
288 if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
289 Cand.isDependenceDistanceOfOne(PSE, L) &&
290 OtherCand->isDependenceDistanceOfOne(PSE, L)) {
291 // They are in the same block, the later one will forward to the load.
292 if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
293 OtherCand = &Cand;
294 } else
295 OtherCand = nullptr;
296 }
297 }
298
299 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
300 if (LoadToSingleCand[Cand.Load] != &Cand) {
301 LLVM_DEBUG(
302 dbgs() << "Removing from candidates: \n"
303 << Cand
304 << " The load may have multiple stores forwarding to "
305 << "it\n");
306 return true;
307 }
308 return false;
309 });
310 }
311
312 /// Given two pointers operations by their RuntimePointerChecking
313 /// indices, return true if they require an alias check.
314 ///
315 /// We need a check if one is a pointer for a candidate load and the other is
316 /// a pointer for a possibly intervening store.
317 bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
318 const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
319 const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
320 Value *Ptr1 =
322 Value *Ptr2 =
324 return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
325 (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
326 }
327
328 /// Return pointers that are possibly written to on the path from a
329 /// forwarding store to a load.
330 ///
331 /// These pointers need to be alias-checked against the forwarding candidates.
332 SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
334 // From FirstStore to LastLoad neither of the elimination candidate loads
335 // should overlap with any of the stores.
336 //
337 // E.g.:
338 //
339 // st1 C[i]
340 // ld1 B[i] <-------,
341 // ld0 A[i] <----, | * LastLoad
342 // ... | |
343 // st2 E[i] | |
344 // st3 B[i+1] -- | -' * FirstStore
345 // st0 A[i+1] ---'
346 // st4 D[i]
347 //
348 // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
349 // ld0.
350
351 LoadInst *LastLoad =
352 std::max_element(Candidates.begin(), Candidates.end(),
353 [&](const StoreToLoadForwardingCandidate &A,
354 const StoreToLoadForwardingCandidate &B) {
355 return getInstrIndex(A.Load) < getInstrIndex(B.Load);
356 })
357 ->Load;
358 StoreInst *FirstStore =
359 std::min_element(Candidates.begin(), Candidates.end(),
360 [&](const StoreToLoadForwardingCandidate &A,
361 const StoreToLoadForwardingCandidate &B) {
362 return getInstrIndex(A.Store) <
363 getInstrIndex(B.Store);
364 })
365 ->Store;
366
367 // We're looking for stores after the first forwarding store until the end
368 // of the loop, then from the beginning of the loop until the last
369 // forwarded-to load. Collect the pointer for the stores.
370 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
371
372 auto InsertStorePtr = [&](Instruction *I) {
373 if (auto *S = dyn_cast<StoreInst>(I))
374 PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
375 };
376 const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
377 std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
378 MemInstrs.end(), InsertStorePtr);
379 std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
380 InsertStorePtr);
381
382 return PtrsWrittenOnFwdingPath;
383 }
384
385 /// Determine the pointer alias checks to prove that there are no
386 /// intervening stores.
389
390 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
391 findPointersWrittenOnForwardingPath(Candidates);
392
393 // Collect the pointers of the candidate loads.
394 SmallPtrSet<Value *, 4> CandLoadPtrs;
395 for (const auto &Candidate : Candidates)
396 CandLoadPtrs.insert(Candidate.getLoadPtr());
397
398 const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
400
401 copy_if(AllChecks, std::back_inserter(Checks),
402 [&](const RuntimePointerCheck &Check) {
403 for (auto PtrIdx1 : Check.first->Members)
404 for (auto PtrIdx2 : Check.second->Members)
405 if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
406 CandLoadPtrs))
407 return true;
408 return false;
409 });
410
411 LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
412 << "):\n");
414
415 return Checks;
416 }
417
418 /// Perform the transformation for a candidate.
419 void
420 propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
421 SCEVExpander &SEE) {
422 // loop:
423 // %x = load %gep_i
424 // = ... %x
425 // store %y, %gep_i_plus_1
426 //
427 // =>
428 //
429 // ph:
430 // %x.initial = load %gep_0
431 // loop:
432 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
433 // %x = load %gep_i <---- now dead
434 // = ... %x.storeforward
435 // store %y, %gep_i_plus_1
436
437 Value *Ptr = Cand.Load->getPointerOperand();
438 auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
439 auto *PH = L->getLoopPreheader();
440 assert(PH && "Preheader should exist!");
441 Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
442 PH->getTerminator());
443 Value *Initial = new LoadInst(
444 Cand.Load->getType(), InitialPtr, "load_initial",
445 /* isVolatile */ false, Cand.Load->getAlign(), PH->getTerminator());
446
447 PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
448 PHI->insertBefore(L->getHeader()->begin());
449 PHI->addIncoming(Initial, PH);
450
451 Type *LoadType = Initial->getType();
452 Type *StoreType = Cand.Store->getValueOperand()->getType();
453 auto &DL = Cand.Load->getParent()->getModule()->getDataLayout();
454 (void)DL;
455
456 assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
457 "The type sizes should match!");
458
459 Value *StoreValue = Cand.Store->getValueOperand();
460 if (LoadType != StoreType)
462 StoreValue, LoadType, "store_forward_cast", Cand.Store);
463
464 PHI->addIncoming(StoreValue, L->getLoopLatch());
465
466 Cand.Load->replaceAllUsesWith(PHI);
467 }
468
469 /// Top-level driver for each loop: find store->load forwarding
470 /// candidates, add run-time checks and perform transformation.
471 bool processLoop() {
472 LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
473 << "\" checking " << *L << "\n");
474
475 // Look for store-to-load forwarding cases across the
476 // backedge. E.g.:
477 //
478 // loop:
479 // %x = load %gep_i
480 // = ... %x
481 // store %y, %gep_i_plus_1
482 //
483 // =>
484 //
485 // ph:
486 // %x.initial = load %gep_0
487 // loop:
488 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
489 // %x = load %gep_i <---- now dead
490 // = ... %x.storeforward
491 // store %y, %gep_i_plus_1
492
493 // First start with store->load dependences.
494 auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
495 if (StoreToLoadDependences.empty())
496 return false;
497
498 // Generate an index for each load and store according to the original
499 // program order. This will be used later.
500 InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
501
502 // To keep things simple for now, remove those where the load is potentially
503 // fed by multiple stores.
504 removeDependencesFromMultipleStores(StoreToLoadDependences);
505 if (StoreToLoadDependences.empty())
506 return false;
507
508 // Filter the candidates further.
510 for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
511 LLVM_DEBUG(dbgs() << "Candidate " << Cand);
512
513 // Make sure that the stored values is available everywhere in the loop in
514 // the next iteration.
515 if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
516 continue;
517
518 // If the load is conditional we can't hoist its 0-iteration instance to
519 // the preheader because that would make it unconditional. Thus we would
520 // access a memory location that the original loop did not access.
521 if (isLoadConditional(Cand.Load, L))
522 continue;
523
524 // Check whether the SCEV difference is the same as the induction step,
525 // thus we load the value in the next iteration.
526 if (!Cand.isDependenceDistanceOfOne(PSE, L))
527 continue;
528
529 assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
530 "Loading from something other than indvar?");
531 assert(
532 isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
533 "Storing to something other than indvar?");
534
535 Candidates.push_back(Cand);
537 dbgs()
538 << Candidates.size()
539 << ". Valid store-to-load forwarding across the loop backedge\n");
540 }
541 if (Candidates.empty())
542 return false;
543
544 // Check intervening may-alias stores. These need runtime checks for alias
545 // disambiguation.
546 SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);
547
548 // Too many checks are likely to outweigh the benefits of forwarding.
549 if (Checks.size() > Candidates.size() * CheckPerElim) {
550 LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
551 return false;
552 }
553
554 if (LAI.getPSE().getPredicate().getComplexity() >
556 LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
557 return false;
558 }
559
560 if (!L->isLoopSimplifyForm()) {
561 LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
562 return false;
563 }
564
565 if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
566 if (LAI.hasConvergentOp()) {
567 LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
568 "convergent calls\n");
569 return false;
570 }
571
572 auto *HeaderBB = L->getHeader();
573 auto *F = HeaderBB->getParent();
574 bool OptForSize = F->hasOptSize() ||
575 llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
576 PGSOQueryType::IRPass);
577 if (OptForSize) {
579 dbgs() << "Versioning is needed but not allowed when optimizing "
580 "for size.\n");
581 return false;
582 }
583
584 // Point of no-return, start the transformation. First, version the loop
585 // if necessary.
586
587 LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
588 LV.versionLoop();
589
590 // After versioning, some of the candidates' pointers could stop being
591 // SCEVAddRecs. We need to filter them out.
592 auto NoLongerGoodCandidate = [this](
593 const StoreToLoadForwardingCandidate &Cand) {
594 return !isa<SCEVAddRecExpr>(
595 PSE.getSCEV(Cand.Load->getPointerOperand())) ||
596 !isa<SCEVAddRecExpr>(
597 PSE.getSCEV(Cand.Store->getPointerOperand()));
598 };
599 llvm::erase_if(Candidates, NoLongerGoodCandidate);
600 }
601
602 // Next, propagate the value stored by the store to the users of the load.
603 // Also for the first iteration, generate the initial value of the load.
604 SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
605 "storeforward");
606 for (const auto &Cand : Candidates)
607 propagateStoredValueToLoadUsers(Cand, SEE);
608 NumLoopLoadEliminted += Candidates.size();
609
610 return true;
611 }
612
613private:
614 Loop *L;
615
616 /// Maps the load/store instructions to their index according to
617 /// program order.
619
620 // Analyses used.
621 LoopInfo *LI;
622 const LoopAccessInfo &LAI;
623 DominatorTree *DT;
627};
628
629} // end anonymous namespace
630
632 DominatorTree &DT,
636 LoopAccessInfoManager &LAIs) {
637 // Build up a worklist of inner-loops to transform to avoid iterator
638 // invalidation.
639 // FIXME: This logic comes from other passes that actually change the loop
640 // nest structure. It isn't clear this is necessary (or useful) for a pass
641 // which merely optimizes the use of loads in a loop.
642 SmallVector<Loop *, 8> Worklist;
643
644 bool Changed = false;
645
646 for (Loop *TopLevelLoop : LI)
647 for (Loop *L : depth_first(TopLevelLoop)) {
648 Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
649 // We only handle inner-most loops.
650 if (L->isInnermost())
651 Worklist.push_back(L);
652 }
653
654 // Now walk the identified inner loops.
655 for (Loop *L : Worklist) {
656 // Match historical behavior
657 if (!L->isRotatedForm() || !L->getExitingBlock())
658 continue;
659 // The actual work is performed by LoadEliminationForLoop.
660 LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
661 Changed |= LEL.processLoop();
662 if (Changed)
663 LAIs.clear();
664 }
665 return Changed;
666}
667
670 auto &LI = AM.getResult<LoopAnalysis>(F);
671 // There are no loops in the function. Return before computing other expensive
672 // analyses.
673 if (LI.empty())
674 return PreservedAnalyses::all();
675 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
676 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
677 auto &AC = AM.getResult<AssumptionAnalysis>(F);
678 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
679 auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
680 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
681 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
683
684 bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);
685
686 if (!Changed)
687 return PreservedAnalyses::all();
688
692 return PA;
693}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
This header provides classes for managing per-loop analyses.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, ScalarEvolution *SE, AssumptionCache *AC, LoopAccessInfoManager &LAIs)
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
This header defines the LoopLoadEliminationPass object.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
if(VerifyEach)
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:76
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:649
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:803
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:277
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:164
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:123
An instruction for reading from memory.
Definition: Instructions.h:177
This analysis provides dependence information for the memory accesses of a loop.
const LoopAccessInfo & getInfo(Loop &L)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
const RuntimePointerChecking * getRuntimePointerChecking() const
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1087
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:172
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:178
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:193
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This class uses information about analyze scalars to rewrite expressions in canonical form.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1726
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1772
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2020
iterator_range< df_iterator< T > > depth_first(const T &G)
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define SEE(c)
Definition: regcomp.c:254
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.