LLVM 20.0.0git
LoopLoadElimination.cpp
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a loop-aware load elimination pass.
//
// It uses LoopAccessAnalysis to identify loop-carried dependences with a
// distance of one between stores and loads. These form the candidates for the
// transformation. The source value of each store is then propagated to the
// users of the corresponding load. This makes the load dead.
//
// The pass can also version the loop and add memchecks in order to prove that
// may-aliasing stores can't change the value in memory before it's read by the
// load.
//
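// For example (an illustrative sketch, not a specific test case), given
//
//   for (unsigned i = 0; i < n; i++)
//     A[i + 1] = A[i] + B[i];
//
// the value stored to A[i+1] in iteration i is exactly the value loaded from
// A[i] in iteration i+1, so the load can be replaced by a PHI of the stored
// value and an initial load hoisted into the preheader.
//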
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
#include <tuple>
#include <utility>

using namespace llvm;

#define LLE_OPTION "loop-load-elim"
#define DEBUG_TYPE LLE_OPTION

static cl::opt<unsigned> CheckPerElim(
    "runtime-check-per-loop-load-elim", cl::Hidden,
    cl::desc("Max number of memchecks allowed per eliminated load on average"),
    cl::init(1));

static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
    "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Load Elimination"));

STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");

namespace {

/// Represent a store-to-load forwarding candidate.
struct StoreToLoadForwardingCandidate {
  LoadInst *Load;
  StoreInst *Store;

  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
      : Load(Load), Store(Store) {}

  /// Return true if the dependence from the store to the load has an
  /// absolute distance of one.
  /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
                                 Loop *L) const {
    Value *LoadPtr = Load->getPointerOperand();
    Value *StorePtr = Store->getPointerOperand();
    Type *LoadType = getLoadStoreType(Load);
    auto &DL = Load->getDataLayout();

    assert(LoadPtr->getType()->getPointerAddressSpace() ==
               StorePtr->getType()->getPointerAddressSpace() &&
           DL.getTypeSizeInBits(LoadType) ==
               DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
           "Should be a known dependence");

    int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
    int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
    if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
      return false;

    // TODO: This check for stride values other than 1 and -1 can be eliminated.
    // However, doing so may cause the LoopAccessAnalysis to overcompensate,
    // generating numerous non-wrap runtime checks that may undermine the
    // benefits of load elimination. To safely implement support for non-unit
    // strides, we would need to ensure either that the processed case does not
    // require these additional checks, or improve the LAA to handle them more
    // efficiently, or potentially both.
    if (std::abs(StrideLoad) != 1)
      return false;

    unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));

    auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
    auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));

    // We don't need to check non-wrapping here because forward/backward
    // dependence wouldn't be valid if these weren't monotonic accesses.
    auto *Dist = dyn_cast<SCEVConstant>(
        PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
    if (!Dist)
      return false;
    const APInt &Val = Dist->getAPInt();
    return Val == TypeByteSize * StrideLoad;
  }

  Value *getLoadPtr() const { return Load->getPointerOperand(); }

#ifndef NDEBUG
  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const StoreToLoadForwardingCandidate &Cand) {
    OS << *Cand.Store << " -->\n";
    OS.indent(2) << *Cand.Load << "\n";
    return OS;
  }
#endif
};

} // end anonymous namespace

/// Check if the store dominates all latches, so that, as long as there is no
/// intervening store, this value will be loaded in the next iteration.
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
                                         DominatorTree *DT) {
  SmallVector<BasicBlock *, 8> Latches;
  L->getLoopLatches(Latches);
  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
    return DT->dominates(StoreBlock, Latch);
  });
}

/// Return true if the load is not executed on all paths in the loop.
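/// (Only the header is guaranteed to execute on every iteration, so this is a
/// conservative approximation; e.g. a load nested under an `if` in the loop
/// body is treated as conditional, since hoisting its iteration-0 instance to
/// the preheader would make it unconditional.)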
static bool isLoadConditional(LoadInst *Load, Loop *L) {
  return Load->getParent() != L->getHeader();
}

namespace {

/// The per-loop class that does most of the work.
class LoadEliminationForLoop {
public:
  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
                         DominatorTree *DT, BlockFrequencyInfo *BFI,
                         ProfileSummaryInfo *PSI)
      : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}

  /// Look through the loop-carried and loop-independent dependences in
  /// this loop and find store->load dependences.
  ///
  /// Note that no candidate is returned if LAA has failed to analyze the loop
  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
  std::forward_list<StoreToLoadForwardingCandidate>
  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
    std::forward_list<StoreToLoadForwardingCandidate> Candidates;

    const auto &DepChecker = LAI.getDepChecker();
    const auto *Deps = DepChecker.getDependences();
    if (!Deps)
      return Candidates;

    // Find store->load dependences (consequently true dep). Both lexically
    // forward and backward dependences qualify. Disqualify loads that have
    // other unknown dependences.

    SmallPtrSet<Instruction *, 4> LoadsWithUnknownDependence;

    for (const auto &Dep : *Deps) {
      Instruction *Source = Dep.getSource(DepChecker);
      Instruction *Destination = Dep.getDestination(DepChecker);

      if (Dep.Type == MemoryDepChecker::Dependence::Unknown ||
          Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
        if (isa<LoadInst>(Source))
          LoadsWithUnknownDependence.insert(Source);
        if (isa<LoadInst>(Destination))
          LoadsWithUnknownDependence.insert(Destination);
        continue;
      }

      if (Dep.isBackward())
        // Note that the designations source and destination follow the program
        // order, i.e. source is always first. (The direction is given by the
        // DepType.)
        std::swap(Source, Destination);
      else
        assert(Dep.isForward() && "Needs to be a forward dependence");

      auto *Store = dyn_cast<StoreInst>(Source);
      if (!Store)
        continue;
      auto *Load = dyn_cast<LoadInst>(Destination);
      if (!Load)
        continue;

      // Only propagate if the stored values are bit/pointer castable.
      if (!CastInst::isBitOrNoopPointerCastable(
              getLoadStoreType(Store), getLoadStoreType(Load),
              Store->getDataLayout()))
        continue;

      Candidates.emplace_front(Load, Store);
    }

    if (!LoadsWithUnknownDependence.empty())
      Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
        return LoadsWithUnknownDependence.count(C.Load);
      });

    return Candidates;
  }

  /// Return the index of the instruction according to program order.
  unsigned getInstrIndex(Instruction *Inst) {
    auto I = InstOrder.find(Inst);
    assert(I != InstOrder.end() && "No index for instruction");
    return I->second;
  }

  /// If a load has multiple candidates associated (i.e. different
  /// stores), it means that it could be forwarding from multiple stores
  /// depending on control flow. Remove these candidates.
  ///
  /// Here, we rely on LAA to include the relevant loop-independent dependences.
  /// LAA is known to omit these in the very simple case when the read and the
  /// write within an alias set always take place using the *same* pointer.
  ///
  /// However, we know that this is not the case here, i.e. we can rely on LAA
  /// to provide us with loop-independent dependences for the cases we're
  /// interested in. Consider the case for example where a loop-independent
  /// dependence S1->S2 invalidates the forwarding S3->S2.
  ///
  ///   A[i]   = ...   (S1)
  ///   ...    = A[i]  (S2)
  ///   A[i+1] = ...   (S3)
  ///
  /// LAA will perform dependence analysis here because there are two
  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
  void removeDependencesFromMultipleStores(
      std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
    // If Store is nullptr it means that we have multiple stores forwarding to
    // this load.
    using LoadToSingleCandT =
        DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>;
    LoadToSingleCandT LoadToSingleCand;

    for (const auto &Cand : Candidates) {
      bool NewElt;
      LoadToSingleCandT::iterator Iter;

      std::tie(Iter, NewElt) =
          LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
      if (!NewElt) {
        const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
        // Already multiple stores forward to this load.
        if (OtherCand == nullptr)
          continue;

        // Handle the very basic case when the two stores are in the same block
        // so deciding which one forwards is easy. The later one forwards as
        // long as they both have a dependence distance of one to the load.
        if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
            Cand.isDependenceDistanceOfOne(PSE, L) &&
            OtherCand->isDependenceDistanceOfOne(PSE, L)) {
          // They are in the same block, the later one will forward to the load.
          if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
            OtherCand = &Cand;
        } else
          OtherCand = nullptr;
      }
    }

    Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
      if (LoadToSingleCand[Cand.Load] != &Cand) {
        LLVM_DEBUG(
            dbgs() << "Removing from candidates: \n"
                   << Cand
                   << " The load may have multiple stores forwarding to "
                      "it\n");
        return true;
      }
      return false;
    });
  }

  /// Given two pointer operations by their RuntimePointerChecking
  /// indices, return true if they require an alias check.
  ///
  /// We need a check if one is a pointer for a candidate load and the other is
  /// a pointer for a possibly intervening store.
  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
                     const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
                     const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
    Value *Ptr1 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
    Value *Ptr2 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
    return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
            (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
  }

  /// Return pointers that are possibly written to on the path from a
  /// forwarding store to a load.
  ///
  /// These pointers need to be alias-checked against the forwarding candidates.
  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
      const std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
    // From FirstStore to LastLoad none of the elimination candidate loads
    // should overlap with any of the stores.
    //
    // E.g.:
    //
    // st1 C[i]
    // ld1 B[i] <-------,
    // ld0 A[i] <----,  |              * LastLoad
    // ...           |  |
    // st2 E[i]      |  |
    // st3 B[i+1] -- | -'              * FirstStore
    // st0 A[i+1] ---'
    // st4 D[i]
    //
    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
    // ld0.

    LoadInst *LastLoad =
        llvm::max_element(Candidates,
                          [&](const StoreToLoadForwardingCandidate &A,
                              const StoreToLoadForwardingCandidate &B) {
                            return getInstrIndex(A.Load) <
                                   getInstrIndex(B.Load);
                          })
            ->Load;
    StoreInst *FirstStore =
        llvm::min_element(Candidates,
                          [&](const StoreToLoadForwardingCandidate &A,
                              const StoreToLoadForwardingCandidate &B) {
                            return getInstrIndex(A.Store) <
                                   getInstrIndex(B.Store);
                          })
            ->Store;

    // We're looking for stores after the first forwarding store until the end
    // of the loop, then from the beginning of the loop until the last
    // forwarded-to load. Collect the pointers for the stores.
    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;

    auto InsertStorePtr = [&](Instruction *I) {
      if (auto *S = dyn_cast<StoreInst>(I))
        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
    };
    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
                  MemInstrs.end(), InsertStorePtr);
    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
                  InsertStorePtr);

    return PtrsWrittenOnFwdingPath;
  }

  /// Determine the pointer alias checks to prove that there are no
  /// intervening stores.
  SmallVector<RuntimePointerCheck, 4> collectMemchecks(
      const std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {

    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
        findPointersWrittenOnForwardingPath(Candidates);

    // Collect the pointers of the candidate loads.
    SmallPtrSet<Value *, 4> CandLoadPtrs;
    for (const auto &Candidate : Candidates)
      CandLoadPtrs.insert(Candidate.getLoadPtr());

    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
    SmallVector<RuntimePointerCheck, 4> Checks;

    copy_if(AllChecks, std::back_inserter(Checks),
            [&](const RuntimePointerCheck &Check) {
              for (auto PtrIdx1 : Check.first->Members)
                for (auto PtrIdx2 : Check.second->Members)
                  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
                                    CandLoadPtrs))
                    return true;
              return false;
            });

    LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
                      << "):\n");
    LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));

    return Checks;
  }

  /// Perform the transformation for a candidate.
  void
  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
                                  SCEVExpander &SEE) {
    // loop:
    //      %x = load %gep_i
    //         = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //         = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    Value *Ptr = Cand.Load->getPointerOperand();
    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
    auto *PH = L->getLoopPreheader();
    assert(PH && "Preheader should exist!");
    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
                                          PH->getTerminator());
    Value *Initial =
        new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial",
                     /* isVolatile */ false, Cand.Load->getAlign(),
                     PH->getTerminator()->getIterator());
    // We don't give any debug location to Initial, because it is inserted
    // into the loop's preheader. A debug location inside the loop will cause
    // a misleading stepping when debugging. The test
    // update-debugloc-store-forwarded.ll checks this.

    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
    PHI->insertBefore(L->getHeader()->begin());
    PHI->addIncoming(Initial, PH);

    Type *LoadType = Initial->getType();
    Type *StoreType = Cand.Store->getValueOperand()->getType();
    auto &DL = Cand.Load->getDataLayout();
    (void)DL;

    assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
           "The type sizes should match!");

    Value *StoreValue = Cand.Store->getValueOperand();
    if (LoadType != StoreType) {
      StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType,
                                                    "store_forward_cast",
                                                    Cand.Store->getIterator());
      // Because it casts the old `load` value and is used by the new `phi`
      // which replaces the old `load`, we give the `load`'s debug location
      // to it.
      cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
    }

    PHI->addIncoming(StoreValue, L->getLoopLatch());

    Cand.Load->replaceAllUsesWith(PHI);
    PHI->setDebugLoc(Cand.Load->getDebugLoc());
  }

  /// Top-level driver for each loop: find store->load forwarding
  /// candidates, add run-time checks and perform transformation.
  bool processLoop() {
    LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
                      << "\" checking " << *L << "\n");

    // Look for store-to-load forwarding cases across the
    // backedge. E.g.:
    //
    // loop:
    //      %x = load %gep_i
    //         = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //         = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    // First start with store->load dependences.
    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
    if (StoreToLoadDependences.empty())
      return false;

    // Generate an index for each load and store according to the original
    // program order. This will be used later.
    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();

    // To keep things simple for now, remove those where the load is
    // potentially fed by multiple stores.
    removeDependencesFromMultipleStores(StoreToLoadDependences);
    if (StoreToLoadDependences.empty())
      return false;

    // Filter the candidates further.
    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
      LLVM_DEBUG(dbgs() << "Candidate " << Cand);

      // Make sure that the stored value is available everywhere in the loop in
      // the next iteration.
      if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
        continue;

      // If the load is conditional we can't hoist its 0-iteration instance to
      // the preheader because that would make it unconditional. Thus we would
      // access a memory location that the original loop did not access.
      if (isLoadConditional(Cand.Load, L))
        continue;

      // Check whether the SCEV difference is the same as the induction step,
      // so that we load the value in the next iteration.
      if (!Cand.isDependenceDistanceOfOne(PSE, L))
        continue;

      assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
             "Loading from something other than indvar?");
      assert(
          isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
          "Storing to something other than indvar?");

      Candidates.push_back(Cand);
      LLVM_DEBUG(
          dbgs()
          << Candidates.size()
          << ". Valid store-to-load forwarding across the loop backedge\n");
    }
    if (Candidates.empty())
      return false;

    // Check intervening may-alias stores. These need runtime checks for alias
    // disambiguation.
    SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);

    // Too many checks are likely to outweigh the benefits of forwarding.
    if (Checks.size() > Candidates.size() * CheckPerElim) {
      LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
      return false;
    }

    if (LAI.getPSE().getPredicate().getComplexity() >
        LoadElimSCEVCheckThreshold) {
      LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
      return false;
    }

    if (!L->isLoopSimplifyForm()) {
      LLVM_DEBUG(dbgs() << "Loop is not in loop-simplify form\n");
      return false;
    }

    if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
      if (LAI.hasConvergentOp()) {
        LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
                             "convergent calls\n");
        return false;
      }

      auto *HeaderBB = L->getHeader();
      if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
                                      PGSOQueryType::IRPass)) {
        LLVM_DEBUG(
            dbgs() << "Versioning is needed but not allowed when optimizing "
                      "for size.\n");
        return false;
      }

      // Point of no-return, start the transformation. First, version the loop
      // if necessary.

      LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
      LV.versionLoop();

      // After versioning, some of the candidates' pointers could stop being
      // SCEVAddRecs. We need to filter them out.
      auto NoLongerGoodCandidate = [this](
          const StoreToLoadForwardingCandidate &Cand) {
        return !isa<SCEVAddRecExpr>(
                   PSE.getSCEV(Cand.Load->getPointerOperand())) ||
               !isa<SCEVAddRecExpr>(
                   PSE.getSCEV(Cand.Store->getPointerOperand()));
      };
      llvm::erase_if(Candidates, NoLongerGoodCandidate);
    }

    // Next, propagate the value stored by the store to the users of the load.
    // Also for the first iteration, generate the initial value of the load.
    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
                     "storeforward");
    for (const auto &Cand : Candidates)
      propagateStoredValueToLoadUsers(Cand, SEE);
    NumLoopLoadEliminted += Candidates.size();

    return true;
  }

private:
  Loop *L;

  /// Maps the load/store instructions to their index according to
  /// program order.
  DenseMap<Instruction *, unsigned> InstOrder;

  // Analyses used.
  LoopInfo *LI;
  const LoopAccessInfo &LAI;
  DominatorTree *DT;
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI;
  PredicatedScalarEvolution PSE;
};

} // end anonymous namespace

static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
                                      DominatorTree &DT,
                                      BlockFrequencyInfo *BFI,
                                      ProfileSummaryInfo *PSI,
                                      ScalarEvolution *SE, AssumptionCache *AC,
                                      LoopAccessInfoManager &LAIs) {
  // Build up a worklist of inner-loops to transform to avoid iterator
  // invalidation.
  // FIXME: This logic comes from other passes that actually change the loop
  // nest structure. It isn't clear this is necessary (or useful) for a pass
  // which merely optimizes the use of loads in a loop.
  SmallVector<Loop *, 8> Worklist;

  bool Changed = false;

  for (Loop *TopLevelLoop : LI)
    for (Loop *L : depth_first(TopLevelLoop)) {
      Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
      // We only handle inner-most loops.
      if (L->isInnermost())
        Worklist.push_back(L);
    }

  // Now walk the identified inner loops.
  for (Loop *L : Worklist) {
    // Match historical behavior.
    if (!L->isRotatedForm() || !L->getExitingBlock())
      continue;
    // The actual work is performed by LoadEliminationForLoop.
    LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
    Changed |= LEL.processLoop();
    if (Changed)
      LAIs.clear();
  }
  return Changed;
}

PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  // There are no loops in the function. Return before computing other
  // expensive analyses.
  if (LI.empty())
    return PreservedAnalyses::all();
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary())
                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
                  : nullptr;
  auto &LAIs = AM.getResult<LoopAccessAnalysis>(F);

  bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}
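
A minimal way to exercise the pass in isolation is through opt. The IR below
is an illustrative sketch (the function name and trip count are made up, not
an existing test), matching the load/store pattern from the header comment:

  ; example.ll: A[i+1] = A[i] + 1 carried across the backedge.
  define void @f(ptr %A, i64 %n) {
  entry:
    br label %loop

  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    %gep.i = getelementptr inbounds i32, ptr %A, i64 %i
    %x = load i32, ptr %gep.i, align 4
    %y = add i32 %x, 1
    %i.next = add nuw nsw i64 %i, 1
    %gep.i.plus.1 = getelementptr inbounds i32, ptr %A, i64 %i.next
    store i32 %y, ptr %gep.i.plus.1, align 4
    %done = icmp eq i64 %i.next, %n
    br i1 %done, label %exit, label %loop

  exit:
    ret void
  }

  $ opt -passes=loop-load-elim -S example.ll

The load of A[i] should be rewritten into a "store_forwarded" PHI fed by a
"load_initial" load in the preheader, as in propagateStoredValueToLoadUsers
above. In an assertions-enabled build, adding -debug-only=loop-load-elim
prints the candidates and any run-time checks, and the
-runtime-check-per-loop-load-elim and
-loop-load-elimination-scev-check-threshold flags defined at the top of the
file bound how much run-time checking the pass will tolerate.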