LLVM 22.0.0git
LoopLoadElimination.cpp
Go to the documentation of this file.
1//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a loop-aware load elimination pass.
10//
11// It uses LoopAccessAnalysis to identify loop-carried dependences with a
12// distance of one between stores and loads. These form the candidates for the
13// transformation. The source value of each store is then propagated to the user
14// of the corresponding load. This makes the load dead.
15//
16// The pass can also version the loop and add memchecks in order to prove that
17// may-aliasing stores can't change the value in memory before it's read by the
18// load.
19//
20//===----------------------------------------------------------------------===//
21
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/SizeOpts.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <forward_list>
#include <tuple>
#include <utility>
61
62using namespace llvm;
63
64#define LLE_OPTION "loop-load-elim"
65#define DEBUG_TYPE LLE_OPTION
66
68 "runtime-check-per-loop-load-elim", cl::Hidden,
69 cl::desc("Max number of memchecks allowed per eliminated load on average"),
70 cl::init(1));
71
73 "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
74 cl::desc("The maximum number of SCEV checks allowed for Loop "
75 "Load Elimination"));
76
77STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
78
79namespace {
80
81/// Represent a store-to-forwarding candidate.
82struct StoreToLoadForwardingCandidate {
83 LoadInst *Load;
84 StoreInst *Store;
85
86 StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
87 : Load(Load), Store(Store) {}
88
89 /// Return true if the dependence from the store to the load has an
90 /// absolute distance of one.
91 /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
92 bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
93 Loop *L) const {
94 Value *LoadPtr = Load->getPointerOperand();
95 Value *StorePtr = Store->getPointerOperand();
96 Type *LoadType = getLoadStoreType(Load);
97 auto &DL = Load->getDataLayout();
98
100 StorePtr->getType()->getPointerAddressSpace() &&
101 DL.getTypeSizeInBits(LoadType) ==
102 DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
103 "Should be a known dependence");
104
105 int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
106 int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
107 if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
108 return false;
109
110 // TODO: This check for stride values other than 1 and -1 can be eliminated.
111 // However, doing so may cause the LoopAccessAnalysis to overcompensate,
112 // generating numerous non-wrap runtime checks that may undermine the
113 // benefits of load elimination. To safely implement support for non-unit
114 // strides, we would need to ensure either that the processed case does not
115 // require these additional checks, or improve the LAA to handle them more
116 // efficiently, or potentially both.
117 if (std::abs(StrideLoad) != 1)
118 return false;
119
120 unsigned TypeByteSize = DL.getTypeAllocSize(LoadType);
121
122 auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
123 auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
124
125 // We don't need to check non-wrapping here because forward/backward
126 // dependence wouldn't be valid if these weren't monotonic accesses.
127 auto *Dist = dyn_cast<SCEVConstant>(
128 PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
129 if (!Dist)
130 return false;
131 const APInt &Val = Dist->getAPInt();
132 return Val == TypeByteSize * StrideLoad;
133 }
134
135 Value *getLoadPtr() const { return Load->getPointerOperand(); }
136
137#ifndef NDEBUG
139 const StoreToLoadForwardingCandidate &Cand) {
140 OS << *Cand.Store << " -->\n";
141 OS.indent(2) << *Cand.Load << "\n";
142 return OS;
143 }
144#endif
145};
146
147} // end anonymous namespace
148
149/// Check if the store dominates all latches, so as long as there is no
150/// intervening store this value will be loaded in the next iteration.
151static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
152 DominatorTree *DT) {
154 L->getLoopLatches(Latches);
155 return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
156 return DT->dominates(StoreBlock, Latch);
157 });
158}
159
160/// Return true if the load is not executed on all paths in the loop.
161static bool isLoadConditional(LoadInst *Load, Loop *L) {
162 return Load->getParent() != L->getHeader();
163}
164
165namespace {
166
167/// The per-loop class that does most of the work.
168class LoadEliminationForLoop {
169public:
170 LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
173 : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
174
175 /// Look through the loop-carried and loop-independent dependences in
176 /// this loop and find store->load dependences.
177 ///
178 /// Note that no candidate is returned if LAA has failed to analyze the loop
179 /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
180 std::forward_list<StoreToLoadForwardingCandidate>
181 findStoreToLoadDependences(const LoopAccessInfo &LAI) {
182 std::forward_list<StoreToLoadForwardingCandidate> Candidates;
183
184 const auto &DepChecker = LAI.getDepChecker();
185 const auto *Deps = DepChecker.getDependences();
186 if (!Deps)
187 return Candidates;
188
189 // Find store->load dependences (consequently true dep). Both lexically
190 // forward and backward dependences qualify. Disqualify loads that have
191 // other unknown dependences.
192
193 SmallPtrSet<Instruction *, 4> LoadsWithUnknownDependence;
194
195 for (const auto &Dep : *Deps) {
196 Instruction *Source = Dep.getSource(DepChecker);
197 Instruction *Destination = Dep.getDestination(DepChecker);
198
201 if (isa<LoadInst>(Source))
202 LoadsWithUnknownDependence.insert(Source);
203 if (isa<LoadInst>(Destination))
204 LoadsWithUnknownDependence.insert(Destination);
205 continue;
206 }
207
208 if (Dep.isBackward())
209 // Note that the designations source and destination follow the program
210 // order, i.e. source is always first. (The direction is given by the
211 // DepType.)
212 std::swap(Source, Destination);
213 else
214 assert(Dep.isForward() && "Needs to be a forward dependence");
215
216 auto *Store = dyn_cast<StoreInst>(Source);
217 if (!Store)
218 continue;
219 auto *Load = dyn_cast<LoadInst>(Destination);
220 if (!Load)
221 continue;
222
223 // Only propagate if the stored values are bit/pointer castable.
226 Store->getDataLayout()))
227 continue;
228
229 Candidates.emplace_front(Load, Store);
230 }
231
232 if (!LoadsWithUnknownDependence.empty())
233 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
234 return LoadsWithUnknownDependence.count(C.Load);
235 });
236
237 return Candidates;
238 }
239
240 /// Return the index of the instruction according to program order.
241 unsigned getInstrIndex(Instruction *Inst) {
242 auto I = InstOrder.find(Inst);
243 assert(I != InstOrder.end() && "No index for instruction");
244 return I->second;
245 }
246
247 /// If a load has multiple candidates associated (i.e. different
248 /// stores), it means that it could be forwarding from multiple stores
249 /// depending on control flow. Remove these candidates.
250 ///
251 /// Here, we rely on LAA to include the relevant loop-independent dependences.
252 /// LAA is known to omit these in the very simple case when the read and the
253 /// write within an alias set always takes place using the *same* pointer.
254 ///
255 /// However, we know that this is not the case here, i.e. we can rely on LAA
256 /// to provide us with loop-independent dependences for the cases we're
257 /// interested. Consider the case for example where a loop-independent
258 /// dependece S1->S2 invalidates the forwarding S3->S2.
259 ///
260 /// A[i] = ... (S1)
261 /// ... = A[i] (S2)
262 /// A[i+1] = ... (S3)
263 ///
264 /// LAA will perform dependence analysis here because there are two
265 /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
266 void removeDependencesFromMultipleStores(
267 std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
268 // If Store is nullptr it means that we have multiple stores forwarding to
269 // this store.
270 using LoadToSingleCandT =
272 LoadToSingleCandT LoadToSingleCand;
273
274 for (const auto &Cand : Candidates) {
275 bool NewElt;
276 LoadToSingleCandT::iterator Iter;
277
278 std::tie(Iter, NewElt) =
279 LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
280 if (!NewElt) {
281 const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
282 // Already multiple stores forward to this load.
283 if (OtherCand == nullptr)
284 continue;
285
286 // Handle the very basic case when the two stores are in the same block
287 // so deciding which one forwards is easy. The later one forwards as
288 // long as they both have a dependence distance of one to the load.
289 if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
290 Cand.isDependenceDistanceOfOne(PSE, L) &&
291 OtherCand->isDependenceDistanceOfOne(PSE, L)) {
292 // They are in the same block, the later one will forward to the load.
293 if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
294 OtherCand = &Cand;
295 } else
296 OtherCand = nullptr;
297 }
298 }
299
300 Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
301 if (LoadToSingleCand[Cand.Load] != &Cand) {
302 LLVM_DEBUG(
303 dbgs() << "Removing from candidates: \n"
304 << Cand
305 << " The load may have multiple stores forwarding to "
306 << "it\n");
307 return true;
308 }
309 return false;
310 });
311 }
312
313 /// Given two pointers operations by their RuntimePointerChecking
314 /// indices, return true if they require an alias check.
315 ///
316 /// We need a check if one is a pointer for a candidate load and the other is
317 /// a pointer for a possibly intervening store.
318 bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
319 const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
320 const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
321 Value *Ptr1 =
323 Value *Ptr2 =
325 return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
326 (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
327 }
328
329 /// Return pointers that are possibly written to on the path from a
330 /// forwarding store to a load.
331 ///
332 /// These pointers need to be alias-checked against the forwarding candidates.
333 SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
335 // From FirstStore to LastLoad neither of the elimination candidate loads
336 // should overlap with any of the stores.
337 //
338 // E.g.:
339 //
340 // st1 C[i]
341 // ld1 B[i] <-------,
342 // ld0 A[i] <----, | * LastLoad
343 // ... | |
344 // st2 E[i] | |
345 // st3 B[i+1] -- | -' * FirstStore
346 // st0 A[i+1] ---'
347 // st4 D[i]
348 //
349 // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
350 // ld0.
351
352 LoadInst *LastLoad =
353 llvm::max_element(Candidates,
354 [&](const StoreToLoadForwardingCandidate &A,
355 const StoreToLoadForwardingCandidate &B) {
356 return getInstrIndex(A.Load) <
357 getInstrIndex(B.Load);
358 })
359 ->Load;
360 StoreInst *FirstStore =
361 llvm::min_element(Candidates,
362 [&](const StoreToLoadForwardingCandidate &A,
363 const StoreToLoadForwardingCandidate &B) {
364 return getInstrIndex(A.Store) <
365 getInstrIndex(B.Store);
366 })
367 ->Store;
368
369 // We're looking for stores after the first forwarding store until the end
370 // of the loop, then from the beginning of the loop until the last
371 // forwarded-to load. Collect the pointer for the stores.
372 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
373
374 auto InsertStorePtr = [&](Instruction *I) {
375 if (auto *S = dyn_cast<StoreInst>(I))
376 PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
377 };
378 const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
379 std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
380 MemInstrs.end(), InsertStorePtr);
381 std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
382 InsertStorePtr);
383
384 return PtrsWrittenOnFwdingPath;
385 }
386
387 /// Determine the pointer alias checks to prove that there are no
388 /// intervening stores.
391
392 SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
393 findPointersWrittenOnForwardingPath(Candidates);
394
395 // Collect the pointers of the candidate loads.
396 SmallPtrSet<Value *, 4> CandLoadPtrs;
397 for (const auto &Candidate : Candidates)
398 CandLoadPtrs.insert(Candidate.getLoadPtr());
399
400 const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
402
403 copy_if(AllChecks, std::back_inserter(Checks),
404 [&](const RuntimePointerCheck &Check) {
405 for (auto PtrIdx1 : Check.first->Members)
406 for (auto PtrIdx2 : Check.second->Members)
407 if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
408 CandLoadPtrs))
409 return true;
410 return false;
411 });
412
413 LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
414 << "):\n");
416
417 return Checks;
418 }
419
420 /// Perform the transformation for a candidate.
421 void
422 propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
423 SCEVExpander &SEE) {
424 // loop:
425 // %x = load %gep_i
426 // = ... %x
427 // store %y, %gep_i_plus_1
428 //
429 // =>
430 //
431 // ph:
432 // %x.initial = load %gep_0
433 // loop:
434 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
435 // %x = load %gep_i <---- now dead
436 // = ... %x.storeforward
437 // store %y, %gep_i_plus_1
438
439 Value *Ptr = Cand.Load->getPointerOperand();
440 auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
441 auto *PH = L->getLoopPreheader();
442 assert(PH && "Preheader should exist!");
443 Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
444 PH->getTerminator());
446 new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial",
447 /* isVolatile */ false, Cand.Load->getAlign(),
448 PH->getTerminator()->getIterator());
449 // We don't give any debug location to Initial, because it is inserted
450 // into the loop's preheader. A debug location inside the loop will cause
451 // a misleading stepping when debugging. The test update-debugloc-store
452 // -forwarded.ll checks this.
453 Initial->setDebugLoc(DebugLoc::getDropped());
454
455 PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
456 PHI->insertBefore(L->getHeader()->begin());
457 PHI->addIncoming(Initial, PH);
458
459 Type *LoadType = Initial->getType();
460 Type *StoreType = Cand.Store->getValueOperand()->getType();
461 auto &DL = Cand.Load->getDataLayout();
462 (void)DL;
463
464 assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
465 "The type sizes should match!");
466
467 Value *StoreValue = Cand.Store->getValueOperand();
468 if (LoadType != StoreType) {
469 StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType,
470 "store_forward_cast",
471 Cand.Store->getIterator());
472 // Because it casts the old `load` value and is used by the new `phi`
473 // which replaces the old `load`, we give the `load`'s debug location
474 // to it.
475 cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
476 }
477
478 PHI->addIncoming(StoreValue, L->getLoopLatch());
479
480 Cand.Load->replaceAllUsesWith(PHI);
481 PHI->setDebugLoc(Cand.Load->getDebugLoc());
482 }
483
484 /// Top-level driver for each loop: find store->load forwarding
485 /// candidates, add run-time checks and perform transformation.
486 bool processLoop() {
487 LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
488 << "\" checking " << *L << "\n");
489
490 // Look for store-to-load forwarding cases across the
491 // backedge. E.g.:
492 //
493 // loop:
494 // %x = load %gep_i
495 // = ... %x
496 // store %y, %gep_i_plus_1
497 //
498 // =>
499 //
500 // ph:
501 // %x.initial = load %gep_0
502 // loop:
503 // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
504 // %x = load %gep_i <---- now dead
505 // = ... %x.storeforward
506 // store %y, %gep_i_plus_1
507
508 // First start with store->load dependences.
509 auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
510 if (StoreToLoadDependences.empty())
511 return false;
512
513 // Generate an index for each load and store according to the original
514 // program order. This will be used later.
515 InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
516
517 // To keep things simple for now, remove those where the load is potentially
518 // fed by multiple stores.
519 removeDependencesFromMultipleStores(StoreToLoadDependences);
520 if (StoreToLoadDependences.empty())
521 return false;
522
523 // Filter the candidates further.
525 for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
526 LLVM_DEBUG(dbgs() << "Candidate " << Cand);
527
528 // Make sure that the stored values is available everywhere in the loop in
529 // the next iteration.
530 if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
531 continue;
532
533 // If the load is conditional we can't hoist its 0-iteration instance to
534 // the preheader because that would make it unconditional. Thus we would
535 // access a memory location that the original loop did not access.
536 if (isLoadConditional(Cand.Load, L))
537 continue;
538
539 // Check whether the SCEV difference is the same as the induction step,
540 // thus we load the value in the next iteration.
541 if (!Cand.isDependenceDistanceOfOne(PSE, L))
542 continue;
543
544 assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
545 "Loading from something other than indvar?");
546 assert(
547 isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
548 "Storing to something other than indvar?");
549
550 Candidates.push_back(Cand);
552 dbgs()
553 << Candidates.size()
554 << ". Valid store-to-load forwarding across the loop backedge\n");
555 }
556 if (Candidates.empty())
557 return false;
558
559 // Check intervening may-alias stores. These need runtime checks for alias
560 // disambiguation.
561 SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);
562
563 // Too many checks are likely to outweigh the benefits of forwarding.
564 if (Checks.size() > Candidates.size() * CheckPerElim) {
565 LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
566 return false;
567 }
568
569 if (LAI.getPSE().getPredicate().getComplexity() >
571 LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
572 return false;
573 }
574
575 if (!L->isLoopSimplifyForm()) {
576 LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
577 return false;
578 }
579
580 if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
581 if (LAI.hasConvergentOp()) {
582 LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
583 "convergent calls\n");
584 return false;
585 }
586
587 auto *HeaderBB = L->getHeader();
588 if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
589 PGSOQueryType::IRPass)) {
591 dbgs() << "Versioning is needed but not allowed when optimizing "
592 "for size.\n");
593 return false;
594 }
595
596 // Point of no-return, start the transformation. First, version the loop
597 // if necessary.
598
599 LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
600 LV.versionLoop();
601
602 // After versioning, some of the candidates' pointers could stop being
603 // SCEVAddRecs. We need to filter them out.
604 auto NoLongerGoodCandidate = [this](
605 const StoreToLoadForwardingCandidate &Cand) {
606 return !isa<SCEVAddRecExpr>(
607 PSE.getSCEV(Cand.Load->getPointerOperand())) ||
608 !isa<SCEVAddRecExpr>(
609 PSE.getSCEV(Cand.Store->getPointerOperand()));
610 };
611 llvm::erase_if(Candidates, NoLongerGoodCandidate);
612 }
613
614 // Next, propagate the value stored by the store to the users of the load.
615 // Also for the first iteration, generate the initial value of the load.
616 SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
617 "storeforward");
618 for (const auto &Cand : Candidates)
619 propagateStoredValueToLoadUsers(Cand, SEE);
620 NumLoopLoadEliminted += Candidates.size();
621
622 return true;
623 }
624
625private:
626 Loop *L;
627
628 /// Maps the load/store instructions to their index according to
629 /// program order.
631
632 // Analyses used.
633 LoopInfo *LI;
634 const LoopAccessInfo &LAI;
635 DominatorTree *DT;
639};
640
641} // end anonymous namespace
642
644 DominatorTree &DT,
648 LoopAccessInfoManager &LAIs) {
649 // Build up a worklist of inner-loops to transform to avoid iterator
650 // invalidation.
651 // FIXME: This logic comes from other passes that actually change the loop
652 // nest structure. It isn't clear this is necessary (or useful) for a pass
653 // which merely optimizes the use of loads in a loop.
654 SmallVector<Loop *, 8> Worklist;
655
656 bool Changed = false;
657
658 for (Loop *TopLevelLoop : LI)
659 for (Loop *L : depth_first(TopLevelLoop)) {
660 Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
661 // We only handle inner-most loops.
662 if (L->isInnermost())
663 Worklist.push_back(L);
664 }
665
666 // Now walk the identified inner loops.
667 for (Loop *L : Worklist) {
668 // Match historical behavior
669 if (!L->isRotatedForm() || !L->getExitingBlock())
670 continue;
671 // The actual work is performed by LoadEliminationForLoop.
672 LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
673 Changed |= LEL.processLoop();
674 if (Changed)
675 LAIs.clear();
676 }
677 return Changed;
678}
679
682 auto &LI = AM.getResult<LoopAnalysis>(F);
683 // There are no loops in the function. Return before computing other expensive
684 // analyses.
685 if (LI.empty())
686 return PreservedAnalyses::all();
687 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
688 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
689 auto &AC = AM.getResult<AssumptionAnalysis>(F);
690 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
691 auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
692 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
693 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
695
696 bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);
697
698 if (!Changed)
699 return PreservedAnalyses::all();
700
704 return PA;
705}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define Check(C,...)
This is the interface for a simple mod/ref and alias analysis over globals.
This header defines various interfaces for pass management in LLVM.
This header provides classes for managing per-loop analyses.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, ScalarEvolution *SE, AssumptionCache *AC, LoopAccessInfoManager &LAIs)
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
This header defines the LoopLoadEliminationPass object.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
#define LLVM_DEBUG(...)
Definition: Debug.h:119
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:255
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:412
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static DebugLoc getDropped()
Definition: DebugLoc.h:164
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:230
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:284
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:135
An instruction for reading from memory.
Definition: Instructions.h:180
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
const RuntimePointerChecking * getRuntimePointerChecking() const
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:716
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEVPredicate & getPredicate() const
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition: Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This class uses information about analyze scalars to rewrite expressions in canonical form.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:470
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2039
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1796
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2049
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2139
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
iterator_range< df_iterator< T > > depth_first(const T &G)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
#define SEE(c)
Definition: regcomp.c:248
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.