LLVM 17.0.0git
LoopLoadElimination.cpp
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a loop-aware load elimination pass.
//
// It uses LoopAccessAnalysis to identify loop-carried dependences with a
// distance of one between stores and loads. These form the candidates for the
// transformation. The source value of each store is then propagated to the
// users of the corresponding load. This makes the load dead.
//
// The pass can also version the loop and add memchecks in order to prove that
// may-aliasing stores can't change the value in memory before it's read by the
// load.
//
//===----------------------------------------------------------------------===//

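//
// As an illustrative (not verbatim) source-level example of the pattern this
// pass targets, in
//
//   for (unsigned I = 0; I < N; ++I) {
//     A[I + 1] = B[I] + 1; // store, one element ahead of the load below
//     C[I] = A[I] * 2;     // load; reads the value stored one iteration ago
//   }
//
// the load of A[I] can be replaced by a PHI that carries the value stored to
// A[I + 1] across the backedge, with A[0] loaded once in the preheader.
//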
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
#include <tuple>
#include <utility>

using namespace llvm;

#define LLE_OPTION "loop-load-elim"
#define DEBUG_TYPE LLE_OPTION

static cl::opt<unsigned> CheckPerElim(
    "runtime-check-per-loop-load-elim", cl::Hidden,
    cl::desc("Max number of memchecks allowed per eliminated load on average"),
    cl::init(1));

static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
    "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Load Elimination"));

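// These knobs are tunable from the opt command line (illustrative invocation;
// the flag spellings are the cl::opt names declared above):
//   opt -passes=loop-load-elim \
//       -runtime-check-per-loop-load-elim=2 \
//       -loop-load-elimination-scev-check-threshold=16 input.ll
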
STATISTIC(NumLoopLoadEliminated, "Number of loads eliminated by LLE");

namespace {

/// Represent a store-to-load forwarding candidate.
struct StoreToLoadForwardingCandidate {
  LoadInst *Load;
  StoreInst *Store;

  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
      : Load(Load), Store(Store) {}

  /// Return true if the dependence from the store to the load has a
  /// distance of one. E.g. A[i+1] = A[i]
  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
                                 Loop *L) const {
    Value *LoadPtr = Load->getPointerOperand();
    Value *StorePtr = Store->getPointerOperand();
    Type *LoadType = getLoadStoreType(Load);
    auto &DL = Load->getParent()->getModule()->getDataLayout();

    assert(LoadPtr->getType()->getPointerAddressSpace() ==
               StorePtr->getType()->getPointerAddressSpace() &&
           DL.getTypeSizeInBits(LoadType) ==
               DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
           "Should be a known dependence");

    // Currently we only support accesses with unit stride.  FIXME: we should
    // be able to handle non-unit strides as well, as long as the stride is
    // equal to the dependence distance.
    if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
        getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
      return false;

    unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));

    auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
    auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));

    // We don't need to check non-wrapping here because forward/backward
    // dependence wouldn't be valid if these weren't monotonic accesses.
    auto *Dist = cast<SCEVConstant>(
        PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
    const APInt &Val = Dist->getAPInt();
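    // A distance of exactly one element, e.g. 4 bytes for i32 accesses
    // A[i] (load) and A[i+1] (store).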
    return Val == TypeByteSize;
  }

  Value *getLoadPtr() const { return Load->getPointerOperand(); }

#ifndef NDEBUG
  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const StoreToLoadForwardingCandidate &Cand) {
    OS << *Cand.Store << " -->\n";
    OS.indent(2) << *Cand.Load << "\n";
    return OS;
  }
#endif
};

} // end anonymous namespace

/// Check if the store dominates all latches, so as long as there is no
/// intervening store this value will be loaded in the next iteration.
static bool doesStoreDominateAllLatches(BasicBlock *StoreBlock, Loop *L,
                                        DominatorTree *DT) {
  SmallVector<BasicBlock *, 8> Latches;
  L->getLoopLatches(Latches);
  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
    return DT->dominates(StoreBlock, Latch);
  });
}
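
// For example (illustrative), in
//   for (...) { if (C[i]) A[i + 1] = X; ... = A[i]; }
// the store does not dominate the latch: on iterations where the branch is
// not taken nothing is stored, so there is no value to forward across the
// backedge.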

/// Return true if the load is not executed on all paths in the loop.
static bool isLoadConditional(LoadInst *Load, Loop *L) {
  return Load->getParent() != L->getHeader();
}
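
// E.g. (illustrative) the load below is conditional; hoisting its 0-iteration
// instance to the preheader would execute a load the original loop may never
// perform:
//   for (...) { A[i + 1] = X; if (C[i]) Sum += A[i]; }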

namespace {

/// The per-loop class that does most of the work.
class LoadEliminationForLoop {
public:
  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
                         DominatorTree *DT, BlockFrequencyInfo *BFI,
                         ProfileSummaryInfo *PSI)
      : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}

  /// Look through the loop-carried and loop-independent dependences in
  /// this loop and find store->load dependences.
  ///
  /// Note that no candidate is returned if LAA has failed to analyze the loop
  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
  std::forward_list<StoreToLoadForwardingCandidate>
  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
    std::forward_list<StoreToLoadForwardingCandidate> Candidates;

    const auto *Deps = LAI.getDepChecker().getDependences();
    if (!Deps)
      return Candidates;

    // Find store->load dependences (consequently a true dependence). Both
    // lexically forward and backward dependences qualify. Disqualify loads
    // that have other unknown dependences.
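    //
    // E.g. (illustrative) lexically backward: the store follows the load in
    // program order and forwards to it across the backedge,
    //   ... = A[i];
    //   A[i + 1] = ...;
    // Lexically forward: the store precedes the load within one iteration,
    //   A[i + 1] = ...;
    //   ... = A[i];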

    SmallPtrSet<Instruction *, 4> LoadsWithUnknownDependence;

    for (const auto &Dep : *Deps) {
      Instruction *Source = Dep.getSource(LAI);
      Instruction *Destination = Dep.getDestination(LAI);

      if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
        if (isa<LoadInst>(Source))
          LoadsWithUnknownDependence.insert(Source);
        if (isa<LoadInst>(Destination))
          LoadsWithUnknownDependence.insert(Destination);
        continue;
      }

      if (Dep.isBackward())
        // Note that the designations source and destination follow the program
        // order, i.e. source is always first. (The direction is given by the
        // DepType.)
        std::swap(Source, Destination);
      else
        assert(Dep.isForward() && "Needs to be a forward dependence");

      auto *Store = dyn_cast<StoreInst>(Source);
      if (!Store)
        continue;
      auto *Load = dyn_cast<LoadInst>(Destination);
      if (!Load)
        continue;

      // Only propagate if the stored values are bit/pointer castable.
      if (!CastInst::isBitOrNoopPointerCastable(
              getLoadStoreType(Store), getLoadStoreType(Load),
              Store->getParent()->getModule()->getDataLayout()))
        continue;

      Candidates.emplace_front(Load, Store);
    }

    if (!LoadsWithUnknownDependence.empty())
      Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
        return LoadsWithUnknownDependence.count(C.Load);
      });

    return Candidates;
  }

  /// Return the index of the instruction according to program order.
  unsigned getInstrIndex(Instruction *Inst) {
    auto I = InstOrder.find(Inst);
    assert(I != InstOrder.end() && "No index for instruction");
    return I->second;
  }

  /// If a load has multiple candidates associated (i.e. different
  /// stores), it means that it could be forwarding from multiple stores
  /// depending on control flow. Remove these candidates.
  ///
  /// Here, we rely on LAA to include the relevant loop-independent dependences.
  /// LAA is known to omit these in the very simple case when the read and the
  /// write within an alias set always take place using the *same* pointer.
  ///
  /// However, we know that this is not the case here, i.e. we can rely on LAA
  /// to provide us with loop-independent dependences for the cases we're
  /// interested in. Consider for example the case where a loop-independent
  /// dependence S1->S2 invalidates the forwarding S3->S2.
  ///
  ///   A[i]   = ...   (S1)
  ///   ...    = A[i]  (S2)
  ///   A[i+1] = ...   (S3)
  ///
  /// LAA will perform dependence analysis here because there are two
  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
  void removeDependencesFromMultipleStores(
      std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
    // If Store is nullptr it means that we have multiple stores forwarding to
    // this load.
    using LoadToSingleCandT =
        DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>;
    LoadToSingleCandT LoadToSingleCand;

    for (const auto &Cand : Candidates) {
      bool NewElt;
      LoadToSingleCandT::iterator Iter;

      std::tie(Iter, NewElt) =
          LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
      if (!NewElt) {
        const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
        // Already multiple stores forward to this load.
        if (OtherCand == nullptr)
          continue;

        // Handle the very basic case when the two stores are in the same block
        // so deciding which one forwards is easy. The later one forwards as
        // long as they both have a dependence distance of one to the load.
        if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
            Cand.isDependenceDistanceOfOne(PSE, L) &&
            OtherCand->isDependenceDistanceOfOne(PSE, L)) {
          // They are in the same block, the later one will forward to the
          // load.
          if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
            OtherCand = &Cand;
        } else
          OtherCand = nullptr;
      }
    }

    Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
      if (LoadToSingleCand[Cand.Load] != &Cand) {
        LLVM_DEBUG(
            dbgs() << "Removing from candidates: \n"
                   << Cand
                   << " The load may have multiple stores forwarding to "
                   << "it\n");
        return true;
      }
      return false;
    });
  }

  /// Given two pointer operations by their RuntimePointerChecking
  /// indices, return true if they require an alias check.
  ///
  /// We need a check if one is a pointer for a candidate load and the other is
  /// a pointer for a possibly intervening store.
  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
                     const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
                     const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
    Value *Ptr1 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
    Value *Ptr2 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
    return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
            (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
  }

  /// Return pointers that are possibly written to on the path from a
  /// forwarding store to a load.
  ///
  /// These pointers need to be alias-checked against the forwarding candidates.
  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
    // From FirstStore to LastLoad neither of the elimination candidate loads
    // should overlap with any of the stores.
    //
    // E.g.:
    //
    // st1 C[i]
    // ld1 B[i] <-------,
    // ld0 A[i] <----,  |              * LastLoad
    // ...           |  |
    // st2 E[i]      |  |
    // st3 B[i+1] -- | -'              * FirstStore
    // st0 A[i+1] ---'
    // st4 D[i]
    //
    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
    // ld0.

    LoadInst *LastLoad =
        std::max_element(Candidates.begin(), Candidates.end(),
                         [&](const StoreToLoadForwardingCandidate &A,
                             const StoreToLoadForwardingCandidate &B) {
                           return getInstrIndex(A.Load) < getInstrIndex(B.Load);
                         })
            ->Load;
    StoreInst *FirstStore =
        std::min_element(Candidates.begin(), Candidates.end(),
                         [&](const StoreToLoadForwardingCandidate &A,
                             const StoreToLoadForwardingCandidate &B) {
                           return getInstrIndex(A.Store) <
                                  getInstrIndex(B.Store);
                         })
            ->Store;

    // We're looking for stores after the first forwarding store until the end
    // of the loop, then from the beginning of the loop until the last
    // forwarded-to load. Collect the pointers for those stores.
    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;

    auto InsertStorePtr = [&](Instruction *I) {
      if (auto *S = dyn_cast<StoreInst>(I))
        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
    };
    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
                  MemInstrs.end(), InsertStorePtr);
    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
                  InsertStorePtr);

    return PtrsWrittenOnFwdingPath;
  }

  /// Determine the pointer alias checks to prove that there are no
  /// intervening stores.
  SmallVector<RuntimePointerCheck, 4> collectMemchecks(
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {

    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
        findPointersWrittenOnForwardingPath(Candidates);

    // Collect the pointers of the candidate loads.
    SmallPtrSet<Value *, 4> CandLoadPtrs;
    for (const auto &Candidate : Candidates)
      CandLoadPtrs.insert(Candidate.getLoadPtr());

    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
    SmallVector<RuntimePointerCheck, 4> Checks;

    copy_if(AllChecks, std::back_inserter(Checks),
            [&](const RuntimePointerCheck &Check) {
              for (auto PtrIdx1 : Check.first->Members)
                for (auto PtrIdx2 : Check.second->Members)
                  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
                                    CandLoadPtrs))
                    return true;
              return false;
            });

    LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
                      << "):\n");
    LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));

    return Checks;
  }

  /// Perform the transformation for a candidate.
  void
  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
                                  SCEVExpander &SEE) {
    // loop:
    //      %x = load %gep_i
    //         = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //         = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    Value *Ptr = Cand.Load->getPointerOperand();
    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
    auto *PH = L->getLoopPreheader();
    assert(PH && "Preheader should exist!");
    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
                                          PH->getTerminator());
    Value *Initial = new LoadInst(
        Cand.Load->getType(), InitialPtr, "load_initial",
        /* isVolatile */ false, Cand.Load->getAlign(), PH->getTerminator());

    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
                                   &L->getHeader()->front());
    PHI->addIncoming(Initial, PH);

    Type *LoadType = Initial->getType();
    Type *StoreType = Cand.Store->getValueOperand()->getType();
    auto &DL = Cand.Load->getParent()->getModule()->getDataLayout();
    (void)DL;

    assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
           "The type sizes should match!");

    Value *StoreValue = Cand.Store->getValueOperand();
    if (LoadType != StoreType)
      StoreValue = CastInst::CreateBitOrPointerCast(
          StoreValue, LoadType, "store_forward_cast", Cand.Store);

    PHI->addIncoming(StoreValue, L->getLoopLatch());

    Cand.Load->replaceAllUsesWith(PHI);
  }

  /// Top-level driver for each loop: find store->load forwarding
  /// candidates, add run-time checks and perform transformation.
  bool processLoop() {
    LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
                      << "\" checking " << *L << "\n");

    // Look for store-to-load forwarding cases across the
    // backedge. E.g.:
    //
    // loop:
    //      %x = load %gep_i
    //         = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //         = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    // First start with store->load dependences.
    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
    if (StoreToLoadDependences.empty())
      return false;

    // Generate an index for each load and store according to the original
    // program order. This will be used later.
    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();

    // To keep things simple for now, remove those where the load is
    // potentially fed by multiple stores.
    removeDependencesFromMultipleStores(StoreToLoadDependences);
    if (StoreToLoadDependences.empty())
      return false;

    // Filter the candidates further.
    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
      LLVM_DEBUG(dbgs() << "Candidate " << Cand);

      // Make sure that the stored value is available everywhere in the loop in
      // the next iteration.
      if (!doesStoreDominateAllLatches(Cand.Store->getParent(), L, DT))
        continue;

      // If the load is conditional we can't hoist its 0-iteration instance to
      // the preheader because that would make it unconditional. Thus we would
      // access a memory location that the original loop did not access.
      if (isLoadConditional(Cand.Load, L))
        continue;

      // Check whether the SCEV difference is the same as the induction step,
      // thus we load the value in the next iteration.
      if (!Cand.isDependenceDistanceOfOne(PSE, L))
        continue;

      assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
             "Loading from something other than indvar?");
      assert(
          isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
          "Storing to something other than indvar?");

      Candidates.push_back(Cand);
      LLVM_DEBUG(
          dbgs()
          << Candidates.size()
          << ". Valid store-to-load forwarding across the loop backedge\n");
    }
    if (Candidates.empty())
      return false;

    // Check intervening may-alias stores. These need runtime checks for alias
    // disambiguation.
    SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);

    // Too many checks are likely to outweigh the benefits of forwarding.
    if (Checks.size() > Candidates.size() * CheckPerElim) {
      LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
      return false;
    }
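    // E.g. with the default -runtime-check-per-loop-load-elim=1, three
    // eliminated loads justify at most three memchecks in total.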

    if (LAI.getPSE().getPredicate().getComplexity() >
        LoadElimSCEVCheckThreshold) {
      LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
      return false;
    }

    if (!L->isLoopSimplifyForm()) {
      LLVM_DEBUG(dbgs() << "Loop is not in loop-simplify form\n");
      return false;
    }

    if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
      if (LAI.hasConvergentOp()) {
        LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
                             "convergent calls\n");
        return false;
      }

      auto *HeaderBB = L->getHeader();
      auto *F = HeaderBB->getParent();
      bool OptForSize = F->hasOptSize() ||
                        llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
                                                    PGSOQueryType::IRPass);
      if (OptForSize) {
        LLVM_DEBUG(
            dbgs() << "Versioning is needed but not allowed when optimizing "
                      "for size.\n");
        return false;
      }

      // Point of no-return, start the transformation. First, version the loop
      // if necessary.

      LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
      LV.versionLoop();

      // After versioning, some of the candidates' pointers could stop being
      // SCEVAddRecs. We need to filter them out.
      auto NoLongerGoodCandidate =
          [this](const StoreToLoadForwardingCandidate &Cand) {
            return !isa<SCEVAddRecExpr>(
                       PSE.getSCEV(Cand.Load->getPointerOperand())) ||
                   !isa<SCEVAddRecExpr>(
                       PSE.getSCEV(Cand.Store->getPointerOperand()));
          };
      llvm::erase_if(Candidates, NoLongerGoodCandidate);
    }

    // Next, propagate the value stored by the store to the users of the load.
    // Also for the first iteration, generate the initial value of the load.
    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
                     "storeforward");
    for (const auto &Cand : Candidates)
      propagateStoredValueToLoadUsers(Cand, SEE);
    NumLoopLoadEliminated += Candidates.size();

    return true;
  }

private:
  Loop *L;

  /// Maps the load/store instructions to their index according to
  /// program order.
  DenseMap<Instruction *, unsigned> InstOrder;

  // Analyses used.
  LoopInfo *LI;
  const LoopAccessInfo &LAI;
  DominatorTree *DT;
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI;
  PredicatedScalarEvolution PSE;
};

} // end anonymous namespace

static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
                                      DominatorTree &DT,
                                      BlockFrequencyInfo *BFI,
                                      ProfileSummaryInfo *PSI,
                                      ScalarEvolution *SE, AssumptionCache *AC,
                                      LoopAccessInfoManager &LAIs) {
  // Build up a worklist of inner-loops to transform to avoid iterator
  // invalidation.
  // FIXME: This logic comes from other passes that actually change the loop
  // nest structure. It isn't clear this is necessary (or useful) for a pass
  // which merely optimizes the use of loads in a loop.
  SmallVector<Loop *, 8> Worklist;

  bool Changed = false;

  for (Loop *TopLevelLoop : LI)
    for (Loop *L : depth_first(TopLevelLoop)) {
      Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
      // We only handle inner-most loops.
      if (L->isInnermost())
        Worklist.push_back(L);
    }

  // Now walk the identified inner loops.
  for (Loop *L : Worklist) {
    // Match historical behavior
    if (!L->isRotatedForm() || !L->getExitingBlock())
      continue;
    // The actual work is performed by LoadEliminationForLoop.
    LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
    Changed |= LEL.processLoop();
    if (Changed)
      LAIs.clear();
  }
  return Changed;
}

PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  // There are no loops in the function. Return before computing other
  // expensive analyses.
  if (LI.empty())
    return PreservedAnalyses::all();
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
              &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);

  bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  return PA;
}