LoopLoadElimination.cpp
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a loop-aware load elimination pass.
//
// It uses LoopAccessAnalysis to identify loop-carried dependences with a
// distance of one between stores and loads. These form the candidates for the
// transformation. The source value of each store is then propagated to the
// users of the corresponding load. This makes the load dead.
//
// The pass can also version the loop and add memchecks in order to prove that
// may-aliasing stores can't change the value in memory before it's read by the
// load.
//
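// For example, in
//
//   for (unsigned i = 0; i < N; i++) {
//     A[i + 1] = B[i] + 1;
//     C[i] = A[i] * 2;
//   }
//
// the load of A[i] reads the value stored to A[i + 1] on the previous
// iteration, so it can be replaced by a PHI that forwards the stored value,
// seeded by an initial load of A[0] in the preheader.
//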
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
#include <tuple>
#include <utility>

using namespace llvm;

#define LLE_OPTION "loop-load-elim"
#define DEBUG_TYPE LLE_OPTION

static cl::opt<unsigned> CheckPerElim(
    "runtime-check-per-loop-load-elim", cl::Hidden,
    cl::desc("Max number of memchecks allowed per eliminated load on average"),
    cl::init(1));

static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
    "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Load Elimination"));

STATISTIC(NumLoopLoadEliminated, "Number of loads eliminated by LLE");

namespace {

/// Represent a store-to-load forwarding candidate.
struct StoreToLoadForwardingCandidate {
  LoadInst *Load;
  StoreInst *Store;

  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
      : Load(Load), Store(Store) {}

  /// Return true if the dependence from the store to the load has an
  /// absolute distance of one.
  /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L,
                                 const DominatorTree &DT) const {
    Value *LoadPtr = Load->getPointerOperand();
    Value *StorePtr = Store->getPointerOperand();
    Type *LoadType = getLoadStoreType(Load);
    auto &DL = Load->getDataLayout();

    assert(LoadPtr->getType()->getPointerAddressSpace() ==
               StorePtr->getType()->getPointerAddressSpace() &&
           DL.getTypeSizeInBits(LoadType) ==
               DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
           "Should be a known dependence");

    int64_t StrideLoad =
        getPtrStride(PSE, LoadType, LoadPtr, L, DT).value_or(0);
    int64_t StrideStore =
        getPtrStride(PSE, LoadType, StorePtr, L, DT).value_or(0);
    if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
      return false;

    // TODO: This check for stride values other than 1 and -1 can be eliminated.
    // However, doing so may cause the LoopAccessAnalysis to overcompensate,
    // generating numerous non-wrap runtime checks that may undermine the
    // benefits of load elimination. To safely implement support for non-unit
    // strides, we would need to ensure either that the processed case does not
    // require these additional checks, or improve the LAA to handle them more
    // efficiently, or potentially both.
    if (std::abs(StrideLoad) != 1)
      return false;

    unsigned TypeByteSize = DL.getTypeAllocSize(LoadType);

    auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
    auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));

    // We don't need to check non-wrapping here because forward/backward
    // dependence wouldn't be valid if these weren't monotonic accesses.
    auto *Dist = dyn_cast<SCEVConstant>(
        PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
    if (!Dist)
      return false;
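    // For example, for an i32 access with A[i + 1] = A[i] (stride 1), the
    // store-load pointer distance is 4 bytes and TypeByteSize * StrideLoad ==
    // 4 * 1, so the check below succeeds; for the descending A[i - 1] = A[i]
    // (stride -1), the expected distance is likewise -4.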
    const APInt &Val = Dist->getAPInt();
    return Val == TypeByteSize * StrideLoad;
  }

  Value *getLoadPtr() const { return Load->getPointerOperand(); }

#ifndef NDEBUG
  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const StoreToLoadForwardingCandidate &Cand) {
    OS << *Cand.Store << " -->\n";
    OS.indent(2) << *Cand.Load << "\n";
    return OS;
  }
#endif
};

} // end anonymous namespace

/// Check if the store dominates all latches, so as long as there is no
/// intervening store this value will be loaded in the next iteration.
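/// E.g. a store that only executes under a condition inside the loop body
/// does not dominate the latches, so the forwarded value would not be
/// available on every iteration of the backedge.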
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
                                         DominatorTree *DT) {
  SmallVector<BasicBlock *, 8> Latches;
  L->getLoopLatches(Latches);
  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
    return DT->dominates(StoreBlock, Latch);
  });
}

/// Return true if the load is not executed on all paths in the loop.
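/// E.g. in "for (...) { if (c) sum += A[i]; }" the load of A[i] only executes
/// when the guard is true, so its first-iteration instance cannot be hoisted
/// into the preheader.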
static bool isLoadConditional(LoadInst *Load, Loop *L) {
  return Load->getParent() != L->getHeader();
}

namespace {

/// The per-loop class that does most of the work.
class LoadEliminationForLoop {
public:
  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
                         DominatorTree *DT, BlockFrequencyInfo *BFI,
                         ProfileSummaryInfo *PSI)
      : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}

  /// Look through the loop-carried and loop-independent dependences in
  /// this loop and find store->load dependences.
  ///
  /// Note that no candidate is returned if LAA has failed to analyze the loop
  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
  std::forward_list<StoreToLoadForwardingCandidate>
  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
    std::forward_list<StoreToLoadForwardingCandidate> Candidates;

    const auto &DepChecker = LAI.getDepChecker();
    const auto *Deps = DepChecker.getDependences();
    if (!Deps)
      return Candidates;

    // Find store->load dependences (i.e. true dependences). Both lexically
    // forward and backward dependences qualify. Disqualify loads that have
    // other unknown dependences.

    SmallPtrSet<Instruction *, 4> LoadsWithUnknownDependence;

    for (const auto &Dep : *Deps) {
      Instruction *Source = Dep.getSource(DepChecker);
      Instruction *Destination = Dep.getDestination(DepChecker);

      if (Dep.Type == MemoryDepChecker::Dependence::Unknown ||
          Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
        if (isa<LoadInst>(Source))
          LoadsWithUnknownDependence.insert(Source);
        if (isa<LoadInst>(Destination))
          LoadsWithUnknownDependence.insert(Destination);
        continue;
      }

      if (Dep.isBackward())
        // Note that the designations source and destination follow the program
        // order, i.e. source is always first. (The direction is given by the
        // DepType.)
        std::swap(Source, Destination);
      else
        assert(Dep.isForward() && "Needs to be a forward dependence");
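      // E.g. in "... = A[i]; A[i + 1] = ...;" the load precedes the store in
      // program order, giving a backward dependence; the swap above makes the
      // store the forwarding source.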

      auto *Store = dyn_cast<StoreInst>(Source);
      if (!Store)
        continue;
      auto *Load = dyn_cast<LoadInst>(Destination);
      if (!Load)
        continue;

      // Only propagate if the stored values are bit/pointer castable.
      if (!CastInst::isBitOrNoopPointerCastable(
              getLoadStoreType(Store), getLoadStoreType(Load),
              Store->getDataLayout()))
        continue;

      Candidates.emplace_front(Load, Store);
    }

    if (!LoadsWithUnknownDependence.empty())
      Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
        return LoadsWithUnknownDependence.count(C.Load);
      });

    return Candidates;
  }

  /// Return the index of the instruction according to program order.
  unsigned getInstrIndex(Instruction *Inst) {
    auto I = InstOrder.find(Inst);
    assert(I != InstOrder.end() && "No index for instruction");
    return I->second;
  }

  /// If a load has multiple candidates associated (i.e. different
  /// stores), it means that it could be forwarding from multiple stores
  /// depending on control flow. Remove these candidates.
  ///
  /// Here, we rely on LAA to include the relevant loop-independent dependences.
  /// LAA is known to omit these in the very simple case when the read and the
  /// write within an alias set always take place using the *same* pointer.
  ///
  /// However, we know that this is not the case here, i.e. we can rely on LAA
  /// to provide us with loop-independent dependences for the cases we're
  /// interested in. Consider for example the case where a loop-independent
  /// dependence S1->S2 invalidates the forwarding S3->S2.
  ///
  ///   A[i]   = ...   (S1)
  ///   ...    = A[i]  (S2)
  ///   A[i+1] = ...   (S3)
  ///
  /// LAA will perform dependence analysis here because there are two
  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
  void removeDependencesFromMultipleStores(
      std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
    // If Store is nullptr it means that we have multiple stores forwarding to
    // this load.
    using LoadToSingleCandT =
        DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>;
    LoadToSingleCandT LoadToSingleCand;

    for (const auto &Cand : Candidates) {
      bool NewElt;
      LoadToSingleCandT::iterator Iter;

      std::tie(Iter, NewElt) =
          LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
      if (!NewElt) {
        const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
        // Already multiple stores forward to this load.
        if (OtherCand == nullptr)
          continue;

        // Handle the very basic case when the two stores are in the same block
        // so deciding which one forwards is easy. The later one forwards as
        // long as they both have a dependence distance of one to the load.
        if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
            Cand.isDependenceDistanceOfOne(PSE, L, *DT) &&
            OtherCand->isDependenceDistanceOfOne(PSE, L, *DT)) {
          // They are in the same block, the later one will forward to the load.
          if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
            OtherCand = &Cand;
        } else
          OtherCand = nullptr;
      }
    }

    Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
      if (LoadToSingleCand[Cand.Load] != &Cand) {
        LLVM_DEBUG(
            dbgs() << "Removing from candidates: \n"
                   << Cand
                   << " The load may have multiple stores forwarding to "
                   << "it\n");
        return true;
      }
      return false;
    });
  }

  /// Given two pointer operations by their RuntimePointerChecking
  /// indices, return true if they require an alias check.
  ///
  /// We need a check if one is a pointer for a candidate load and the other is
  /// a pointer for a possibly intervening store.
  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
                     const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath,
                     const SmallPtrSetImpl<Value *> &CandLoadPtrs) {
    Value *Ptr1 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
    Value *Ptr2 =
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
    return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
            (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
  }

  /// Return pointers that are possibly written to on the path from a
  /// forwarding store to a load.
  ///
  /// These pointers need to be alias-checked against the forwarding candidates.
  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
    // From FirstStore to LastLoad neither of the elimination candidate loads
    // should overlap with any of the stores.
    //
    // E.g.:
    //
    // st1 C[i]
    // ld1 B[i] <-------,
    // ld0 A[i] <----,  |              * LastLoad
    // ...           |  |
    // st2 E[i]      |  |
    // st3 B[i+1] -- | -'              * FirstStore
    // st0 A[i+1] ---'
    // st4 D[i]
    //
    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
    // ld0.

    LoadInst *LastLoad =
        llvm::max_element(Candidates,
                          [&](const StoreToLoadForwardingCandidate &A,
                              const StoreToLoadForwardingCandidate &B) {
                            return getInstrIndex(A.Load) <
                                   getInstrIndex(B.Load);
                          })
            ->Load;
    StoreInst *FirstStore =
        llvm::min_element(Candidates,
                          [&](const StoreToLoadForwardingCandidate &A,
                              const StoreToLoadForwardingCandidate &B) {
                            return getInstrIndex(A.Store) <
                                   getInstrIndex(B.Store);
                          })
            ->Store;

    // We're looking for stores after the first forwarding store until the end
    // of the loop, then from the beginning of the loop until the last
    // forwarded-to load. Collect the pointers of those stores.
    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;

    auto InsertStorePtr = [&](Instruction *I) {
      if (auto *S = dyn_cast<StoreInst>(I))
        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
    };
    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
                  MemInstrs.end(), InsertStorePtr);
    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
                  InsertStorePtr);

    return PtrsWrittenOnFwdingPath;
  }

  /// Determine the pointer alias checks to prove that there are no
  /// intervening stores.
  SmallVector<RuntimePointerCheck, 4> collectMemchecks(
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {

    SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
        findPointersWrittenOnForwardingPath(Candidates);

    // Collect the pointers of the candidate loads.
    SmallPtrSet<Value *, 4> CandLoadPtrs;
    for (const auto &Candidate : Candidates)
      CandLoadPtrs.insert(Candidate.getLoadPtr());

    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
    SmallVector<RuntimePointerCheck, 4> Checks;

    copy_if(AllChecks, std::back_inserter(Checks),
            [&](const RuntimePointerCheck &Check) {
              for (auto PtrIdx1 : Check.first->Members)
                for (auto PtrIdx2 : Check.second->Members)
                  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
                                    CandLoadPtrs))
                    return true;
              return false;
            });

    LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
                      << "):\n");
    LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));

    return Checks;
  }

  /// Perform the transformation for a candidate.
  void
  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
                                  SCEVExpander &SEE) {
    // loop:
    //      %x = load %gep_i
    //      = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //      = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    Value *Ptr = Cand.Load->getPointerOperand();
    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
    auto *PH = L->getLoopPreheader();
    assert(PH && "Preheader should exist!");
    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
                                          PH->getTerminator());
    LoadInst *Initial =
        new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial",
                     /* isVolatile */ false, Cand.Load->getAlign(),
                     PH->getTerminator()->getIterator());
    // We don't give any debug location to Initial, because it is inserted
    // into the loop's preheader. A debug location inside the loop would cause
    // misleading stepping when debugging. The test
    // update-debugloc-store-forwarded.ll checks this.
    Initial->setDebugLoc(DebugLoc::getDropped());

    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
    PHI->insertBefore(L->getHeader()->begin());
    PHI->addIncoming(Initial, PH);

    Type *LoadType = Initial->getType();
    Type *StoreType = Cand.Store->getValueOperand()->getType();
    auto &DL = Cand.Load->getDataLayout();
    (void)DL;

    assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
           "The type sizes should match!");

    Value *StoreValue = Cand.Store->getValueOperand();
    if (LoadType != StoreType) {
      StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType,
                                                    "store_forward_cast",
                                                    Cand.Store->getIterator());
      // Because the cast feeds the new `phi` that replaces the old `load`, we
      // give it the `load`'s debug location.
      cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
    }

    PHI->addIncoming(StoreValue, L->getLoopLatch());

    Cand.Load->replaceAllUsesWith(PHI);
    PHI->setDebugLoc(Cand.Load->getDebugLoc());
  }

  /// Top-level driver for each loop: find store->load forwarding
  /// candidates, add run-time checks and perform transformation.
  bool processLoop() {
    LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
                      << "\" checking " << *L << "\n");

    // Look for store-to-load forwarding cases across the
    // backedge. E.g.:
    //
    // loop:
    //      %x = load %gep_i
    //      = ... %x
    //      store %y, %gep_i_plus_1
    //
    // =>
    //
    // ph:
    //      %x.initial = load %gep_0
    // loop:
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
    //      %x = load %gep_i            <---- now dead
    //      = ... %x.storeforward
    //      store %y, %gep_i_plus_1

    // First start with store->load dependences.
    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
    if (StoreToLoadDependences.empty())
      return false;

    // Generate an index for each load and store according to the original
    // program order. This will be used later.
    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();

    // To keep things simple for now, remove those where the load is potentially
    // fed by multiple stores.
    removeDependencesFromMultipleStores(StoreToLoadDependences);
    if (StoreToLoadDependences.empty())
      return false;

    // Filter the candidates further.
    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
      LLVM_DEBUG(dbgs() << "Candidate " << Cand);

      // Make sure that the stored value is available everywhere in the loop in
      // the next iteration.
      if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
        continue;

      // If the load is conditional we can't hoist its 0-iteration instance to
      // the preheader because that would make it unconditional. Thus we would
      // access a memory location that the original loop did not access.
      if (isLoadConditional(Cand.Load, L))
        continue;

      // Check whether the SCEV difference is the same as the induction step,
      // thus we load the value in the next iteration.
      if (!Cand.isDependenceDistanceOfOne(PSE, L, *DT))
        continue;

      assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
             "Loading from something other than indvar?");
      assert(
          isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) &&
          "Storing to something other than indvar?");

      Candidates.push_back(Cand);
      LLVM_DEBUG(
          dbgs()
          << Candidates.size()
          << ". Valid store-to-load forwarding across the loop backedge\n");
    }
    if (Candidates.empty())
      return false;

    // Check intervening may-alias stores. These need runtime checks for alias
    // disambiguation.
    SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates);

    // Too many checks are likely to outweigh the benefits of forwarding.
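    // E.g. with the default CheckPerElim of 1, two eliminated loads justify at
    // most two memchecks in total.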
    if (Checks.size() > Candidates.size() * CheckPerElim) {
      LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
      return false;
    }

    if (LAI.getPSE().getPredicate().getComplexity() >
        LoadElimSCEVCheckThreshold) {
      LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
      return false;
    }

    if (!L->isLoopSimplifyForm()) {
      LLVM_DEBUG(dbgs() << "Loop is not in loop-simplify form\n");
      return false;
    }

    if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
      if (LAI.hasConvergentOp()) {
        LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
                             "convergent calls\n");
        return false;
      }

      auto *HeaderBB = L->getHeader();
      if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
                                      PGSOQueryType::IRPass)) {
        LLVM_DEBUG(
            dbgs() << "Versioning is needed but not allowed when optimizing "
                      "for size.\n");
        return false;
      }

      // Point of no-return, start the transformation. First, version the loop
      // if necessary.

      LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE());
      LV.versionLoop();

      // After versioning, some of the candidates' pointers could stop being
      // SCEVAddRecs. We need to filter them out.
      auto NoLongerGoodCandidate = [this](
          const StoreToLoadForwardingCandidate &Cand) {
        return !isa<SCEVAddRecExpr>(
                   PSE.getSCEV(Cand.Load->getPointerOperand())) ||
               !isa<SCEVAddRecExpr>(
                   PSE.getSCEV(Cand.Store->getPointerOperand()));
      };
      llvm::erase_if(Candidates, NoLongerGoodCandidate);
    }

    // Next, propagate the value stored by the store to the users of the load.
    // Also for the first iteration, generate the initial value of the load.
    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
                     "storeforward");
    for (const auto &Cand : Candidates)
      propagateStoredValueToLoadUsers(Cand, SEE);
    NumLoopLoadEliminated += Candidates.size();

    return true;
  }

private:
  Loop *L;

  /// Maps the load/store instructions to their index according to
  /// program order.
  DenseMap<Instruction *, unsigned> InstOrder;

  // Analyses used.
  LoopInfo *LI;
  const LoopAccessInfo &LAI;
  DominatorTree *DT;
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI;
  PredicatedScalarEvolution PSE;
};

} // end anonymous namespace

static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
                                      DominatorTree &DT,
                                      BlockFrequencyInfo *BFI,
                                      ProfileSummaryInfo *PSI,
                                      ScalarEvolution *SE, AssumptionCache *AC,
                                      LoopAccessInfoManager &LAIs) {
  // Build up a worklist of inner-loops to transform to avoid iterator
  // invalidation.
  // FIXME: This logic comes from other passes that actually change the loop
  // nest structure. It isn't clear this is necessary (or useful) for a pass
  // which merely optimizes the use of loads in a loop.
  SmallVector<Loop *, 8> Worklist;

  bool Changed = false;

  for (Loop *TopLevelLoop : LI)
    for (Loop *L : depth_first(TopLevelLoop)) {
      Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
      // We only handle inner-most loops.
      if (L->isInnermost())
        Worklist.push_back(L);
    }

  // Now walk the identified inner loops.
  for (Loop *L : Worklist) {
    // Match historical behavior.
    if (!L->isRotatedForm() || !L->getExitingBlock())
      continue;
    // The actual work is performed by LoadEliminationForLoop.
    LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI);
    Changed |= LEL.processLoop();
    if (Changed)
      LAIs.clear();
  }
  return Changed;
}

PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  // There are no loops in the function. Return before computing other expensive
  // analyses.
  if (LI.empty())
    return PreservedAnalyses::all();
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  auto &LAIs = AM.getResult<LoopAccessAnalysis>(F);

  bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}
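
// To exercise this pass in isolation (assuming the usual pass-registry name,
// which matches LLE_OPTION):
//
//   opt -passes=loop-load-elim -S input.ll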