LICM.cpp (LLVM 17.0.0git)
//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion, attempting to remove as much
// code from the body of a loop as possible. It does this by either hoisting
// code into the preheader block, or by sinking code to the exit blocks if it is
// safe. This pass also promotes must-aliased memory locations in the loop to
// live in registers, thus hoisting and sinking "invariant" loads and stores.
//
// Hoisting operations out of loops is a canonicalization transform. It
// enables and simplifies subsequent optimizations in the middle-end.
// Rematerialization of hoisted instructions to reduce register pressure is the
// responsibility of the back-end, which has more accurate information about
// register pressure and also handles other optimizations than LICM that
// increase live-ranges.
//
// This pass uses alias analysis for two purposes:
//
//  1. Moving loop invariant loads and calls out of loops. If we can determine
//     that a load or call inside of a loop never aliases anything stored to,
//     we can hoist it or sink it like any other instruction.
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
//     the loop, we try to move the store to happen AFTER the loop instead of
//     inside of the loop. This can only happen if a few conditions are true:
//       A. The pointer stored through is loop invariant
//       B. There are no stores or loads in the loop which _may_ alias the
//          pointer. There are no calls in the loop which mod/ref the pointer.
//     If these conditions are true, we can promote the loads and stores in the
//     loop of the pointer to use a temporary alloca'd variable. We then use
//     the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//

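// A small before/after flavor of the transforms above (illustrative IR, not
// from this file):
//
//   loop:                          ;; before LICM
//     %a = load i32, ptr @g        ; loop-invariant load -> hoisted
//     %b = add i32 %a, %i
//     store i32 %b, ptr %p         ; %p loop invariant -> promoted; the
//                                  ; final store is sunk past the loop
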
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <utility>
using namespace llvm;

namespace llvm {
class LPMUpdater;
} // namespace llvm

#define DEBUG_TYPE "licm"

STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
STATISTIC(NumLoadPromoted, "Number of load-only promotions");
STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
STATISTIC(NumMinMaxHoisted,
          "Number of min/max expressions hoisted out of the loop");
STATISTIC(NumGEPsHoisted,
          "Number of geps reassociated and hoisted out of the loop");
STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
                            "and hoisted out of the loop");

/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

118 "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
119 cl::desc("Enable control flow (and PHI) hoisting in LICM"));
120
static cl::opt<bool>
    SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
                 cl::desc("Force thread model single in LICM pass"));

126 "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
127 cl::desc("Max num uses visited for identifying load "
128 "invariance in loop using invariant start (default = 8)"));
129
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
cl::opt<unsigned> llvm::SetLicmMssaOptCap(
    "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
    cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
    "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
    cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
             "effect. When MSSA in LICM is enabled, then this is the maximum "
             "number of accesses allowed to be present in a loop in order to "
             "enable memory promotion."));

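// Both caps above are exposed as command-line options. For example
// (illustrative invocation; any IR module works):
//
//   opt -passes='loop-mssa(licm)' -licm-mssa-optimization-cap=50 \
//       -licm-mssa-max-acc-promotion=100 -S in.ll
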
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                                      const LoopSafetyInfo *SafetyInfo,
                                      TargetTransformInfo *TTI,
                                      bool &FoldableInLoop, bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                  OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(
    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
    AssumptionCache *AC, bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                     Loop *CurLoop, Instruction &I,
                                     SinkAndHoistLICMFlags &Flags,
                                     bool InvariantGroup);
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
                                      MemoryUse &MU);
/// Aggregates various functions for hoisting computations out of loop.
static bool hoistArithmetics(Instruction &I, Loop &L,
                             ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU, AssumptionCache *AC,
                             DominatorTree *DT);
static Instruction *cloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU);

static void moveInstructionBefore(Instruction &I, Instruction &Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE);

static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                function_ref<void(Instruction *)> Fn);
using PointersAndHasReadsOutsideSet =
    std::pair<SmallSetVector<Value *, 8>, bool>;
static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);

namespace {
struct LoopInvariantCodeMotion {
  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
                 AssumptionCache *AC, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);

  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
                          unsigned LicmMssaNoAccForPromotionCap,
                          bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
        LicmAllowSpeculation(LicmAllowSpeculation) {}

private:
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
  bool LicmAllowSpeculation;
};

struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass(
      unsigned LicmMssaOptCap = SetLicmMssaOptCap,
      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
      bool LicmAllowSpeculation = true)
      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                           LicmAllowSpeculation) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
                      << L->getHeader()->getNameOrAsOperand() << "\n");

    Function *F = L->getHeader()->getParent();

    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(
        L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
        &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
        SE ? &SE->getSE() : nullptr, MSSA, &ORE);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    getLoopAnalysisUsage(AU);
    LoopPass::getAnalysisUsage(AU);
  }

private:
  LoopInvariantCodeMotion LICM;
};
} // namespace

PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
  if (!AR.MSSA)
    report_fatal_error("LICM requires MemorySSA (loop-mssa)",
                       /*GenCrashDiag*/false);

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

  LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
                               Opts.AllowSpeculation);
  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
                      &AR.SE, AR.MSSA, &ORE))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserve<MemorySSAAnalysis>();

  return PA;
}

void LICMPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);

  OS << '<';
  OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
  OS << '>';
}

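// Note: with the printPipeline implementation above, this pass renders as
// "licm<allowspeculation>" (or "licm<no-allowspeculation>") when the pass
// pipeline is printed, and the same syntax is accepted by -passes.
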
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
                                 LoopStandardAnalysisResults &AR,
                                 LPMUpdater &) {
  if (!AR.MSSA)
    report_fatal_error("LNICM requires MemorySSA (loop-mssa)",
                       /*GenCrashDiag*/false);

  // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
  // pass. Function analyses need to be preserved across loop transformations
  // but ORE cannot be preserved (see comment before the pass definition).
  OptimizationRemarkEmitter ORE(LN.getParent());

  LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
                               Opts.AllowSpeculation);

  Loop &OutermostLoop = LN.getOutermostLoop();
  bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
                                &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);

  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();

  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  PA.preserve<MemorySSAAnalysis>();

  return PA;
}

void LNICMPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);

  OS << '<';
  OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
  OS << '>';
}

char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
                           unsigned LicmMssaNoAccForPromotionCap,
                           bool LicmAllowSpeculation) {
  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                            LicmAllowSpeculation);
}

SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
                                             MemorySSA &MSSA)
    : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
                            IsSink, L, MSSA) {}

SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
    unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
    Loop &L, MemorySSA &MSSA)
    : LicmMssaOptCap(LicmMssaOptCap),
      LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
      IsSink(IsSink) {
  unsigned AccessCapCount = 0;
  for (auto *BB : L.getBlocks())
    if (const auto *Accesses = MSSA.getBlockAccesses(BB))
      for (const auto &MA : *Accesses) {
        (void)MA;
        ++AccessCapCount;
        if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
          NoOfMemAccTooLarge = true;
          return;
        }
      }
}

/// Hoist expressions out of the specified loop. Note, alias info for the inner
/// loop is not preserved, so it is not a good idea to run LICM multiple
/// times on one loop.
bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
                                        DominatorTree *DT, AssumptionCache *AC,
                                        TargetLibraryInfo *TLI,
                                        TargetTransformInfo *TTI,
                                        ScalarEvolution *SE, MemorySSA *MSSA,
                                        OptimizationRemarkEmitter *ORE,
                                        bool LoopNestMode) {
  bool Changed = false;

  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

  // If this loop has metadata indicating that LICM is not to be performed then
  // just exit.
  if (hasDisableLICMTransformsHint(L)) {
    return false;
  }

  // Don't sink stores from loops with coroutine suspend instructions.
  // LICM would sink instructions into the default destination of
  // the coroutine switch. The default destination of the switch is to
  // handle the case where the coroutine is suspended, by which point the
  // coroutine frame may have been destroyed. No instruction can be sunk there.
  // FIXME: This unfortunately hurts the performance of coroutines; however,
  // there is currently no general solution for this. Similar issues could also
  // potentially happen in other passes where instructions are being moved
  // across that edge.
  bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
    return llvm::any_of(*BB, [](Instruction &I) {
      IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
      return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
    });
  });

  MemorySSAUpdater MSSAU(MSSA);
  SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                              /*IsSink=*/true, *L, *MSSA);

  // Get the preheader block to move instructions into...
  BasicBlock *Preheader = L->getLoopPreheader();

  // Compute loop safety information.
  ICFLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(L);

  // We want to visit all of the instructions in this loop... that are not part
  // of our subloops (they have already had their invariants hoisted out of
  // their loop, into this loop, so there is no need to process the BODIES of
  // the subloops).
  //
  // Traverse the body of the loop in depth first order on the dominator tree so
  // that we are guaranteed to see definitions before we see uses. This allows
  // us to sink instructions in one pass, without iteration. After sinking
  // instructions, we perform another pass to hoist them out of the loop.
  if (L->hasDedicatedExits())
    Changed |=
        LoopNestMode
            ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
                                    TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
            : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                         MSSAU, &SafetyInfo, Flags, ORE);
  Flags.setIsSink(false);
  if (Preheader)
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
                           MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
                           LicmAllowSpeculation);

  // Now that all loop invariants have been removed from the loop, promote any
  // memory references to scalars that we can.
  // Don't sink stores from loops without dedicated block exits. Exits
  // containing indirect branches are not transformed by loop simplify,
  // make sure we catch that. An additional load may be generated in the
  // preheader for SSA updater, so also avoid sinking when no preheader
  // is available.
  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
      !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
    // Figure out the loop exits and their insertion points
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);

    // We can't insert into a catchswitch.
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
      return isa<CatchSwitchInst>(Exit->getTerminator());
    });

    if (!HasCatchSwitch) {
      SmallVector<Instruction *, 8> InsertPts;
      SmallVector<MemoryAccess *, 8> MSSAInsertPts;
      InsertPts.reserve(ExitBlocks.size());
      MSSAInsertPts.reserve(ExitBlocks.size());
      for (BasicBlock *ExitBlock : ExitBlocks) {
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
        MSSAInsertPts.push_back(nullptr);
      }

      PredIteratorCache PIC;

      // Promoting one set of accesses may make the pointers for another set
      // loop invariant, so run this in a loop.
      bool Promoted = false;
      bool LocalPromoted;
      do {
        LocalPromoted = false;
        for (auto [PointerMustAliases, HasReadsOutsideSet] :
             collectPromotionCandidates(MSSA, AA, L)) {
          LocalPromoted |= promoteLoopAccessesToScalars(
              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
              DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
              LicmAllowSpeculation, HasReadsOutsideSet);
        }
        Promoted |= LocalPromoted;
      } while (LocalPromoted);

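      // Illustration (hypothetical IR) of why this iterates: once the
      // accesses to %slot are promoted to a register, the pointer loaded
      // from %slot becomes loop invariant, making accesses through it
      // promotable on the next round:
      //
      //   loop:
      //     %q = load ptr, ptr %slot   ; %slot promoted in round 1
      //     store i32 %v, ptr %q       ; %q promotable in round 2
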
      // Once we have promoted values across the loop body we have to
      // recursively reform LCSSA as any nested loop may now have values defined
      // within the loop used in the outer loop.
      // FIXME: This is really heavy handed. It would be a bit better to use an
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
      // it as it went.
      if (Promoted)
        formLCSSARecursively(*L, *DT, LI);

      Changed |= Promoted;
    }
  }

  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
  // specifically moving instructions across the loop boundary and so it is
  // especially in need of basic functional correctness checking here.
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
  assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
         "Parent loop not left in LCSSA form after LICM!");

  if (VerifyMemorySSA)
    MSSA->verifyMemorySSA();

  if (Changed && SE)
    SE->forgetLoopDispositions();
  return Changed;
}

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                      DominatorTree *DT, TargetLibraryInfo *TLI,
                      TargetTransformInfo *TTI, Loop *CurLoop,
                      MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
                      SinkAndHoistLICMFlags &Flags,
                      OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {

  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to sinkRegion.");

  // We want to visit children before parents. We will enqueue all the parents
  // before their children in the worklist and process the worklist in reverse
  // order.
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);

  bool Changed = false;
  for (DomTreeNode *DTN : reverse(Worklist)) {
    BasicBlock *BB = DTN->getBlock();
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (inSubLoop(BB, CurLoop, LI))
      continue;

    for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
      Instruction &I = *--II;

      // The instruction is not used in the loop if it is dead. In this case,
      // we just delete it instead of sinking it.
      if (isInstructionTriviallyDead(&I, TLI)) {
        LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
        salvageKnowledge(&I);
        salvageDebugInfo(I);
        ++II;
        eraseInstruction(I, *SafetyInfo, MSSAU);
        Changed = true;
        continue;
      }

      // Check to see if we can sink this instruction to the exit blocks
      // of the loop. We can do this if all users of the instruction are
      // outside of the loop. In this case, it doesn't even matter if the
      // operands of the instruction are loop invariant.
      //
      bool FoldableInLoop = false;
      bool LoopNestMode = OutermostLoop != nullptr;
      if (!I.mayHaveSideEffects() &&
          isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
                                    SafetyInfo, TTI, FoldableInLoop,
                                    LoopNestMode) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
          if (!FoldableInLoop) {
            ++II;
            salvageDebugInfo(I);
            eraseInstruction(I, *SafetyInfo, MSSAU);
          }
          Changed = true;
        }
      }
    }
  }
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();
  return Changed;
}

bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                                 DominatorTree *DT, TargetLibraryInfo *TLI,
                                 TargetTransformInfo *TTI, Loop *CurLoop,
                                 MemorySSAUpdater &MSSAU,
                                 ICFLoopSafetyInfo *SafetyInfo,
                                 SinkAndHoistLICMFlags &Flags,
                                 OptimizationRemarkEmitter *ORE) {

  bool Changed = false;
  SmallPriorityWorklist<Loop *, 4> Worklist;
  Worklist.insert(CurLoop);
  appendLoopsToWorklist(*CurLoop, Worklist);
  while (!Worklist.empty()) {
    Loop *L = Worklist.pop_back_val();
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
                          MSSAU, SafetyInfo, Flags, ORE, CurLoop);
  }
  return Changed;
}

namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
// start hoisting to the loop preheader, and when we see a loop invariant branch
// we make note of this. When we then come to hoist an instruction that's
// conditional on such a branch we duplicate the branch and the relevant control
// flow, then hoist the instruction into the block corresponding to its original
// block in the duplicated control flow.
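// For example (illustrative, not from this file): given a loop-invariant
// condition %c,
//
//   header: br i1 %c, label %then, label %else
//   then:   %a = add i32 %x, 1        ; %x loop invariant
//
// the branch on %c is first duplicated into the preheader, and %a is then
// hoisted into the duplicated "then" block rather than the preheader itself,
// which in turn allows a phi over %a to be hoisted as well.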
class ControlFlowHoister {
private:
  // Information about the loop we are hoisting from
  LoopInfo *LI;
  DominatorTree *DT;
  Loop *CurLoop;
  MemorySSAUpdater &MSSAU;

  // A map of blocks in the loop to the block their instructions will be hoisted
  // to.
  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;

  // The branches that we can hoist, mapped to the block that marks a
  // convergence point of their control flow.
  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;

public:
  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
                     MemorySSAUpdater &MSSAU)
      : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}

  void registerPossiblyHoistableBranch(BranchInst *BI) {
    // We can only hoist conditional branches with loop invariant operands.
    if (!ControlFlowHoisting || !BI->isConditional() ||
        !CurLoop->hasLoopInvariantOperands(BI))
      return;

    // The branch destinations need to be in the loop, and we don't gain
    // anything by duplicating conditional branches with duplicate successors,
    // as it's essentially the same as an unconditional branch.
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
        TrueDest == FalseDest)
      return;

    // We can hoist BI if one branch destination is the successor of the other,
    // or if both have a common successor, which we check by seeing if the
    // intersection of their successors is non-empty.
    // TODO: This could be expanded to allowing branches where both ends
    // eventually converge to a single block.
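    // The two supported shapes, sketched (S is the common successor):
    //
    //   triangle: BI -> T, BI -> S,           diamond: BI -> T, BI -> F,
    //             T  -> S                              T  -> S,  F  -> S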
    SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
    TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
    FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
    BasicBlock *CommonSucc = nullptr;
    if (TrueDestSucc.count(FalseDest)) {
      CommonSucc = FalseDest;
    } else if (FalseDestSucc.count(TrueDest)) {
      CommonSucc = TrueDest;
    } else {
      set_intersect(TrueDestSucc, FalseDestSucc);
      // If there's one common successor use that.
      if (TrueDestSucc.size() == 1)
        CommonSucc = *TrueDestSucc.begin();
      // If there's more than one, pick whichever appears first in the block
      // list (we can't use the value returned by TrueDestSucc.begin() as it's
      // unpredictable which element gets returned).
      else if (!TrueDestSucc.empty()) {
        Function *F = TrueDest->getParent();
        auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
        auto It = llvm::find_if(*F, IsSucc);
        assert(It != F->end() && "Could not find successor in function");
        CommonSucc = &*It;
      }
    }
    // The common successor has to be dominated by the branch, as otherwise
    // there will be some other path to the successor that will not be
    // controlled by this branch so any phi we hoist would be controlled by the
    // wrong condition. This also takes care of avoiding hoisting of loop back
    // edges.
    // TODO: In some cases this could be relaxed if the successor is dominated
    // by another block that's been hoisted and we can guarantee that the
    // control flow has been replicated exactly.
    if (CommonSucc && DT->dominates(BI, CommonSucc))
      HoistableBranches[BI] = CommonSucc;
  }

  bool canHoistPHI(PHINode *PN) {
    // The phi must have loop invariant operands.
    if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
      return false;
    // We can hoist phis if the block they are in is the target of hoistable
    // branches which cover all of the predecessors of the block.
    SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
    BasicBlock *BB = PN->getParent();
    for (BasicBlock *PredBB : predecessors(BB))
      PredecessorBlocks.insert(PredBB);
    // If we have fewer predecessor blocks than predecessors then the phi will
    // have more than one incoming value for the same block which we can't
    // handle.
    // TODO: This could be handled by erasing some of the duplicate incoming
    // values.
    if (PredecessorBlocks.size() != pred_size(BB))
      return false;
    for (auto &Pair : HoistableBranches) {
      if (Pair.second == BB) {
        // Which blocks are predecessors via this branch depends on if the
        // branch is triangle-like or diamond-like.
        if (Pair.first->getSuccessor(0) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        } else if (Pair.first->getSuccessor(1) == BB) {
          PredecessorBlocks.erase(Pair.first->getParent());
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
        } else {
          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
        }
      }
    }
    // PredecessorBlocks will now be empty if for every predecessor of BB we
    // found a hoistable branch source.
    return PredecessorBlocks.empty();
  }

  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
    if (!ControlFlowHoisting)
      return CurLoop->getLoopPreheader();
    // If BB has already been hoisted, return that
    if (HoistDestinationMap.count(BB))
      return HoistDestinationMap[BB];

    // Check if this block is conditional based on a pending branch
    auto HasBBAsSuccessor =
        [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
          return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
                                       Pair.first->getSuccessor(1) == BB);
        };
    auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);

    // If not involved in a pending branch, hoist to preheader
    BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
    if (It == HoistableBranches.end()) {
      LLVM_DEBUG(dbgs() << "LICM using "
                        << InitialPreheader->getNameOrAsOperand()
                        << " as hoist destination for "
                        << BB->getNameOrAsOperand() << "\n");
      HoistDestinationMap[BB] = InitialPreheader;
      return InitialPreheader;
    }
    BranchInst *BI = It->first;
    assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
               HoistableBranches.end() &&
           "BB is expected to be the target of at most one branch");

    LLVMContext &C = BB->getContext();
    BasicBlock *TrueDest = BI->getSuccessor(0);
    BasicBlock *FalseDest = BI->getSuccessor(1);
    BasicBlock *CommonSucc = HoistableBranches[BI];
    BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());

    // Create hoisted versions of blocks that currently don't have them
    auto CreateHoistedBlock = [&](BasicBlock *Orig) {
      if (HoistDestinationMap.count(Orig))
        return HoistDestinationMap[Orig];
      BasicBlock *New =
          BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
      HoistDestinationMap[Orig] = New;
      DT->addNewBlock(New, HoistTarget);
      if (CurLoop->getParentLoop())
        CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
      ++NumCreatedBlocks;
      LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
                        << " as hoist destination for " << Orig->getName()
                        << "\n");
      return New;
    };
    BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
    BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
    BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);

    // Link up these blocks with branches.
    if (!HoistCommonSucc->getTerminator()) {
      // The new common successor we've generated will branch to whatever that
      // hoist target branched to.
      BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
      assert(TargetSucc && "Expected hoist target to have a single successor");
      HoistCommonSucc->moveBefore(TargetSucc);
      BranchInst::Create(TargetSucc, HoistCommonSucc);
    }
    if (!HoistTrueDest->getTerminator()) {
      HoistTrueDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistTrueDest);
    }
    if (!HoistFalseDest->getTerminator()) {
      HoistFalseDest->moveBefore(HoistCommonSucc);
      BranchInst::Create(HoistCommonSucc, HoistFalseDest);
    }

    // If BI is being cloned to what was originally the preheader then
    // HoistCommonSucc will now be the new preheader.
    if (HoistTarget == InitialPreheader) {
      // Phis in the loop header now need to use the new preheader.
      InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
      MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
          HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
      // The new preheader dominates the loop header.
      DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
      DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
      DT->changeImmediateDominator(HeaderNode, PreheaderNode);
      // The preheader hoist destination is now the new preheader, with the
      // exception of the hoist destination of this branch.
      for (auto &Pair : HoistDestinationMap)
        if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
          Pair.second = HoistCommonSucc;
    }

    // Now finally clone BI.
    ReplaceInstWithInst(
        HoistTarget->getTerminator(),
        BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
    ++NumClonedBranches;

    assert(CurLoop->getLoopPreheader() &&
           "Hoisting blocks should not have destroyed preheader");
    return HoistDestinationMap[BB];
  }
};
} // namespace

/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                       DominatorTree *DT, AssumptionCache *AC,
                       TargetLibraryInfo *TLI, Loop *CurLoop,
                       MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                       ICFLoopSafetyInfo *SafetyInfo,
                       SinkAndHoistLICMFlags &Flags,
                       OptimizationRemarkEmitter *ORE, bool LoopNestMode,
                       bool AllowSpeculation) {
  // Verify inputs.
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
         CurLoop != nullptr && SafetyInfo != nullptr &&
         "Unexpected input to hoistRegion.");

  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);

  // Keep track of instructions that have been hoisted, as they may need to be
  // re-hoisted if they end up not dominating all of their uses.
  SmallVector<Instruction *, 16> HoistedInstructions;

  // For PHI hoisting to work we need to hoist blocks before their successors.
  // We can do this by iterating through the blocks in the loop in reverse
  // post-order.
  LoopBlocksRPO Worklist(CurLoop);
  Worklist.perform(LI);
  bool Changed = false;
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  for (BasicBlock *BB : Worklist) {
    // Only need to process the contents of this block if it is not part of a
    // subloop (which would already have been processed).
    if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
      continue;

    for (Instruction &I : llvm::make_early_inc_range(*BB)) {
      // Try hoisting the instruction out to the preheader. We can only do
      // this if all of the operands of the instruction are loop invariant and
      // if it is safe to hoist the instruction. We also check block frequency
      // to make sure the instruction only gets hoisted into colder blocks.
      // TODO: It may be safe to hoist if we are hoisting to a conditional block
      // and we have accurately duplicated the control flow from the loop header
      // to that block.
      if (CurLoop->hasLoopInvariantOperands(&I) &&
          canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
          isSafeToExecuteUnconditionally(
              I, DT, TLI, CurLoop, SafetyInfo, ORE,
              Preheader->getTerminator(), AC, AllowSpeculation)) {
        hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
              MSSAU, SE, ORE);
        HoistedInstructions.push_back(&I);
        Changed = true;
        continue;
      }

      // Attempt to remove floating point division out of the loop by
      // converting it to a reciprocal multiplication.
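      // For example (illustrative): with the 'arcp' fast-math flag,
      //   %r = fdiv arcp float %x, %d        ; %d loop invariant
      // becomes
      //   %recip = fdiv arcp float 1.0, %d   ; hoisted to the preheader
      //   %r = fmul arcp float %x, %recip    ; remains in the loop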
      if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
          CurLoop->isLoopInvariant(I.getOperand(1))) {
        auto Divisor = I.getOperand(1);
        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
        ReciprocalDivisor->insertBefore(&I);

        auto Product =
            BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
        Product->setFastMathFlags(I.getFastMathFlags());
        SafetyInfo->insertInstructionTo(Product, I.getParent());
        Product->insertAfter(&I);
        I.replaceAllUsesWith(Product);
        eraseInstruction(I, *SafetyInfo, MSSAU);

        hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
              SafetyInfo, MSSAU, SE, ORE);
        HoistedInstructions.push_back(ReciprocalDivisor);
        Changed = true;
        continue;
      }

940
941 auto IsInvariantStart = [&](Instruction &I) {
942 using namespace PatternMatch;
943 return I.use_empty() &&
944 match(&I, m_Intrinsic<Intrinsic::invariant_start>());
945 };
946 auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
947 return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
948 SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
949 };
950 if ((IsInvariantStart(I) || isGuard(&I)) &&
951 CurLoop->hasLoopInvariantOperands(&I) &&
952 MustExecuteWithoutWritesBefore(I)) {
953 hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
954 MSSAU, SE, ORE);
955 HoistedInstructions.push_back(&I);
956 Changed = true;
957 continue;
958 }
959
      if (PHINode *PN = dyn_cast<PHINode>(&I)) {
        if (CFH.canHoistPHI(PN)) {
          // Redirect incoming blocks first to ensure that we create hoisted
          // versions of those blocks before we hoist the phi.
          for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
            PN->setIncomingBlock(
                i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
          hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
                MSSAU, SE, ORE);
          assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
          Changed = true;
          continue;
        }
      }

      // Try to reassociate instructions so that part of computations can be
      // done out of loop.
      if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
        Changed = true;
        continue;
      }

      // Remember possibly hoistable branches so we can actually hoist them
      // later if needed.
      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
        CFH.registerPossiblyHoistableBranch(BI);
    }
  }

  // If we hoisted instructions to a conditional block they may not dominate
  // their uses that weren't hoisted (such as phis where some operands are not
  // loop invariant). If so make them unconditional by moving them to their
  // immediate dominator. We iterate through the instructions in reverse order
  // which ensures that when we rehoist an instruction we rehoist its operands,
  // and also keep track of where in the block we are rehoisting to, to make
  // sure that we rehoist instructions before the instructions that use them.
  Instruction *HoistPoint = nullptr;
  if (ControlFlowHoisting) {
    for (Instruction *I : reverse(HoistedInstructions)) {
      if (!llvm::all_of(I->uses(),
                        [&](Use &U) { return DT->dominates(I, U); })) {
        BasicBlock *Dominator =
            DT->getNode(I->getParent())->getIDom()->getBlock();
        if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
          if (HoistPoint)
            assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
                   "New hoist point expected to dominate old hoist point");
          HoistPoint = Dominator->getTerminator();
        }
        LLVM_DEBUG(dbgs() << "LICM rehoisting to "
                          << HoistPoint->getParent()->getNameOrAsOperand()
                          << ": " << *I << "\n");
        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
        HoistPoint = I;
        Changed = true;
      }
    }
  }
  if (VerifyMemorySSA)
    MSSAU.getMemorySSA()->verifyMemorySSA();

  // Now that we've finished hoisting make sure that LI and DT are still
  // valid.
#ifdef EXPENSIVE_CHECKS
  if (Changed) {
    assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
           "Dominator tree verification failed");
    LI->verify(*DT);
  }
#endif

  return Changed;
}

// Return true if LI is invariant within scope of the loop. LI is invariant if
// CurLoop is dominated by an invariant.start representing the same memory
// location and size as the memory location LI loads from, and also the
// invariant.start has no uses.
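// For example (illustrative IR): a load can be treated as invariant when a
// dominating, use-free invariant.start covers at least the bytes it reads:
//
//   %iv = call ptr @llvm.invariant.start.p0(i64 4, ptr %p)
//   ... loop dominated by the call:
//     %v = load i32, ptr %p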
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                  Loop *CurLoop) {
  Value *Addr = LI->getOperand(0);
  const DataLayout &DL = LI->getModule()->getDataLayout();
  const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());

  // It is not currently possible for clang to generate an invariant.start
  // intrinsic with scalable vector types because we don't support thread local
  // sizeless types and we don't permit sizeless types in structs or classes.
  // Furthermore, even if support is added for this in the future the intrinsic
  // itself is defined to have a size of -1 for variable sized objects. This
  // makes it impossible to verify if the intrinsic envelops our region of
  // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
  // types would have a -1 parameter, but the former is clearly double the size
  // of the latter.
  if (LocSizeInBits.isScalable())
    return false;

  // If the type is i8 addrspace(x)*, we know this is the type of the
  // llvm.invariant.start operand.
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
                                     LI->getPointerAddressSpace());
  unsigned BitcastsVisited = 0;
  // Look through bitcasts until we reach the i8* type (this is the
  // invariant.start operand type).
  while (Addr->getType() != PtrInt8Ty) {
    auto *BC = dyn_cast<BitCastInst>(Addr);
    // Avoid traversing a high number of bitcast uses.
    if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
      return false;
    Addr = BC->getOperand(0);
  }
  // If we've ended up at a global/constant, bail. We shouldn't be looking at
  // uselists for non-local Values in a loop pass.
  if (isa<Constant>(Addr))
    return false;

  unsigned UsesVisited = 0;
  // Traverse all uses of the load operand value, to see if invariant.start is
  // one of the uses, and whether it dominates the load instruction.
  for (auto *U : Addr->users()) {
    // Avoid traversing for a Load operand with a high number of users.
    if (++UsesVisited > MaxNumUsesTraversed)
      return false;
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    // If there are escaping uses of the invariant.start instruction, the load
    // may be non-invariant.
    if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
        !II->use_empty())
      continue;
    ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
    // The intrinsic supports having a -1 argument for variable sized objects
    // so we should check for that here.
    if (InvariantSize->isNegative())
      continue;
    uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
    // Confirm the invariant.start location size contains the load operand size
    // in bits. Also, the invariant.start should dominate the load, and we
    // should not hoist the load out of a loop that contains this dominating
    // invariant.start.
    if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
      return true;
  }

  return false;
}

namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
  // Only these instructions are hoistable/sinkable.
  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
          isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
          isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
          isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
          isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
          isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
          isa<InsertValueInst>(I) || isa<FreezeInst>(I));
}
/// Return true if MSSA knows there are no MemoryDefs in the loop.
bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
  for (auto *BB : L->getBlocks())
    if (MSSAU.getMemorySSA()->getBlockDefs(BB))
      return false;
  return true;
}

/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
                        const MemorySSAUpdater &MSSAU) {
  for (auto *BB : L->getBlocks())
    if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
      int NotAPhi = 0;
      for (const auto &Acc : *Accs) {
        if (isa<MemoryPhi>(&Acc))
          continue;
        const auto *MUD = cast<MemoryUseOrDef>(&Acc);
        if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
          return false;
      }
    }
  return true;
}
} // namespace

1145
1147 BatchAAResults &BAA,
1148 SinkAndHoistLICMFlags &Flags,
1149 MemoryUseOrDef *MA) {
1150 // See declaration of SetLicmMssaOptCap for usage details.
1151 if (Flags.tooManyClobberingCalls())
1152 return MA->getDefiningAccess();
1153
1154 MemoryAccess *Source =
1156 Flags.incrementClobberingCalls();
1157 return Source;
1158}
1159
1161 Loop *CurLoop, MemorySSAUpdater &MSSAU,
1162 bool TargetExecutesOncePerLoop,
1163 SinkAndHoistLICMFlags &Flags,
1165 // If we don't understand the instruction, bail early.
1166 if (!isHoistableAndSinkableInst(I))
1167 return false;
1168
  MemorySSA *MSSA = MSSAU.getMemorySSA();
  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
      return true;
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

    if (LI->isAtomic() && !TargetExecutesOncePerLoop)
      return false; // Don't risk duplicating unordered loads

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));

    bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);

    bool Invalidated = pointerInvalidatedByLoop(
        MSSA, MU, CurLoop, I, Flags, InvariantGroup);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
               << "failed to move load with loop-invariant address "
                  "because the loop may invalidate its value";
      });

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // The convergent attribute is used on operations that involve inter-thread
    // communication, whose results are implicitly affected by the enclosing
    // control flow. It is not safe to hoist or sink such operations across
    // control flow.
    if (CI->isConvergent())
      return false;

    using namespace PatternMatch;
    if (match(CI, m_Intrinsic<Intrinsic::assume>()))
      // Assumes don't actually alias anything or throw
      return true;

    // Handle simple cases by querying alias analysis.
    MemoryEffects Behavior = AA->getMemoryEffects(CI);
    if (Behavior.doesNotAccessMemory())
      return true;
    if (Behavior.onlyReadsMemory()) {
      // A readonly argmemonly function only reads from memory pointed to by
      // its arguments with arbitrary offsets. If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (Behavior.onlyAccessesArgPointees()) {
        // TODO: expand to writeable arguments
        for (Value *Op : CI->args())
          if (Op->getType()->isPointerTy() &&
              pointerInvalidatedByLoop(
                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
                  Flags, /*InvariantGroup=*/false))
            return false;
        return true;
      }

      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      if (isReadOnly(MSSAU, CurLoop))
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
    // Fences alias (most) everything to provide ordering. For the moment,
    // just give up if there are any other memory operations in the loop.
    return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
    if (!SI->isUnordered())
      return false; // Don't sink/hoist volatile or ordered atomic store!

    // We can only hoist a store that we can prove writes a value which is not
    // read or overwritten within the loop. For those cases, we fall back to
    // load store promotion instead. TODO: We can extend this to cases where
    // there is exactly one write to the location and that write dominates an
    // arbitrary number of reads in the loop.
    if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
      return true;
    // If there are more accesses than the Promotion cap, then give up as we're
    // not walking a list that long.
    if (Flags.tooManyMemoryAccesses())
      return false;

    auto *SIMD = MSSA->getMemoryAccess(SI);
    BatchAAResults BAA(*AA);
    auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
    // Make sure there are no clobbers inside the loop.
    if (!MSSA->isLiveOnEntryDef(Source) &&
        CurLoop->contains(Source->getBlock()))
      return false;

    // If there are interfering Uses (i.e. their defining access is in the
    // loop), or ordered loads (stored as Defs!), don't move this store.
    // Could do better here, but this is conservatively correct.
    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
    // moving accesses. Can also extend to dominating uses.
    for (auto *BB : CurLoop->getBlocks())
      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
        for (const auto &MA : *Accesses)
          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
            auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
                                                 const_cast<MemoryUse *>(MU));
            if (!MSSA->isLiveOnEntryDef(MD) &&
                CurLoop->contains(MD->getBlock()))
              return false;
            // Disable hoisting past potentially interfering loads. Optimized
            // Uses may point to an access outside the loop, as getClobbering
            // checks the previous iteration when walking the backedge.
            // FIXME: More precise: no Uses that alias SI.
            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
              return false;
          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
              (void)LI; // Silence warning.
              assert(!LI->isUnordered() && "Expected unordered load");
              return false;
            }
            // Any call, while it may not be clobbering SI, may be a use.
            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
              // Check if the call may read from the memory location written
              // to by SI. Check CI's attributes and arguments; the number of
              // such checks performed is limited above by NoOfMemAccTooLarge.
              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
              if (isModOrRefSet(MRI))
                return false;
            }
          }
      }
    return true;
  }

  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");

  // We've established mechanical ability and aliasing, it's up to the caller
  // to check fault safety
  return true;
}

/// Returns true if a PHINode is trivially replaceable with an
/// Instruction.
/// This is true when all incoming values are that instruction.
/// This pattern occurs most often with LCSSA PHI nodes.
///
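/// For example (illustrative LCSSA phi): in
///   exit:
///     %i.lcssa = phi i32 [ %i, %body ], [ %i, %latch ]
/// every incoming value is %i, so the phi is trivially replaceable by %i
/// (or by a sunk copy of it in the exit block).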
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
  for (const Value *IncValue : PN.incoming_values())
    if (IncValue != &I)
      return false;

  return true;
}

/// Return true if the instruction is foldable in the loop.
static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                             const TargetTransformInfo *TTI) {
  if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
    InstructionCost CostI =
        TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
    if (CostI != TargetTransformInfo::TCC_Free)
      return false;
    // For a GEP, we cannot simply use getInstructionCost because currently
    // it optimistically assumes that a GEP will fold into addressing mode
    // regardless of its users.
    const BasicBlock *BB = GEP->getParent();
    for (const User *U : GEP->users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (CurLoop->contains(UI) &&
          (BB != UI->getParent() ||
           (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
        return false;
    }
    return true;
  }

  return false;
}

/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.
///
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                                      const LoopSafetyInfo *SafetyInfo,
                                      TargetTransformInfo *TTI,
                                      bool &FoldableInLoop, bool LoopNestMode) {
  const auto &BlockColors = SafetyInfo->getBlockColors();
  bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
  for (const User *U : I.users()) {
    const Instruction *UI = cast<Instruction>(U);
    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      const BasicBlock *BB = PN->getParent();
      // We cannot sink uses in catchswitches.
      if (isa<CatchSwitchInst>(BB->getTerminator()))
        return false;

      // We need to sink a callsite to a unique funclet. Avoid sinking if the
      // phi use is too muddled.
      if (isa<CallInst>(I))
        if (!BlockColors.empty() &&
            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
          return false;

      if (LoopNestMode) {
        while (isa<PHINode>(UI) && UI->hasOneUser() &&
               UI->getNumOperands() == 1) {
          if (!CurLoop->contains(UI))
            break;
          UI = cast<Instruction>(UI->user_back());
        }
      }
    }

    if (CurLoop->contains(UI)) {
      if (IsFoldable) {
        FoldableInLoop = true;
        continue;
      }
      return false;
    }
  }
  return true;
}

static Instruction *cloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions. The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
    // Create a new MemoryAccess and let MemorySSA set its defining access.
    MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
        New, nullptr, New->getParent(), MemorySSA::Beginning);
    if (NewMemAcc) {
      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
        MSSAU.insertDef(MemDef, /*RenameUses=*/true);
      else {
        auto *MemUse = cast<MemoryUse>(NewMemAcc);
        MSSAU.insertUse(MemUse, /*RenameUses=*/true);
      }
    }
  }

  // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that
  // this is particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (Use &Op : New->operands())
    if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
      auto *OInst = cast<Instruction>(Op.get());
      PHINode *OpPN =
          PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                          OInst->getName() + ".lcssa", &ExitBlock.front());
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
        OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
      Op = OpPN;
    }
  return New;
}

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU) {
  MSSAU.removeMemoryAccess(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}

1495
1497 ICFLoopSafetyInfo &SafetyInfo,
1498 MemorySSAUpdater &MSSAU,
1499 ScalarEvolution *SE) {
1500 SafetyInfo.removeInstruction(&I);
1501 SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1502 I.moveBefore(&Dest);
1503 if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1504 MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1505 MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
1506 if (SE)
1507 SE->forgetBlockAndLoopDispositions(&I);
1508}
1509
1510static Instruction *sinkThroughTriviallyReplaceablePHI(
1511 PHINode *TPN, Instruction *I, LoopInfo *LI,
1512 SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1513 const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1514 MemorySSAUpdater &MSSAU) {
1515 assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1516 "Expect only trivially replaceable PHI");
1517 BasicBlock *ExitBlock = TPN->getParent();
1518 Instruction *New;
1519 auto It = SunkCopies.find(ExitBlock);
1520 if (It != SunkCopies.end())
1521 New = It->second;
1522 else
1523 New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1524 *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1525 return New;
1526}
1527
1528static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1529 BasicBlock *BB = PN->getParent();
1530 if (!BB->canSplitPredecessors())
1531 return false;
1532 // It's not impossible to split EHPad blocks, but if BlockColors already exist
1533 // it requires updating BlockColors for all offspring blocks accordingly. By
1534 // skipping this corner case, we can keep the BlockColors update after
1535 // splitting predecessors fairly simple.
1536 if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1537 return false;
1538 for (BasicBlock *BBPred : predecessors(BB)) {
1539 if (isa<IndirectBrInst>(BBPred->getTerminator()))
1540 return false;
1541 }
1542 return true;
1543}
1544
1545static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1546 LoopInfo *LI, const Loop *CurLoop,
1547 LoopSafetyInfo *SafetyInfo,
1548 MemorySSAUpdater *MSSAU) {
1549#ifndef NDEBUG
1550 SmallVector<BasicBlock *, 32> ExitBlocks;
1551 CurLoop->getUniqueExitBlocks(ExitBlocks);
1552 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1553 ExitBlocks.end());
1554#endif
1555 BasicBlock *ExitBB = PN->getParent();
1556 assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1557
1558 // Split predecessors of the loop exit so that instructions in the loop are
1559 // exposed to exit blocks through trivially replaceable PHIs while keeping
1560 // the loop in the canonical form where each predecessor of each exit block
1561 // is contained within the loop. For example, this will convert the loop below
1562 // from
1563 //
1564 // LB1:
1565 // %v1 =
1566 // br %LE, %LB2
1567 // LB2:
1568 // %v2 =
1569 // br %LE, %LB1
1570 // LE:
1571 // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1572 //
1573 // to
1574 //
1575 // LB1:
1576 // %v1 =
1577 // br %LE.split, %LB2
1578 // LB2:
1579 // %v2 =
1580 // br %LE.split2, %LB1
1581 // LE.split:
1582 // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1583 // br %LE
1584 // LE.split2:
1585 // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1586 // br %LE
1587 // LE:
1588 // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1589 //
1590 const auto &BlockColors = SafetyInfo->getBlockColors();
1591 SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1592 while (!PredBBs.empty()) {
1593 BasicBlock *PredBB = *PredBBs.begin();
1594 assert(CurLoop->contains(PredBB) &&
1595 "Expect all predecessors are in the loop");
1596 if (PN->getBasicBlockIndex(PredBB) >= 0) {
1597 BasicBlock *NewPred = SplitBlockPredecessors(
1598 ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1599 // Since we do not allow splitting EH-block with BlockColors in
1600 // canSplitPredecessors(), we can simply assign predecessor's color to
1601 // the new block.
1602 if (!BlockColors.empty())
1603 // Grab a reference to the ColorVector to be inserted before getting the
1604 // reference to the vector we are copying because inserting the new
1605 // element in BlockColors might cause the map to be reallocated.
1606 SafetyInfo->copyColors(NewPred, PredBB);
1607 }
1608 PredBBs.remove(PredBB);
1609 }
1610}
1611
1612/// When an instruction is found to only be used outside of the loop, this
1613/// function moves it to the exit blocks and patches up SSA form as needed.
1614/// This method is guaranteed to remove the original instruction from its
1615/// position, and may either delete it or move it outside of the loop.
1616///
1617static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1618 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1619 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1620 bool Changed = false;
1621 LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1622
1623 // Iterate over users to be ready for actual sinking. Replace users in
1624 // unreachable blocks with poison and make all user PHIs trivially replaceable.
1625 SmallPtrSet<Instruction *, 8> VisitedUsers;
1626 for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1627 auto *User = cast<Instruction>(*UI);
1628 Use &U = UI.getUse();
1629 ++UI;
1630
1631 if (VisitedUsers.count(User) || CurLoop->contains(User))
1632 continue;
1633
1634 if (!DT->isReachableFromEntry(User->getParent())) {
1635 U = PoisonValue::get(I.getType());
1636 Changed = true;
1637 continue;
1638 }
1639
1640 // The user must be a PHI node.
1641 PHINode *PN = cast<PHINode>(User);
1642
1643 // Surprisingly, instructions can be used outside of loops without any
1644 // exits. This can only happen in PHI nodes if the incoming block is
1645 // unreachable.
1646 BasicBlock *BB = PN->getIncomingBlock(U);
1647 if (!DT->isReachableFromEntry(BB)) {
1648 U = PoisonValue::get(I.getType());
1649 Changed = true;
1650 continue;
1651 }
1652
1653 VisitedUsers.insert(PN);
1654 if (isTriviallyReplaceablePHI(*PN, I))
1655 continue;
1656
1657 if (!canSplitPredecessors(PN, SafetyInfo))
1658 return Changed;
1659
1660 // Split predecessors of the PHI so that we can make users trivially
1661 // replaceable.
1662 splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1663
1664 // Should rebuild the iterators, as they may be invalidated by
1665 // splitPredecessorsOfLoopExit().
1666 UI = I.user_begin();
1667 UE = I.user_end();
1668 }
1669
1670 if (VisitedUsers.empty())
1671 return Changed;
1672
1673 ORE->emit([&]() {
1674 return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1675 << "sinking " << ore::NV("Inst", &I);
1676 });
1677 if (isa<LoadInst>(I))
1678 ++NumMovedLoads;
1679 else if (isa<CallInst>(I))
1680 ++NumMovedCalls;
1681 ++NumSunk;
1682
1683#ifndef NDEBUG
1684 SmallVector<BasicBlock *, 32> ExitBlocks;
1685 CurLoop->getUniqueExitBlocks(ExitBlocks);
1686 SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1687 ExitBlocks.end());
1688#endif
1689
1690 // Clones of this instruction. Don't create more than one per exit block!
1691 SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1692
1693 // If this instruction is only used outside of the loop, then all users are
1694 // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1695 // the instruction.
1696 // First check if I is worth sinking for all uses. Sink only when it is
1697 // worthwhile across all uses.
1698 SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1699 for (auto *UI : Users) {
1700 auto *User = cast<Instruction>(UI);
1701
1702 if (CurLoop->contains(User))
1703 continue;
1704
1705 PHINode *PN = cast<PHINode>(User);
1706 assert(ExitBlockSet.count(PN->getParent()) &&
1707 "The LCSSA PHI is not in an exit block!");
1708
1709 // The PHI must be trivially replaceable.
1710 Instruction *New = sinkThroughTriviallyReplaceablePHI(
1711 PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1712 PN->replaceAllUsesWith(New);
1713 eraseInstruction(*PN, *SafetyInfo, MSSAU);
1714 Changed = true;
1715 }
1716 return Changed;
1717}
1718
1719/// When an instruction is found to only use loop invariant operands that are
1720/// safe to hoist, this function is called to do the dirty work.
1721///
1722static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1723 BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1724 MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1725 OptimizationRemarkEmitter *ORE) {
1726 LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1727 << I << "\n");
1728 ORE->emit([&]() {
1729 return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1730 << ore::NV("Inst", &I);
1731 });
1732
1733 // Metadata can be dependent on conditions we are hoisting above.
1734 // Conservatively strip all metadata on the instruction unless we were
1735 // guaranteed to execute I if we entered the loop, in which case the metadata
1736 // is valid in the loop preheader.
1737 // Similarly, if I is a call and it is not guaranteed to execute in the loop,
1738 // then moving to the preheader means we should strip attributes on the call
1739 // that can cause UB since we may be hoisting above conditions that allowed
1740 // inferring those attributes. They may not be valid at the preheader.
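 // For example (illustrative): metadata such as !nonnull or !range that was
 // inferred from a condition guarding the instruction inside the loop no
 // longer holds once the instruction is hoisted above that condition, so it
 // must be dropped.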
1741 if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1742 // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1743 // time in isGuaranteedToExecute if we don't actually have anything to
1744 // drop. It is a compile time optimization, not required for correctness.
1745 !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1746 I.dropUBImplyingAttrsAndMetadata();
1747
1748 if (isa<PHINode>(I))
1749 // Move the new node to the end of the phi list in the destination block.
1750 moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1751 else
1752 // Move the new node to the destination block, before its terminator.
1753 moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1754
1755 I.updateLocationAfterHoist();
1756
1757 if (isa<LoadInst>(I))
1758 ++NumMovedLoads;
1759 else if (isa<CallInst>(I))
1760 ++NumMovedCalls;
1761 ++NumHoisted;
1762}
1763
1764/// Only sink or hoist an instruction if it is not a trapping instruction,
1765/// or if the instruction is known not to trap when moved to the preheader,
1766/// or if it is a trapping instruction that is guaranteed to execute.
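/// For example (illustrative): "%q = sdiv i32 %a, %b" may trap when %b is 0,
/// so it can only be moved if it is guaranteed to execute in the loop or can
/// be proven safe to speculate; a plain "add" never traps and needs no such
/// proof.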
1767static bool isSafeToExecuteUnconditionally(
1768 Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1769 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1770 OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1771 AssumptionCache *AC, bool AllowSpeculation) {
1772 if (AllowSpeculation &&
1773 isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
1774 return true;
1775
1776 bool GuaranteedToExecute =
1777 SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1778
1779 if (!GuaranteedToExecute) {
1780 auto *LI = dyn_cast<LoadInst>(&Inst);
1781 if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1782 ORE->emit([&]() {
1783 return OptimizationRemarkMissed(
1784 DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1785 << "failed to hoist load with loop-invariant address "
1786 "because load is conditionally executed";
1787 });
1788 }
1789
1790 return GuaranteedToExecute;
1791}
1792
1793namespace {
1794class LoopPromoter : public LoadAndStorePromoter {
1795 Value *SomePtr; // Designated pointer to store to.
1796 SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1797 SmallVectorImpl<Instruction *> &LoopInsertPts;
1798 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1799 PredIteratorCache &PredCache;
1800 MemorySSAUpdater &MSSAU;
1801 LoopInfo &LI;
1802 DebugLoc DL;
1803 Align Alignment;
1804 bool UnorderedAtomic;
1805 AAMDNodes AATags;
1806 ICFLoopSafetyInfo &SafetyInfo;
1807 bool CanInsertStoresInExitBlocks;
1808 ArrayRef<const Instruction *> Uses;
1809
1810 // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1811 // (if legal) if doing so would add an out-of-loop use to an instruction
1812 // defined in-loop.
1813 Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1814 if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1815 return V;
1816
1817 Instruction *I = cast<Instruction>(V);
1818 // We need to create an LCSSA PHI node for the incoming value and
1819 // store that.
1820 PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1821 I->getName() + ".lcssa", &BB->front());
1822 for (BasicBlock *Pred : PredCache.get(BB))
1823 PN->addIncoming(I, Pred);
1824 return PN;
1825 }
1826
1827public:
1828 LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1829 SmallVectorImpl<BasicBlock *> &LEB,
1830 SmallVectorImpl<Instruction *> &LIP,
1831 SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1832 MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1833 Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1834 ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1835 : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1836 LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1837 LI(li), DL(std::move(dl)), Alignment(Alignment),
1838 UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1839 SafetyInfo(SafetyInfo),
1840 CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1841
1842 void insertStoresInLoopExitBlocks() {
1843 // Insert stores in the loop exit blocks. Each exit block gets a
1844 // store of the live-out values that feed them. Since we've already told
1845 // the SSA updater about the defs in the loop and the preheader
1846 // definition, it is all set and we can start using it.
1847 DIAssignID *NewID = nullptr;
1848 for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1849 BasicBlock *ExitBlock = LoopExitBlocks[i];
1850 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1851 LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1852 Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1853 Instruction *InsertPos = LoopInsertPts[i];
1854 StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1855 if (UnorderedAtomic)
1856 NewSI->setOrdering(AtomicOrdering::Unordered);
1857 NewSI->setAlignment(Alignment);
1858 NewSI->setDebugLoc(DL);
1859 // Attach DIAssignID metadata to the new store, generating it on the
1860 // first loop iteration.
1861 if (i == 0) {
1862 // NewSI will have its DIAssignID set here if there are any stores in
1863 // Uses with a DIAssignID attachment. This merged ID will then be
1864 // attached to the other inserted stores (in the branch below).
1865 NewSI->mergeDIAssignID(Uses);
1866 NewID = cast_or_null<DIAssignID>(
1867 NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1868 } else {
1869 // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1870 // above.
1871 NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1872 }
1873
1874 if (AATags)
1875 NewSI->setAAMetadata(AATags);
1876
1877 MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1878 MemoryAccess *NewMemAcc;
1879 if (!MSSAInsertPoint) {
1880 NewMemAcc = MSSAU.createMemoryAccessInBB(
1881 NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1882 } else {
1883 NewMemAcc =
1884 MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1885 }
1886 MSSAInsertPts[i] = NewMemAcc;
1887 MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1888 // FIXME: true for safety, false may still be correct.
1889 }
1890 }
1891
1892 void doExtraRewritesBeforeFinalDeletion() override {
1893 if (CanInsertStoresInExitBlocks)
1894 insertStoresInLoopExitBlocks();
1895 }
1896
1897 void instructionDeleted(Instruction *I) const override {
1898 SafetyInfo.removeInstruction(I);
1899 MSSAU.removeMemoryAccess(I);
1900 }
1901
1902 bool shouldDelete(Instruction *I) const override {
1903 if (isa<StoreInst>(I))
1904 return CanInsertStoresInExitBlocks;
1905 return true;
1906 }
1907};
1908
1909bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1910 DominatorTree *DT) {
1911 // We can perform the captured-before check against any instruction in the
1912 // loop header, as the loop header is reachable from any instruction inside
1913 // the loop.
1914 // TODO: ReturnCaptures=true shouldn't be necessary here.
1915 return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1916 /* StoreCaptures */ true,
1917 L->getHeader()->getTerminator(), DT);
1918}
1919
1920/// Return true if we can prove that a caller cannot inspect the object if an
1921/// unwind occurs inside the loop.
1922bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1923 DominatorTree *DT) {
1924 bool RequiresNoCaptureBeforeUnwind;
1925 if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1926 return false;
1927
1928 return !RequiresNoCaptureBeforeUnwind ||
1929 isNotCapturedBeforeOrInLoop(Object, L, DT);
1930}
1931
1932// We don't consider globals as writable: While the physical memory is writable,
1933// we may not have provenance to perform the write.
1934bool isWritableObject(const Value *Object) {
1935 // TODO: Alloca might not be writable after its lifetime ends.
1936 // See https://github.com/llvm/llvm-project/issues/51838.
1937 if (isa<AllocaInst>(Object))
1938 return true;
1939
1940 // TODO: Also handle sret.
1941 if (auto *A = dyn_cast<Argument>(Object))
1942 return A->hasByValAttr();
1943
1944 // TODO: Noalias has nothing to do with writability, this should check for
1945 // an allocator function.
1946 return isNoAliasCall(Object);
1947}
1948
1949 bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1950 TargetTransformInfo *TTI) {
1951 // The object must be function-local to start with, and then not captured
1952 // before/in the loop.
1953 return (isIdentifiedFunctionLocal(Object) &&
1954 isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1955 (TTI->isSingleThreaded() || SingleThread);
1956}
1957
1958} // namespace
1959
1960/// Try to promote memory values to scalars by sinking stores out of the
1961/// loop and moving loads to before the loop. We do this by looping over
1962/// the stores in the loop, looking for stores to Must pointers which are
1963/// loop invariant.
1964///
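/// For example (illustrative): "for (...) { *P += 1; }" with loop-invariant P
/// becomes a load of *P in the preheader, an add on the promoted register in
/// the loop, and a store back to *P in the exit blocks.
///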
1965bool llvm::promoteLoopAccessesToScalars(
1966 const SmallSetVector<Value *, 8> &PointerMustAliases,
1967 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1968 SmallVectorImpl<Instruction *> &InsertPts,
1969 SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1970 LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1971 const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1972 MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1973 OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1974 bool HasReadsOutsideSet) {
1975 // Verify inputs.
1976 assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1977 SafetyInfo != nullptr &&
1978 "Unexpected Input to promoteLoopAccessesToScalars");
1979
1980 LLVM_DEBUG({
1981 dbgs() << "Trying to promote set of must-aliased pointers:\n";
1982 for (Value *Ptr : PointerMustAliases)
1983 dbgs() << " " << *Ptr << "\n";
1984 });
1985 ++NumPromotionCandidates;
1986
1987 Value *SomePtr = *PointerMustAliases.begin();
1988 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1989
1990 // It is not safe to promote a load/store from the loop if the load/store is
1991 // conditional. For example, turning:
1992 //
1993 // for () { if (c) *P += 1; }
1994 //
1995 // into:
1996 //
1997 // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1998 //
1999 // is not safe, because *P may only be valid to access if 'c' is true.
2000 //
2001 // The safety property divides into two parts:
2002 // p1) The memory may not be dereferenceable on entry to the loop. In this
2003 // case, we can't insert the required load in the preheader.
2004 // p2) The memory model does not allow us to insert a store along any dynamic
2005 // path which did not originally have one.
2006 //
2007 // If at least one store is guaranteed to execute, both properties are
2008 // satisfied, and promotion is legal.
2009 //
2010 // This, however, is not a necessary condition. Even if no store/load is
2011 // guaranteed to execute, we can still establish these properties.
2012 // We can establish (p1) by proving that hoisting the load into the preheader
2013 // is safe (i.e. proving dereferenceability on all paths through the loop). We
2014 // can use any access within the alias set to prove dereferenceability,
2015 // since they're all must alias.
2016 //
2017 // There are two ways to establish (p2):
2018 // a) Prove the location is thread-local. In this case the memory model
2019 // requirement does not apply, and stores are safe to insert.
2020 // b) Prove a store dominates every exit block. In this case, if an exit
2021 // block is reached, the original dynamic path would have taken us through
2022 // the store, so inserting a store into the exit block is safe. Note that this
2023 // is different from the store being guaranteed to execute. For instance,
2024 // if an exception is thrown on the first iteration of the loop, the original
2025 // store is never executed, but the exit blocks are not executed either.
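 // For example (illustrative), in "do { *P = v; if (c) break; } while (e);"
 // the store dominates both the break exit and the normal exit, so inserting
 // a store into each exit block does not add a store to any dynamic path that
 // lacked one.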
2026
2027 bool DereferenceableInPH = false;
2028 bool StoreIsGuaranteedToExecute = false;
2029 bool FoundLoadToPromote = false;
2030 // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
2031 enum {
2032 StoreSafe,
2033 StoreUnsafe,
2034 StoreSafetyUnknown,
2035 } StoreSafety = StoreSafetyUnknown;
2036
2037 SmallVector<Instruction *, 64> LoopUses;
2038
2039 // We start with an alignment of one and try to find instructions that allow
2040 // us to prove better alignment.
2041 Align Alignment;
2042 // Keep track of which types of access we see
2043 bool SawUnorderedAtomic = false;
2044 bool SawNotAtomic = false;
2045 AAMDNodes AATags;
2046
2047 const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2048
2049 // If there are reads outside the promoted set, then promoting stores is
2050 // definitely not safe.
2051 if (HasReadsOutsideSet)
2052 StoreSafety = StoreUnsafe;
2053
2054 if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2055 // If a loop can throw, we have to insert a store along each unwind edge.
2056 // That said, we can't actually make the unwind edge explicit. Therefore,
2057 // we have to prove that the store is dead along the unwind edge. We do
2058 // this by proving that the caller can't have a reference to the object
2059 // after return and thus can't possibly load from the object.
2060 Value *Object = getUnderlyingObject(SomePtr);
2061 if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2062 StoreSafety = StoreUnsafe;
2063 }
2064
2065 // Check that all accesses to pointers in the alias set use the same type.
2066 // We cannot (yet) promote a memory location that is loaded and stored in
2067 // different sizes. While we are at it, collect alignment and AA info.
2068 Type *AccessTy = nullptr;
2069 for (Value *ASIV : PointerMustAliases) {
2070 for (Use &U : ASIV->uses()) {
2071 // Ignore instructions that are outside the loop.
2072 Instruction *UI = dyn_cast<Instruction>(U.getUser());
2073 if (!UI || !CurLoop->contains(UI))
2074 continue;
2075
2076 // If there is a non-load/store instruction in the loop, we can't promote
2077 // it.
2078 if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2079 if (!Load->isUnordered())
2080 return false;
2081
2082 SawUnorderedAtomic |= Load->isAtomic();
2083 SawNotAtomic |= !Load->isAtomic();
2084 FoundLoadToPromote = true;
2085
2086 Align InstAlignment = Load->getAlign();
2087
2088 // Note that proving a load safe to speculate requires proving
2089 // sufficient alignment at the target location. Proving it guaranteed
2090 // to execute does as well. Thus we can increase our guaranteed
2091 // alignment as well.
2092 if (!DereferenceableInPH || (InstAlignment > Alignment))
2093 if (isSafeToExecuteUnconditionally(
2094 *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2095 Preheader->getTerminator(), AC, AllowSpeculation)) {
2096 DereferenceableInPH = true;
2097 Alignment = std::max(Alignment, InstAlignment);
2098 }
2099 } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2100 // Stores *of* the pointer are not interesting, only stores *to* the
2101 // pointer.
2102 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2103 continue;
2104 if (!Store->isUnordered())
2105 return false;
2106
2107 SawUnorderedAtomic |= Store->isAtomic();
2108 SawNotAtomic |= !Store->isAtomic();
2109
2110 // If the store is guaranteed to execute, both properties are satisfied.
2111 // We may want to check if a store is guaranteed to execute even if we
2112 // already know that promotion is safe, since it may have higher
2113 // alignment than any other guaranteed stores, in which case we can
2114 // raise the alignment on the promoted store.
2115 Align InstAlignment = Store->getAlign();
2116 bool GuaranteedToExecute =
2117 SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2118 StoreIsGuaranteedToExecute |= GuaranteedToExecute;
2119 if (GuaranteedToExecute) {
2120 DereferenceableInPH = true;
2121 if (StoreSafety == StoreSafetyUnknown)
2122 StoreSafety = StoreSafe;
2123 Alignment = std::max(Alignment, InstAlignment);
2124 }
2125
2126 // If a store dominates all exit blocks, it is safe to sink.
2127 // As explained above, if an exit block was executed, a dominating
2128 // store must have been executed at least once, so we are not
2129 // introducing stores on paths that did not have them.
2130 // Note that this only looks at explicit exit blocks. If we ever
2131 // start sinking stores into unwind edges (see above), this will break.
2132 if (StoreSafety == StoreSafetyUnknown &&
2133 llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2134 return DT->dominates(Store->getParent(), Exit);
2135 }))
2136 StoreSafety = StoreSafe;
2137
2138 // If the store is not guaranteed to execute, we may still get
2139 // deref info through it.
2140 if (!DereferenceableInPH) {
2141 DereferenceableInPH = isDereferenceableAndAlignedPointer(
2142 Store->getPointerOperand(), Store->getValueOperand()->getType(),
2143 Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
2144 }
2145 } else
2146 continue; // Not a load or store.
2147
2148 if (!AccessTy)
2149 AccessTy = getLoadStoreType(UI);
2150 else if (AccessTy != getLoadStoreType(UI))
2151 return false;
2152
2153 // Merge the AA tags.
2154 if (LoopUses.empty()) {
2155 // On the first load/store, just take its AA tags.
2156 AATags = UI->getAAMetadata();
2157 } else if (AATags) {
2158 AATags = AATags.merge(UI->getAAMetadata());
2159 }
2160
2161 LoopUses.push_back(UI);
2162 }
2163 }
2164
2165 // If we found both an unordered atomic instruction and a non-atomic memory
2166 // access, bail. We can't blindly promote non-atomic to atomic since we
2167 // might not be able to lower the result. We can't downgrade since that
2168 // would violate the memory model. Also, align 0 is an error for atomics.
2169 if (SawUnorderedAtomic && SawNotAtomic)
2170 return false;
2171
2172 // If we're inserting an atomic load in the preheader, we must be able to
2173 // lower it. We're only guaranteed to be able to lower naturally aligned
2174 // atomics.
2175 if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2176 return false;
2177
2178 // If we couldn't prove we can hoist the load, bail.
2179 if (!DereferenceableInPH) {
2180 LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2181 return false;
2182 }
2183
2184 // We know we can hoist the load, but don't have a guaranteed store.
2185 // Check whether the location is writable and thread-local. If it is, then we
2186 // can insert stores along paths which originally didn't have them without
2187 // violating the memory model.
2188 if (StoreSafety == StoreSafetyUnknown) {
2189 Value *Object = getUnderlyingObject(SomePtr);
2190 if (isWritableObject(Object) &&
2191 isThreadLocalObject(Object, CurLoop, DT, TTI))
2192 StoreSafety = StoreSafe;
2193 }
2194
2195 // If we've still failed to prove we can sink the store, hoist the load
2196 // only, if possible.
2197 if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2198 // If we cannot hoist the load either, give up.
2199 return false;
2200
2201 // Let's do the promotion!
2202 if (StoreSafety == StoreSafe) {
2203 LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2204 << '\n');
2205 ++NumLoadStorePromoted;
2206 } else {
2207 LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2208 << '\n');
2209 ++NumLoadPromoted;
2210 }
2211
2212 ORE->emit([&]() {
2213 return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2214 LoopUses[0])
2215 << "Moving accesses to memory location out of the loop";
2216 });
2217
2218 // Look at all the loop uses, and try to merge their locations.
2219 std::vector<DILocation *> LoopUsesLocs;
2220 for (auto *U : LoopUses)
2221 LoopUsesLocs.push_back(U->getDebugLoc().get());
2222 auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2223
2224 // We use the SSAUpdater interface to insert phi nodes as required.
2225 SmallVector<PHINode *, 16> NewPHIs;
2226 SSAUpdater SSA(&NewPHIs);
2227 LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2228 MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2229 SawUnorderedAtomic, AATags, *SafetyInfo,
2230 StoreSafety == StoreSafe);
2231
2232 // Set up the preheader to have a definition of the value. It is the live-out
2233 // value from the preheader that uses in the loop will use.
2234 LoadInst *PreheaderLoad = nullptr;
2235 if (FoundLoadToPromote || !StoreIsGuaranteedToExecute) {
2236 PreheaderLoad =
2237 new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2238 Preheader->getTerminator());
2239 if (SawUnorderedAtomic)
2240 PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2241 PreheaderLoad->setAlignment(Alignment);
2242 PreheaderLoad->setDebugLoc(DebugLoc());
2243 if (AATags)
2244 PreheaderLoad->setAAMetadata(AATags);
2245
2246 MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2247 PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2248 MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2249 MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2250 SSA.AddAvailableValue(Preheader, PreheaderLoad);
2251 } else {
2252 SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2253 }
2254
2255 if (VerifyMemorySSA)
2256 MSSAU.getMemorySSA()->verifyMemorySSA();
2257 // Rewrite all the loads in the loop and remember all the definitions from
2258 // stores in the loop.
2259 Promoter.run(LoopUses);
2260
2261 if (VerifyMemorySSA)
2262 MSSAU.getMemorySSA()->verifyMemorySSA();
2263 // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2264 if (PreheaderLoad && PreheaderLoad->use_empty())
2265 eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2266
2267 return true;
2268}
2269
2270static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2271 function_ref<void(Instruction *)> Fn) {
2272 for (const BasicBlock *BB : L->blocks())
2273 if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2274 for (const auto &Access : *Accesses)
2275 if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2276 Fn(MUD->getMemoryInst());
2277}
2278
2279// The bool indicates whether there might be reads outside the set, in which
2280// case only loads may be promoted.
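// For example (illustrative): if the loop stores to P but a call in the loop
// may also read *P, sinking the store out of the loop would change the value
// that call observes, so only loads of *P may be promoted.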
2281static SmallVector<PointersAndHasReadsOutsideSet, 0>
2282collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2283 BatchAAResults BatchAA(*AA);
2284 AliasSetTracker AST(BatchAA);
2285
2286 auto IsPotentiallyPromotable = [L](const Instruction *I) {
2287 if (const auto *SI = dyn_cast<StoreInst>(I))
2288 return L->isLoopInvariant(SI->getPointerOperand());
2289 if (const auto *LI = dyn_cast<LoadInst>(I))
2290 return L->isLoopInvariant(LI->getPointerOperand());
2291 return false;
2292 };
2293
2294 // Populate AST with potentially promotable accesses.
2295 SmallPtrSet<Value *, 16> AttemptingPromotion;
2296 foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2297 if (IsPotentiallyPromotable(I)) {
2298 AttemptingPromotion.insert(I);
2299 AST.add(I);
2300 }
2301 });
2302
2303 // We're only interested in must-alias sets that contain a mod.
2304 SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2305 for (AliasSet &AS : AST)
2306 if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2307 Sets.push_back({&AS, false});
2308
2309 if (Sets.empty())
2310 return {}; // Nothing to promote...
2311
2312 // Discard any sets for which there is an aliasing non-promotable access.
2313 foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2314 if (AttemptingPromotion.contains(I))
2315 return;
2316
2317 llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2318 ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2319 // Cannot promote if there are writes outside the set.
2320 if (isModSet(MR))
2321 return true;
2322 if (isRefSet(MR)) {
2323 // Remember reads outside the set.
2324 Pair.setInt(true);
2325 // If this is a mod-only set and there are reads outside the set,
2326 // we will not be able to promote, so bail out early.
2327 return !Pair.getPointer()->isRef();
2328 }
2329 return false;
2330 });
2331 });
2332
2333 SmallVector<PointersAndHasReadsOutsideSet, 0> Result;
2334 for (auto [Set, HasReadsOutsideSet] : Sets) {
2335 SmallSetVector<Value *, 8> PointerMustAliases;
2336 for (const auto &ASI : *Set)
2337 PointerMustAliases.insert(ASI.getValue());
2338 Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2339 }
2340
2341 return Result;
2342}
2343
2344static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2345 Loop *CurLoop, Instruction &I,
2346 SinkAndHoistLICMFlags &Flags,
2347 bool InvariantGroup) {
2348 // For hoisting, use the walker to determine safety
2349 if (!Flags.getIsSink()) {
2350 // If hoisting an invariant group, we only need to check that there
2351 // is no store to the loaded pointer between the start of the loop,
2352 // and the load (since all values must be the same).
2353
2354 // This holds in either of two cases:
2355 // 1) the clobbering memory access is outside the loop, or
2356 // 2) the earliest clobbering access is the MemoryPhi at the loop header,
2357 //    i.e. no store in the loop body clobbers the loaded pointer.
2358
2359 BatchAAResults BAA(MSSA->getAA());
2360 MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
2361 return !MSSA->isLiveOnEntryDef(Source) &&
2362 CurLoop->contains(Source->getBlock()) &&
2363 !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
2364 }
2365
2366 // For sinking, we'd need to check all Defs below this use. The getClobbering
2367 // call will look on the backedge of the loop, but will check aliasing with
2368 // the instructions on the previous iteration.
2369 // For example:
2370 // for (i ... )
2371 //   load a[i]   (Use (LiveOnEntry))
2372 //   store a[i]  (1 = Def (2), with 2 = Phi for the loop)
2373 // i++;
2374 // The load sees no clobbering inside the loop, as the backedge alias check
2375 // does phi translation, and will check aliasing against store a[i-1].
2376 // However sinking the load outside the loop, below the store is incorrect.
2377
2378 // For now, only sink if there are no Defs in the loop, and the existing ones
2379 // precede the use and are in the same block.
2380 // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2381 // needs PostDominatorTreeAnalysis.
2382 // FIXME: More precise: no Defs that alias this Use.
2383 if (Flags.tooManyMemoryAccesses())
2384 return true;
2385 for (auto *BB : CurLoop->getBlocks())
2386 if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2387 return true;
2388 // When sinking, the source block may not be part of the loop so check it.
2389 if (!CurLoop->contains(&I))
2390 return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2391
2392 return false;
2393}
2394
2395static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2396 if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2397 for (const auto &MA : *Accesses)
2398 if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2399 if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2400 return true;
2401 return false;
2402}
2403
2404/// Try to simplify things like (A < INV_1 AND A < INV_2) into (A <
2405/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2406/// minimum can be computed outside of the loop, and A is not loop-invariant.
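/// For example (illustrative), with a loop-variant %x and invariants %inv1 and
/// %inv2:
///   %c1 = icmp slt i32 %x, %inv1
///   %c2 = icmp slt i32 %x, %inv2
///   %r = and i1 %c1, %c2
/// can become "%m = smin(%inv1, %inv2)" computed in the preheader and a single
/// "%r = icmp slt i32 %x, %m" in the loop.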
2407static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2408 MemorySSAUpdater &MSSAU) {
2409 bool Inverse = false;
2410 using namespace PatternMatch;
2411 Value *Cond1, *Cond2;
2412 if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
2413 Inverse = true;
2414 } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
2415 // Do nothing
2416 } else
2417 return false;
2418
2419 auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
2420 Value *&LHS, Value *&RHS) {
2421 if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
2422 return false;
2423 if (!LHS->getType()->isIntegerTy())
2424 return false;
2425 if (!ICmpInst::isRelational(P))
2426 return false;
2427 if (L.isLoopInvariant(LHS)) {
2428 std::swap(LHS, RHS);
2429 P = ICmpInst::getSwappedPredicate(P);
2430 }
2431 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2432 return false;
2433 if (Inverse)
2434 P = ICmpInst::getInversePredicate(P);
2435 return true;
2436 };
2437 ICmpInst::Predicate P1, P2;
2438 Value *LHS1, *LHS2, *RHS1, *RHS2;
2439 if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
2440 !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
2441 return false;
2442 if (P1 != P2 || LHS1 != LHS2)
2443 return false;
2444
2445 // Everything is fine, we can do the transform.
2446 bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
2447 assert(
2448 (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
2449 "Relational predicate is either less (or equal) or greater (or equal)!");
2450 Intrinsic::ID id = ICmpInst::isSigned(P1)
2451 ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2452 : (UseMin ? Intrinsic::umin : Intrinsic::umax);
2453 auto *Preheader = L.getLoopPreheader();
2454 assert(Preheader && "Loop is not in simplify form?");
2455 IRBuilder<> Builder(Preheader->getTerminator());
2456 // We are about to create a new guaranteed use for RHS2 which might not exist
2457 // before (if it was a non-taken input of logical and/or instruction). If it
2458 // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2459 // introduced, so they don't need this.
2460 if (isa<SelectInst>(I))
2461 RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
2462 Value *NewRHS = Builder.CreateBinaryIntrinsic(
2463 id, RHS1, RHS2, nullptr, StringRef("invariant.") +
2464 (ICmpInst::isSigned(P1) ? "s" : "u") +
2465 (UseMin ? "min" : "max"));
2466 Builder.SetInsertPoint(&I);
2467 ICmpInst::Predicate P = P1;
2468 if (Inverse)
2469 P = ICmpInst::getInversePredicate(P);
2470 Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
2471 NewCond->takeName(&I);
2472 I.replaceAllUsesWith(NewCond);
2473 eraseInstruction(I, SafetyInfo, MSSAU);
2474 eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
2475 eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
2476 return true;
2477}
2478
2479/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2480/// this allows hoisting the inner GEP.
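/// For example (illustrative), with loop-invariant %p and %j but loop-variant
/// %i:
///   %a = getelementptr i32, ptr %p, i64 %i
///   %b = getelementptr i32, ptr %a, i64 %j
/// is rewritten so that "%inv = gep %p, %j" can be computed in the preheader
/// and only "%b = gep %inv, %i" remains in the loop.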
2481static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2482 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2483 DominatorTree *DT) {
2484 auto *GEP = dyn_cast<GetElementPtrInst>(&I);
2485 if (!GEP)
2486 return false;
2487
2488 auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
2489 if (!Src || !Src->hasOneUse() || !L.contains(Src))
2490 return false;
2491
2492 Value *SrcPtr = Src->getPointerOperand();
2493 auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2494 if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
2495 return false;
2496
2497 // This can only happen if !AllowSpeculation, otherwise this would already be
2498 // handled.
2499 // FIXME: Should we respect AllowSpeculation in these reassociation folds?
2500 // The flag exists to prevent metadata dropping, which is not relevant here.
2501 if (all_of(Src->indices(), LoopInvariant))
2502 return false;
2503
2504 // The swapped GEPs are inbounds if both original GEPs are inbounds
2505 // and the sign of the offsets is the same. For simplicity, only
2506 // handle both offsets being non-negative.
2507 const DataLayout &DL = GEP->getModule()->getDataLayout();
2508 auto NonNegative = [&](Value *V) {
2509 return isKnownNonNegative(V, DL, 0, AC, GEP, DT);
2510 };
2511 bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2512 all_of(Src->indices(), NonNegative) &&
2513 all_of(GEP->indices(), NonNegative);
2514
2515 BasicBlock *Preheader = L.getLoopPreheader();
2516 IRBuilder<> Builder(Preheader->getTerminator());
2517 Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
2518 SmallVector<Value *>(GEP->indices()),
2519 "invariant.gep", IsInBounds);
2520 Builder.SetInsertPoint(GEP);
2521 Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
2522 SmallVector<Value *>(Src->indices()), "gep",
2523 IsInBounds);
2524 GEP->replaceAllUsesWith(NewGEP);
2525 eraseInstruction(*GEP, SafetyInfo, MSSAU);
2526 eraseInstruction(*Src, SafetyInfo, MSSAU);
2527 return true;
2528}
2529
2530/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2531/// C1 and C2 are loop invariants and LV is loop-variant.
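/// For example (illustrative): "icmp slt (add nsw i32 %iv, %c1), %c2" with
/// invariant %c1 and %c2 becomes "icmp slt i32 %iv, %inv", where
/// "%inv = sub nsw i32 %c2, %c1" is computed in the preheader, provided the
/// subtraction provably cannot overflow.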
2532static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2533 Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2534 ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2535 AssumptionCache *AC, DominatorTree *DT) {
2536 assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2537 assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2538 assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2539
2540 // Try to represent VariantLHS as sum of invariant and variant operands.
2541 using namespace PatternMatch;
2542 Value *VariantOp, *InvariantOp;
2543 if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2544 return false;
2545
2546 // LHS itself is a loop-variant, try to represent it in the form:
2547 // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2548 if (L.isLoopInvariant(VariantOp))
2549 std::swap(VariantOp, InvariantOp);
2550 if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2551 return false;
2552
2553 // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2554 // freely move values from left side of inequality to right side (just as in
2555 // normal linear arithmetic). Overflows make things much more complicated, so
2556 // we want to avoid this.
2557 auto &DL = L.getHeader()->getModule()->getDataLayout();
2558 bool ProvedNoOverflowAfterReassociate =
2559 computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
2560 DT) == llvm::OverflowResult::NeverOverflows;
2561 if (!ProvedNoOverflowAfterReassociate)
2562 return false;
2563 auto *Preheader = L.getLoopPreheader();
2564 assert(Preheader && "Loop is not in simplify form?");
2565 IRBuilder<> Builder(Preheader->getTerminator());
2566 Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
2567 /*HasNUW*/ false, /*HasNSW*/ true);
2568 ICmp.setPredicate(Pred);
2569 ICmp.setOperand(0, VariantOp);
2570 ICmp.setOperand(1, NewCmpOp);
2571 eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2572 return true;
2573}
2574
2575/// Reassociate and hoist add/sub expressions.
2576static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2577 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2578 DominatorTree *DT) {
2579 using namespace PatternMatch;
2580 ICmpInst::Predicate Pred;
2581 Value *LHS, *RHS;
2582 if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
2583 return false;
2584
2585 // TODO: Support unsigned predicates?
2586 if (!ICmpInst::isSigned(Pred))
2587 return false;
2588
2589 // Put variant operand to LHS position.
2590 if (L.isLoopInvariant(LHS)) {
2591 std::swap(LHS, RHS);
2592 Pred = ICmpInst::getSwappedPredicate(Pred);
2593 }
2594 // We want to delete the initial operation after reassociation, so only do it
2595 // if it has no other uses.
2596 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
2597 return false;
2598
2599 // TODO: We could go with smarter context, taking common dominator of all I's
2600 // users instead of I itself.
2601 if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2602 return true;
2603
2604 // TODO: Support Sub.
2605
2606 return false;
2607}
2608
2609static bool hoistArithmetics(Instruction &I, Loop &L,
2610 ICFLoopSafetyInfo &SafetyInfo,
2611 MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2612 DominatorTree *DT) {
2613 // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2614 // into (x < min(INV1, INV2)), and hoisting the invariant part of this
2615 // expression out of the loop.
2616 if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2617 ++NumHoisted;
2618 ++NumMinMaxHoisted;
2619 return true;
2620 }
2621
2622 // Try to hoist GEPs by reassociation.
2623 if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2624 ++NumHoisted;
2625 ++NumGEPsHoisted;
2626 return true;
2627 }
2628
2629 // Try to hoist add/sub's by reassociation.
2630 if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2631 ++NumHoisted;
2632 ++NumAddSubHoisted;
2633 return true;
2634 }
2635
2636 return false;
2637}
2638
2639/// Little predicate that returns true if the specified basic block is in
2640/// a subloop of the current one, not the current one itself.
2641///
2642static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2643 assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2644 return LI->getLoopFor(BB) != CurLoop;
2645}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
@ NonNegative
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
Rewrite Partial Register Uses
#define DEBUG_TYPE
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FoldableInLoop, bool LoopNestMode)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1375
static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT)
Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if this allows hoisting the inner ...
Definition: LICM.cpp:2481
static cl::opt< bool > SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false), cl::desc("Force thread model single in LICM pass"))
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1545
static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is foldable in the loop.
Definition: LICM.cpp:1345
static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A < min(INV_1, INV_2)),...
Definition: LICM.cpp:2407
static Instruction * cloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1417
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags, bool InvariantGroup)
Definition: LICM.cpp:2344
static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, Value *InvariantRHS, ICmpInst &ICmp, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT)
Try to turn things like "LV + C1 < C2" into "LV < C2 - C1".
Definition: LICM.cpp:2532
static MemoryAccess * getClobberingMemoryAccess(MemorySSA &MSSA, BatchAAResults &BAA, SinkAndHoistLICMFlags &Flags, MemoryUseOrDef *MA)
Definition: LICM.cpp:1146
static SmallVector< PointersAndHasReadsOutsideSet, 0 > collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L)
Definition: LICM.cpp:2282
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist,...
Definition: LICM.cpp:1722
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1528
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE)
Definition: LICM.cpp:1496
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1617
static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT)
Reassociate and hoist add/sub expressions.
Definition: LICM.cpp:2576
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref< void(Instruction *)> Fn)
Definition: LICM.cpp:2270
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:1038
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1510
licm
Definition: LICM.cpp:359
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one,...
Definition: LICM.cpp:2642
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1489
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI, AssumptionCache *AC, bool AllowSpeculation)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1767
static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT)
Aggregates various functions for hoisting computations out of loop.
Definition: LICM.cpp:2609
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1336
std::pair< SmallSetVector< Value *, 8 >, bool > PointersAndHasReadsOutsideSet
Definition: LICM.cpp:195
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU)
Definition: LICM.cpp:2395
This file defines the interface for the loop nest analysis.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Loop Invariant Code Motion
Memory SSA
Definition: MemorySSA.cpp:71
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
#define P(N)
PassInstrumentationCallbacks PIC
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file provides a priority worklist.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines generic set operations that may be used on set's of different types,...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
@ Flags
Definition: TextStubV5.cpp:93
static cl::opt< bool > DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), cl::desc("Disable type promotion pass"))
Value * RHS
Value * LHS
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Returns a bitmask that should be unconditionally applied to the ModRef info of a memory location.
MemoryEffects getMemoryEffects(const CallBase *Call)
Return the behavior of the given call site.
void add(Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo)
These methods are used to add different types of instructions to the alias sets.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:507
iterator end()
Definition: BasicBlock.h:325
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:323
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:254
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:136
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:217
const Instruction & front() const
Definition: BasicBlock.h:335
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:323
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:379
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1357
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:804
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
bool isSigned() const
Definition: InstrTypes.h:961
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:863
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:825
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isNegative() const
Definition: Constants.h:192
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:151
Assignment ID.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:468
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's immediate dominator changes.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This implementation of LoopSafetyInfo uses ImplicitControlFlowTracking to give precise answers on "may throw" queries.
Definition: MustExecute.h:132
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB, under the assumption that it is executed in this loop.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:99
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumption that the loop is entered).
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is computed contains an instruction that may throw or otherwise exit abnormally.
Definition: MustExecute.cpp:75
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop: checks the loop body and header for the possibility of may-throw exceptions.
Definition: MustExecute.cpp:79
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB.
Definition: MustExecute.cpp:93
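A short sketch, assuming the DominatorTree and Loop come from the usual loop-pass analyses, of how the safety-info entries above combine; mayHoistWithoutNewFaults is a hypothetical helper.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;
// Scan the loop once, then ask whether I runs on every iteration; if so,
// hoisting I cannot introduce a fault on a path that never executed it.
static bool mayHoistWithoutNewFaults(Instruction &I, const DominatorTree *DT,
                                     const Loop *L) {
  ICFLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(L);
  return SafetyInfo.isGuaranteedToExecute(I, DT, L);
}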
This instruction compares its operands according to the predicate given to the constructor.
static bool isGE(Predicate P)
Return true if the predicate is SGE or UGE.
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
static bool isLE(Predicate P)
Return true if the predicate is SLE or ULE.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2564
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstructions.
Definition: DebugInfo.cpp:875
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:70
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1608
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:700
const BasicBlock * getParent() const
Definition: Instruction.h:90
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
Definition: Instruction.h:87
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:275
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1521
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1594
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:362
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LICM.cpp:300
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:277
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:310
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LICM.cpp:341
This class provides an interface for updating the loop pass manager based on mutations to the loop nest.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition: SSAUpdater.h:147
An instruction for reading from memory.
Definition: Instructions.h:177
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:270
void setAlignment(Align Align)
Definition: Instructions.h:224
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:234
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:569
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse post-order traversal of a loop.
Definition: LoopIterator.h:172
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
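A brief sketch of the traversal wrapper above, mirroring the order in which hoisting-style passes visit blocks so that dominating blocks come first; visitLoopInRPO is illustrative.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
using namespace llvm;
static void visitLoopInRPO(Loop *L, LoopInfo *LI) {
  LoopBlocksRPO Worklist(L);
  Worklist.perform(LI); // run the DFS and cache the reverse post-order
  for (BasicBlock *BB : Worklist) {
    // Every block that dominates BB inside the loop was already visited.
    (void)BB;
  }
}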
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:594
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V, const BasicBlock *ExitBB) const
Definition: LoopInfo.cpp:932
This class represents a loop nest and can be used to query its properties.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Captures loop safety information.
Definition: MustExecute.h:60
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:36
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:32
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumption that the loop is entered).
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:66
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
Summary of how a function affects memory in the program.
Definition: ModRef.h:63
bool onlyAccessesArgPointees() const
Whether this function only (at most) accesses argument memory.
Definition: ModRef.h:200
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:194
bool doesNotAccessMemory() const
Whether this function accesses no memory.
Definition: ModRef.h:191
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory referenced by the given instruction.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering definition.
void insertUse(MemoryUse *Use, bool RenameUses=false)
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock * > Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating MemoryAccess that may clobber the location the instruction accesses.
Definition: MemorySSA.h:1046
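A hedged sketch of a typical walker query, in the spirit of the checks LICM performs; the helper name and the simplified legality criterion are illustrative, not the pass's full logic.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// If the nearest clobber of a load is the live-on-entry def, no store in
// the function (and hence in the loop) may write its location first.
static bool clobberedOnlyByEntry(LoadInst *LI, MemorySSA &MSSA,
                                 BatchAAResults &BAA) {
  MemoryAccess *Clobber =
      MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(LI, BAA);
  return MSSA.isLiveOnEntryDef(Clobber);
}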
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:986
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
AliasAnalysis & getAA()
Definition: MemorySSA.h:797
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:757
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1570
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates MemoryAccess B.
Definition: MemorySSA.cpp:2115
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right places, etc.).
Definition: MemorySSA.cpp:1862
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:765
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryAccess B.
Definition: MemorySSA.cpp:2084
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:252
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262
Represents read-only accesses to memory.
Definition: MemorySSA.h:312
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
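An illustrative use of the PHINode API above: building a two-way phi at a join point. All names are hypothetical.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
static PHINode *makeJoinPhi(Type *Ty, BasicBlock *Join, BasicBlock *PredA,
                            Value *ValA, BasicBlock *PredB, Value *ValB) {
  // Reserve space for the two incoming edges up front; PHIs must sit at
  // the start of the block, so insert before its first instruction.
  PHINode *PN = PHINode::Create(Ty, 2, "join.phi", &Join->front());
  PN->addIncoming(ValA, PredA);
  PN->addIncoming(ValB, PredB);
  return PN;
}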
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
PointerIntPair - This class implements a pair of a pointer and small integer.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1743
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries.
size_t size(BasicBlock *BB) const
ArrayRef< BasicBlock * > get(BasicBlock *BB)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
bool empty() const
Determine if the PriorityWorklist is empty or not.
bool insert(const T &X)
Insert a new element into the PriorityWorklist.
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:39
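A minimal sketch of the SSAUpdater workflow, assuming the per-block definitions and the uses to rewrite were collected elsewhere; rewriteAsSSA is a hypothetical helper.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
static void rewriteAsSSA(Type *Ty, StringRef Name,
                         ArrayRef<std::pair<BasicBlock *, Value *>> Defs,
                         ArrayRef<Use *> Uses) {
  SSAUpdater SSA;
  SSA.Initialize(Ty, Name);
  for (const auto &[BB, V] : Defs)
    SSA.AddAvailableValue(BB, V); // register each block-local definition
  for (Use *U : Uses)
    SSA.RewriteUse(*U); // PHI nodes are created on demand at join points
}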
The main scalar evolution driver.
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
void forgetLoopDispositions()
Called when the client has changed the disposition of values in this loop.
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:168
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:83
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:93
Flags controlling how much is checked when sinking or hoisting instructions.
Definition: LoopUtils.h:113
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, Loop &L, MemorySSA &MSSA)
Definition: LICM.cpp:375
unsigned LicmMssaNoAccForPromotionCap
Definition: LoopUtils.h:132
A version of PriorityWorklist that selects small size optimized data structures for the vector and map.
size_type size() const
Definition: SmallPtrSet.h:93
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
iterator begin() const
Definition: SmallPtrSet.h:403
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
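A tiny illustration of the set API above; the helper name is made up.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"
using namespace llvm;
// insert() returns {iterator, inserted}; the bool is true iff V was new,
// which lets the set double as a visited-marker during a walk.
static bool markVisited(SmallPtrSet<const Value *, 8> &Seen, const Value *V) {
  return Seen.insert(V).second;
}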
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void reserve(size_type N)
Definition: SmallVector.h:667
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
void setAlignment(Align Align)
Definition: Instructions.h:349
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:360
static unsigned getPointerOperandIndex()
Definition: Instructions.h:395
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vector, but is general enough to go beyond that when required.
Definition: TinyPtrVector.h:29
EltTy front() const
unsigned size() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:229
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition: Value.cpp:157
std::string getNameOrAsOperand() const
Definition: Value.cpp:446
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
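A short sketch of the common replace-and-rename idiom built from the Value methods above; replaceWith is a hypothetical helper.
#include "llvm/IR/Instruction.h"
using namespace llvm;
static void replaceWith(Instruction *Old, Instruction *New) {
  New->takeName(Old);           // keep the old SSA name for readability
  Old->replaceAllUsesWith(New); // redirect every use to the new value
  Old->eraseFromParent();       // Old is now dead and can be removed
}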
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows using arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
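A minimal sketch (not from LICM.cpp) of how the PatternMatch combinators above compose; isOneUseICmpAnd is illustrative.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;
// Match V = (icmp pred A, B) && <anything>, where the icmp has one use.
static bool isOneUseICmpAnd(Value *V) {
  ICmpInst::Predicate Pred;
  Value *A, *B;
  return match(V, m_LogicalAnd(m_OneUse(m_ICmp(Pred, m_Value(A), m_Value(B))),
                               m_Value()));
}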
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:450
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
@ NeverOverflows
Never overflows.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
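A small example of the range-based wrapper above; allOperandsOutside is a hypothetical helper.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// True iff no operand of I is an instruction defined inside BB.
static bool allOperandsOutside(const Instruction &I, const BasicBlock *BB) {
  return all_of(I.operands(), [BB](const Use &U) {
    auto *OpI = dyn_cast<Instruction>(U.get());
    return !OpI || OpI->getParent() != BB;
  });
}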
OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT)
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, MemorySSAUpdater &MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags &LICMFlags, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if it is legal to hoist or sink this instruction, disregarding the possible introduction of faults.
Definition: LICM.cpp:1160
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>s.
Definition: SetOperations.h:40
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing the effect of MI in a DIExpression.
Definition: Utils.cpp:1367
void initializeLegacyLICMPassPass(PassRegistry &)
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater than or equal to the requested alignment.
Definition: Loads.cpp:201
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing function (which is required to exist) before the given instruction.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_* functions, and *::succ_iterator.
Definition: Interval.h:99
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value, returning the original object being addressed.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:748
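A classic use of the early-increment adaptor above, paired with the triviality check listed further below; deleteDeadInBlock is illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
static void deleteDeadInBlock(BasicBlock &BB) {
  // The adaptor advances the iterator before yielding each instruction,
  // so erasing the current one does not invalidate the traversal.
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}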
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Pass * createLICMPass()
Definition: LICM.cpp:362
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, AssumptionCache *, TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, bool AllowSpeculation)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block, and that are in the current loop) in depth-first order w.r.t. the DominatorTree.
Definition: LICM.cpp:863
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of the llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:18
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the pred_* functions, and *::pred_iterator.
Definition: Interval.h:109
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cannot depend on Object containing any particular value on unwinding paths.
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:141
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:397
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecessors of BB to be predecessors of the new block.
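A hedged sketch of the call above, assuming the analyses come from the caller; the suffix string and the helper name are illustrative.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
// Give ExitBB a dedicated predecessor block for the listed in-loop edges,
// updating DT/LI/MSSAU and keeping LCSSA form intact.
static BasicBlock *splitExitPreds(BasicBlock *ExitBB,
                                  ArrayRef<BasicBlock *> InLoopPreds,
                                  DominatorTree *DT, LoopInfo *LI,
                                  MemorySSAUpdater *MSSAU) {
  return SplitBlockPredecessors(ExitBB, InLoopPreds, ".split", DT, LI, MSSAU,
                                /*PreserveLCSSA=*/true);
}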
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:89
bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and, if the resulting llvm.assume is valid, inserts it before I.
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:348
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1577
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock * > &, SmallVectorImpl< Instruction * > &, SmallVectorImpl< MemoryAccess * > &, PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, const TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, bool AllowSpeculation, bool HasReadsOutsideSet)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before the loop.
Definition: LICM.cpp:1965
bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1946
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the given value is known to be non-negative.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1846
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which erases every element for which the predicate returns true.
Definition: STLExtras.h:2113
auto predecessors(const MachineBasicBlock *BB)
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater &, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, Loop *OutermostLoop=nullptr)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block, and that are in the current loop) in reverse depth-first order w.r.t. the DominatorTree.
Definition: LICM.cpp:542
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
unsigned pred_size(const MachineBasicBlock *BB)
cl::opt< unsigned > SetLicmMssaOptCap
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Call sinkRegion on loops contained within the specified loop in order from innermost to outermost.
Definition: LICM.cpp:610
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: BitVector.h:858
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition: Metadata.h:651
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes that apply to both.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
unsigned MssaOptCap
Definition: LICM.h:49
unsigned MssaNoAccForPromotionCap
Definition: LICM.h:50
bool AllowSpeculation
Definition: LICM.h:51
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes "below" them in the pipeline.
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1085
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1113
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371