LLVM  9.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/Metadata.h"
64 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <utility>
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "licm"
80 
81 STATISTIC(NumCreatedBlocks, "Number of blocks created");
82 STATISTIC(NumClonedBranches, "Number of branches cloned");
83 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
84 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
85 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
86 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
87 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
88 
89 /// Memory promotion is enabled by default.
90 static cl::opt<bool>
91  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
92  cl::desc("Disable memory promotion in LICM pass"));
93 
95  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
96  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
97 
99  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
100  cl::desc("Max num uses visited for identifying load "
101  "invariance in loop using invariant start (default = 8)"));
102 
103 // Default value of zero implies we use the regular alias set tracker mechanism
104 // instead of the cross product using AA to identify aliasing of the memory
105 // location we are interested in.
106 static cl::opt<int>
107 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
108  cl::desc("How many instruction to cross product using AA"));
109 
110 // Experimental option to allow imprecision in LICM in pathological cases, in
111 // exchange for faster compile. This is to be removed if MemorySSA starts to
112 // address the same issue. This flag applies only when LICM uses MemorySSA
113 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
114 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
115 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
116 // which may not be precise, since optimizeUses is capped. The result is
117 // correct, but we may not get as "far up" as possible to get which access is
118 // clobbering the one queried.
120  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
121  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
122  "for faster compile. Caps the MemorySSA clobbering calls."));
123 
124 // Experimentally, memory promotion carries less importance than sinking and
125 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
126 // compile time.
128  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
129  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
130  "effect. When MSSA in LICM is enabled, then this is the maximum "
131  "number of accesses allowed to be present in a loop in order to "
132  "enable memory promotion."));
133 
134 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
135 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
136  const LoopSafetyInfo *SafetyInfo,
137  TargetTransformInfo *TTI, bool &FreeInLoop);
138 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
139  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
141 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
142  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
154  Loop *CurLoop,
155  SinkAndHoistLICMFlags &Flags);
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
168 struct LoopInvariantCodeMotion {
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
177  unsigned LicmMssaNoAccForPromotionCap)
178  : LicmMssaOptCap(LicmMssaOptCap),
179  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
180 
181 private:
182  ASTrackerMapTy LoopToAliasSetMap;
183  unsigned LicmMssaOptCap;
184  unsigned LicmMssaNoAccForPromotionCap;
185 
186  std::unique_ptr<AliasSetTracker>
187  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
188  std::unique_ptr<AliasSetTracker>
189  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
190  MemorySSAUpdater *MSSAU);
191 };
192 
193 struct LegacyLICMPass : public LoopPass {
194  static char ID; // Pass identification, replacement for typeid
195  LegacyLICMPass(
196  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
197  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
198  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
200  }
201 
202  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
203  if (skipLoop(L)) {
204  // If we have run LICM on a previous loop but now we are skipping
205  // (because we've hit the opt-bisect limit), we need to clear the
206  // loop alias information.
207  LICM.getLoopToAliasSetMap().clear();
208  return false;
209  }
210 
211  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
213  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
214  : nullptr;
215  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
216  // pass. Function analyses need to be preserved across loop transformations
217  // but ORE cannot be preserved (see comment before the pass definition).
219  return LICM.runOnLoop(L,
220  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
221  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
222  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
223  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
224  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
225  *L->getHeader()->getParent()),
226  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
227  }
228 
229  /// This transformation requires natural loop information & requires that
230  /// loop preheaders be inserted into the CFG...
231  ///
232  void getAnalysisUsage(AnalysisUsage &AU) const override {
239  }
242  }
243 
245 
246  bool doFinalization() override {
247  auto &AliasSetMap = LICM.getLoopToAliasSetMap();
248  // All loops in the AliasSetMap should be cleaned up already. The only case
249  // where we fail to do so is if an outer loop gets deleted before LICM
250  // visits it.
251  assert(all_of(AliasSetMap,
253  return !KV.first->getParentLoop();
254  }) &&
255  "Didn't free loop alias sets");
256  AliasSetMap.clear();
257  return false;
258  }
259 
260 private:
261  LoopInvariantCodeMotion LICM;
262 
263  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
264  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
265  Loop *L) override;
266 
267  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
268  /// set.
269  void deleteAnalysisValue(Value *V, Loop *L) override;
270 
271  /// Simple Analysis hook. Delete loop L from alias set map.
272  void deleteAnalysisLoop(Loop *L) override;
273 };
274 } // namespace
275 
278  const auto &FAM =
279  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
280  Function *F = L.getHeader()->getParent();
281 
282  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
283  // FIXME: This should probably be optional rather than required.
284  if (!ORE)
285  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
286  "cached at a higher level");
287 
288  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
289  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
290  AR.MSSA, ORE, true))
291  return PreservedAnalyses::all();
292 
293  auto PA = getLoopPassPreservedAnalyses();
294 
295  PA.preserve<DominatorTreeAnalysis>();
296  PA.preserve<LoopAnalysis>();
297 
298  return PA;
299 }
300 
301 char LegacyLICMPass::ID = 0;
302 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
303  false, false)
308 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
309  false)
310 
311 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
312 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
313  unsigned LicmMssaNoAccForPromotionCap) {
314  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
315 }
316 
317 /// Hoist expressions out of the specified loop. Note, alias info for inner
318 /// loop is not preserved so it is not a good idea to run LICM multiple
319 /// times on one loop.
320 /// We should delete AST for inner loops in the new pass manager to avoid
321 /// memory leak.
322 ///
323 bool LoopInvariantCodeMotion::runOnLoop(
324  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
326  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
327  bool Changed = false;
328 
329  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
330 
331  std::unique_ptr<AliasSetTracker> CurAST;
332  std::unique_ptr<MemorySSAUpdater> MSSAU;
333  bool NoOfMemAccTooLarge = false;
334  unsigned LicmMssaOptCounter = 0;
335 
336  if (!MSSA) {
337  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
338  CurAST = collectAliasInfoForLoop(L, LI, AA);
339  } else {
340  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
341  MSSAU = make_unique<MemorySSAUpdater>(MSSA);
342 
343  unsigned AccessCapCount = 0;
344  for (auto *BB : L->getBlocks()) {
345  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
346  for (const auto &MA : *Accesses) {
347  (void)MA;
348  AccessCapCount++;
349  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
350  NoOfMemAccTooLarge = true;
351  break;
352  }
353  }
354  }
355  if (NoOfMemAccTooLarge)
356  break;
357  }
358  }
359 
360  // Get the preheader block to move instructions into...
361  BasicBlock *Preheader = L->getLoopPreheader();
362 
363  // Compute loop safety information.
364  ICFLoopSafetyInfo SafetyInfo(DT);
365  SafetyInfo.computeLoopSafetyInfo(L);
366 
367  // We want to visit all of the instructions in this loop... that are not parts
368  // of our subloops (they have already had their invariants hoisted out of
369  // their loop, into this loop, so there is no need to process the BODIES of
370  // the subloops).
371  //
372  // Traverse the body of the loop in depth first order on the dominator tree so
373  // that we are guaranteed to see definitions before we see uses. This allows
374  // us to sink instructions in one pass, without iteration. After sinking
375  // instructions, we perform another pass to hoist them out of the loop.
376  SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
377  LicmMssaOptCap, LicmMssaNoAccForPromotionCap};
378  if (L->hasDedicatedExits())
379  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
380  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
381  if (Preheader)
382  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
383  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
384 
385  // Now that all loop invariants have been removed from the loop, promote any
386  // memory references to scalars that we can.
387  // Don't sink stores from loops without dedicated block exits. Exits
388  // containing indirect branches are not transformed by loop simplify,
389  // make sure we catch that. An additional load may be generated in the
390  // preheader for SSA updater, so also avoid sinking when no preheader
391  // is available.
392  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
393  !NoOfMemAccTooLarge) {
394  // Figure out the loop exits and their insertion points
395  SmallVector<BasicBlock *, 8> ExitBlocks;
396  L->getUniqueExitBlocks(ExitBlocks);
397 
398  // We can't insert into a catchswitch.
399  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
400  return isa<CatchSwitchInst>(Exit->getTerminator());
401  });
402 
403  if (!HasCatchSwitch) {
405  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
406  InsertPts.reserve(ExitBlocks.size());
407  if (MSSAU)
408  MSSAInsertPts.reserve(ExitBlocks.size());
409  for (BasicBlock *ExitBlock : ExitBlocks) {
410  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
411  if (MSSAU)
412  MSSAInsertPts.push_back(nullptr);
413  }
414 
415  PredIteratorCache PIC;
416 
417  bool Promoted = false;
418 
419  // Build an AST using MSSA.
420  if (!CurAST.get())
421  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
422 
423  // Loop over all of the alias sets in the tracker object.
424  for (AliasSet &AS : *CurAST) {
425  // We can promote this alias set if it has a store, if it is a "Must"
426  // alias set, if the pointer is loop invariant, and if we are not
427  // eliminating any volatile loads or stores.
428  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
429  !L->isLoopInvariant(AS.begin()->getValue()))
430  continue;
431 
432  assert(
433  !AS.empty() &&
434  "Must alias set should have at least one pointer element in it!");
435 
436  SmallSetVector<Value *, 8> PointerMustAliases;
437  for (const auto &ASI : AS)
438  PointerMustAliases.insert(ASI.getValue());
439 
440  Promoted |= promoteLoopAccessesToScalars(
441  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
442  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
443  }
444 
445  // Once we have promoted values across the loop body we have to
446  // recursively reform LCSSA as any nested loop may now have values defined
447  // within the loop used in the outer loop.
448  // FIXME: This is really heavy handed. It would be a bit better to use an
449  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
450  // it as it went.
451  if (Promoted)
452  formLCSSARecursively(*L, *DT, LI, SE);
453 
454  Changed |= Promoted;
455  }
456  }
457 
458  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
459  // specifically moving instructions across the loop boundary and so it is
460  // especially in need of sanity checking here.
461  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
462  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
463  "Parent loop not left in LCSSA form after LICM!");
464 
465  // If this loop is nested inside of another one, save the alias information
466  // for when we process the outer loop.
467  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
468  LoopToAliasSetMap[L] = std::move(CurAST);
469 
470  if (MSSAU.get() && VerifyMemorySSA)
471  MSSAU->getMemorySSA()->verifyMemorySSA();
472 
473  if (Changed && SE)
474  SE->forgetLoopDispositions(L);
475  return Changed;
476 }
477 
478 /// Walk the specified region of the CFG (defined by all blocks dominated by
479 /// the specified block, and that are in the current loop) in reverse depth
480 /// first order w.r.t the DominatorTree. This allows us to visit uses before
481 /// definitions, allowing us to sink a loop body in one pass without iteration.
482 ///
485  TargetTransformInfo *TTI, Loop *CurLoop,
486  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
487  ICFLoopSafetyInfo *SafetyInfo,
488  SinkAndHoistLICMFlags &Flags,
490 
491  // Verify inputs.
492  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
493  CurLoop != nullptr && SafetyInfo != nullptr &&
494  "Unexpected input to sinkRegion.");
495  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
496  "Either AliasSetTracker or MemorySSA should be initialized.");
497 
498  // We want to visit children before parents. We will enque all the parents
499  // before their children in the worklist and process the worklist in reverse
500  // order.
502 
503  bool Changed = false;
504  for (DomTreeNode *DTN : reverse(Worklist)) {
505  BasicBlock *BB = DTN->getBlock();
506  // Only need to process the contents of this block if it is not part of a
507  // subloop (which would already have been processed).
508  if (inSubLoop(BB, CurLoop, LI))
509  continue;
510 
511  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
512  Instruction &I = *--II;
513 
514  // If the instruction is dead, we would try to sink it because it isn't
515  // used in the loop, instead, just delete it.
516  if (isInstructionTriviallyDead(&I, TLI)) {
517  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
518  salvageDebugInfo(I);
519  ++II;
520  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
521  Changed = true;
522  continue;
523  }
524 
525  // Check to see if we can sink this instruction to the exit blocks
526  // of the loop. We can do this if the all users of the instruction are
527  // outside of the loop. In this case, it doesn't even matter if the
528  // operands of the instruction are loop invariant.
529  //
530  bool FreeInLoop = false;
531  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
532  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
533  ORE) &&
534  !I.mayHaveSideEffects()) {
535  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
536  if (!FreeInLoop) {
537  ++II;
538  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
539  }
540  Changed = true;
541  }
542  }
543  }
544  }
545  if (MSSAU && VerifyMemorySSA)
546  MSSAU->getMemorySSA()->verifyMemorySSA();
547  return Changed;
548 }
549 
550 namespace {
551 // This is a helper class for hoistRegion to make it able to hoist control flow
552 // in order to be able to hoist phis. The way this works is that we initially
553 // start hoisting to the loop preheader, and when we see a loop invariant branch
554 // we make note of this. When we then come to hoist an instruction that's
555 // conditional on such a branch we duplicate the branch and the relevant control
556 // flow, then hoist the instruction into the block corresponding to its original
557 // block in the duplicated control flow.
558 class ControlFlowHoister {
559 private:
560  // Information about the loop we are hoisting from
561  LoopInfo *LI;
562  DominatorTree *DT;
563  Loop *CurLoop;
564  MemorySSAUpdater *MSSAU;
565 
566  // A map of blocks in the loop to the block their instructions will be hoisted
567  // to.
568  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
569 
570  // The branches that we can hoist, mapped to the block that marks a
571  // convergence point of their control flow.
572  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
573 
574 public:
575  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
576  MemorySSAUpdater *MSSAU)
577  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
578 
579  void registerPossiblyHoistableBranch(BranchInst *BI) {
580  // We can only hoist conditional branches with loop invariant operands.
581  if (!ControlFlowHoisting || !BI->isConditional() ||
582  !CurLoop->hasLoopInvariantOperands(BI))
583  return;
584 
585  // The branch destinations need to be in the loop, and we don't gain
586  // anything by duplicating conditional branches with duplicate successors,
587  // as it's essentially the same as an unconditional branch.
588  BasicBlock *TrueDest = BI->getSuccessor(0);
589  BasicBlock *FalseDest = BI->getSuccessor(1);
590  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
591  TrueDest == FalseDest)
592  return;
593 
594  // We can hoist BI if one branch destination is the successor of the other,
595  // or both have common successor which we check by seeing if the
596  // intersection of their successors is non-empty.
597  // TODO: This could be expanded to allowing branches where both ends
598  // eventually converge to a single block.
599  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
600  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
601  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
602  BasicBlock *CommonSucc = nullptr;
603  if (TrueDestSucc.count(FalseDest)) {
604  CommonSucc = FalseDest;
605  } else if (FalseDestSucc.count(TrueDest)) {
606  CommonSucc = TrueDest;
607  } else {
608  set_intersect(TrueDestSucc, FalseDestSucc);
609  // If there's one common successor use that.
610  if (TrueDestSucc.size() == 1)
611  CommonSucc = *TrueDestSucc.begin();
612  // If there's more than one pick whichever appears first in the block list
613  // (we can't use the value returned by TrueDestSucc.begin() as it's
614  // unpredicatable which element gets returned).
615  else if (!TrueDestSucc.empty()) {
616  Function *F = TrueDest->getParent();
617  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
618  auto It = std::find_if(F->begin(), F->end(), IsSucc);
619  assert(It != F->end() && "Could not find successor in function");
620  CommonSucc = &*It;
621  }
622  }
623  // The common successor has to be dominated by the branch, as otherwise
624  // there will be some other path to the successor that will not be
625  // controlled by this branch so any phi we hoist would be controlled by the
626  // wrong condition. This also takes care of avoiding hoisting of loop back
627  // edges.
628  // TODO: In some cases this could be relaxed if the successor is dominated
629  // by another block that's been hoisted and we can guarantee that the
630  // control flow has been replicated exactly.
631  if (CommonSucc && DT->dominates(BI, CommonSucc))
632  HoistableBranches[BI] = CommonSucc;
633  }
634 
635  bool canHoistPHI(PHINode *PN) {
636  // The phi must have loop invariant operands.
637  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
638  return false;
639  // We can hoist phis if the block they are in is the target of hoistable
640  // branches which cover all of the predecessors of the block.
641  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
642  BasicBlock *BB = PN->getParent();
643  for (BasicBlock *PredBB : predecessors(BB))
644  PredecessorBlocks.insert(PredBB);
645  // If we have less predecessor blocks than predecessors then the phi will
646  // have more than one incoming value for the same block which we can't
647  // handle.
648  // TODO: This could be handled be erasing some of the duplicate incoming
649  // values.
650  if (PredecessorBlocks.size() != pred_size(BB))
651  return false;
652  for (auto &Pair : HoistableBranches) {
653  if (Pair.second == BB) {
654  // Which blocks are predecessors via this branch depends on if the
655  // branch is triangle-like or diamond-like.
656  if (Pair.first->getSuccessor(0) == BB) {
657  PredecessorBlocks.erase(Pair.first->getParent());
658  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
659  } else if (Pair.first->getSuccessor(1) == BB) {
660  PredecessorBlocks.erase(Pair.first->getParent());
661  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
662  } else {
663  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
664  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
665  }
666  }
667  }
668  // PredecessorBlocks will now be empty if for every predecessor of BB we
669  // found a hoistable branch source.
670  return PredecessorBlocks.empty();
671  }
672 
673  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
674  if (!ControlFlowHoisting)
675  return CurLoop->getLoopPreheader();
676  // If BB has already been hoisted, return that
677  if (HoistDestinationMap.count(BB))
678  return HoistDestinationMap[BB];
679 
680  // Check if this block is conditional based on a pending branch
681  auto HasBBAsSuccessor =
683  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
684  Pair.first->getSuccessor(1) == BB);
685  };
686  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
687  HasBBAsSuccessor);
688 
689  // If not involved in a pending branch, hoist to preheader
690  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
691  if (It == HoistableBranches.end()) {
692  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
693  << " as hoist destination for " << BB->getName()
694  << "\n");
695  HoistDestinationMap[BB] = InitialPreheader;
696  return InitialPreheader;
697  }
698  BranchInst *BI = It->first;
699  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
700  HoistableBranches.end() &&
701  "BB is expected to be the target of at most one branch");
702 
703  LLVMContext &C = BB->getContext();
704  BasicBlock *TrueDest = BI->getSuccessor(0);
705  BasicBlock *FalseDest = BI->getSuccessor(1);
706  BasicBlock *CommonSucc = HoistableBranches[BI];
707  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
708 
709  // Create hoisted versions of blocks that currently don't have them
710  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
711  if (HoistDestinationMap.count(Orig))
712  return HoistDestinationMap[Orig];
713  BasicBlock *New =
714  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
715  HoistDestinationMap[Orig] = New;
716  DT->addNewBlock(New, HoistTarget);
717  if (CurLoop->getParentLoop())
718  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
719  ++NumCreatedBlocks;
720  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
721  << " as hoist destination for " << Orig->getName()
722  << "\n");
723  return New;
724  };
725  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
726  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
727  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
728 
729  // Link up these blocks with branches.
730  if (!HoistCommonSucc->getTerminator()) {
731  // The new common successor we've generated will branch to whatever that
732  // hoist target branched to.
733  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
734  assert(TargetSucc && "Expected hoist target to have a single successor");
735  HoistCommonSucc->moveBefore(TargetSucc);
736  BranchInst::Create(TargetSucc, HoistCommonSucc);
737  }
738  if (!HoistTrueDest->getTerminator()) {
739  HoistTrueDest->moveBefore(HoistCommonSucc);
740  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
741  }
742  if (!HoistFalseDest->getTerminator()) {
743  HoistFalseDest->moveBefore(HoistCommonSucc);
744  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
745  }
746 
747  // If BI is being cloned to what was originally the preheader then
748  // HoistCommonSucc will now be the new preheader.
749  if (HoistTarget == InitialPreheader) {
750  // Phis in the loop header now need to use the new preheader.
751  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
752  if (MSSAU)
754  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
755  // The new preheader dominates the loop header.
756  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
757  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
758  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
759  // The preheader hoist destination is now the new preheader, with the
760  // exception of the hoist destination of this branch.
761  for (auto &Pair : HoistDestinationMap)
762  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
763  Pair.second = HoistCommonSucc;
764  }
765 
766  // Now finally clone BI.
768  HoistTarget->getTerminator(),
769  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
770  ++NumClonedBranches;
771 
772  assert(CurLoop->getLoopPreheader() &&
773  "Hoisting blocks should not have destroyed preheader");
774  return HoistDestinationMap[BB];
775  }
776 };
777 } // namespace
778 
779 /// Walk the specified region of the CFG (defined by all blocks dominated by
780 /// the specified block, and that are in the current loop) in depth first
781 /// order w.r.t the DominatorTree. This allows us to visit definitions before
782 /// uses, allowing us to hoist a loop body in one pass without iteration.
783 ///
785  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
786  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
787  ICFLoopSafetyInfo *SafetyInfo,
788  SinkAndHoistLICMFlags &Flags,
790  // Verify inputs.
791  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
792  CurLoop != nullptr && SafetyInfo != nullptr &&
793  "Unexpected input to hoistRegion.");
794  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
795  "Either AliasSetTracker or MemorySSA should be initialized.");
796 
797  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
798 
799  // Keep track of instructions that have been hoisted, as they may need to be
800  // re-hoisted if they end up not dominating all of their uses.
801  SmallVector<Instruction *, 16> HoistedInstructions;
802 
803  // For PHI hoisting to work we need to hoist blocks before their successors.
804  // We can do this by iterating through the blocks in the loop in reverse
805  // post-order.
806  LoopBlocksRPO Worklist(CurLoop);
807  Worklist.perform(LI);
808  bool Changed = false;
809  for (BasicBlock *BB : Worklist) {
810  // Only need to process the contents of this block if it is not part of a
811  // subloop (which would already have been processed).
812  if (inSubLoop(BB, CurLoop, LI))
813  continue;
814 
815  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
816  Instruction &I = *II++;
817  // Try constant folding this instruction. If all the operands are
818  // constants, it is technically hoistable, but it would be better to
819  // just fold it.
821  &I, I.getModule()->getDataLayout(), TLI)) {
822  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
823  << '\n');
824  if (CurAST)
825  CurAST->copyValue(&I, C);
826  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
828  if (isInstructionTriviallyDead(&I, TLI))
829  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
830  Changed = true;
831  continue;
832  }
833 
834  // Try hoisting the instruction out to the preheader. We can only do
835  // this if all of the operands of the instruction are loop invariant and
836  // if it is safe to hoist the instruction.
837  // TODO: It may be safe to hoist if we are hoisting to a conditional block
838  // and we have accurately duplicated the control flow from the loop header
839  // to that block.
840  if (CurLoop->hasLoopInvariantOperands(&I) &&
841  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
842  ORE) &&
844  I, DT, CurLoop, SafetyInfo, ORE,
845  CurLoop->getLoopPreheader()->getTerminator())) {
846  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
847  MSSAU, ORE);
848  HoistedInstructions.push_back(&I);
849  Changed = true;
850  continue;
851  }
852 
853  // Attempt to remove floating point division out of the loop by
854  // converting it to a reciprocal multiplication.
855  if (I.getOpcode() == Instruction::FDiv &&
856  CurLoop->isLoopInvariant(I.getOperand(1)) &&
857  I.hasAllowReciprocal()) {
858  auto Divisor = I.getOperand(1);
859  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
860  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
861  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
862  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
863  ReciprocalDivisor->insertBefore(&I);
864 
865  auto Product =
866  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
867  Product->setFastMathFlags(I.getFastMathFlags());
868  SafetyInfo->insertInstructionTo(Product, I.getParent());
869  Product->insertAfter(&I);
870  I.replaceAllUsesWith(Product);
871  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
872 
873  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
874  SafetyInfo, MSSAU, ORE);
875  HoistedInstructions.push_back(ReciprocalDivisor);
876  Changed = true;
877  continue;
878  }
879 
880  auto IsInvariantStart = [&](Instruction &I) {
881  using namespace PatternMatch;
882  return I.use_empty() &&
883  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
884  };
885  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
886  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
887  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
888  };
889  if ((IsInvariantStart(I) || isGuard(&I)) &&
890  CurLoop->hasLoopInvariantOperands(&I) &&
891  MustExecuteWithoutWritesBefore(I)) {
892  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
893  MSSAU, ORE);
894  HoistedInstructions.push_back(&I);
895  Changed = true;
896  continue;
897  }
898 
899  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
900  if (CFH.canHoistPHI(PN)) {
901  // Redirect incoming blocks first to ensure that we create hoisted
902  // versions of those blocks before we hoist the phi.
903  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
904  PN->setIncomingBlock(
905  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
906  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
907  MSSAU, ORE);
908  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
909  Changed = true;
910  continue;
911  }
912  }
913 
914  // Remember possibly hoistable branches so we can actually hoist them
915  // later if needed.
916  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
917  CFH.registerPossiblyHoistableBranch(BI);
918  }
919  }
920 
921  // If we hoisted instructions to a conditional block they may not dominate
922  // their uses that weren't hoisted (such as phis where some operands are not
923  // loop invariant). If so make them unconditional by moving them to their
924  // immediate dominator. We iterate through the instructions in reverse order
925  // which ensures that when we rehoist an instruction we rehoist its operands,
926  // and also keep track of where in the block we are rehoisting to to make sure
927  // that we rehoist instructions before the instructions that use them.
928  Instruction *HoistPoint = nullptr;
929  if (ControlFlowHoisting) {
930  for (Instruction *I : reverse(HoistedInstructions)) {
931  if (!llvm::all_of(I->uses(),
932  [&](Use &U) { return DT->dominates(I, U); })) {
933  BasicBlock *Dominator =
934  DT->getNode(I->getParent())->getIDom()->getBlock();
935  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
936  if (HoistPoint)
937  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
938  "New hoist point expected to dominate old hoist point");
939  HoistPoint = Dominator->getTerminator();
940  }
941  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
942  << HoistPoint->getParent()->getName()
943  << ": " << *I << "\n");
944  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
945  HoistPoint = I;
946  Changed = true;
947  }
948  }
949  }
950  if (MSSAU && VerifyMemorySSA)
951  MSSAU->getMemorySSA()->verifyMemorySSA();
952 
953  // Now that we've finished hoisting make sure that LI and DT are still
954  // valid.
955 #ifndef NDEBUG
956  if (Changed) {
958  "Dominator tree verification failed");
959  LI->verify(*DT);
960  }
961 #endif
962 
963  return Changed;
964 }
965 
966 // Return true if LI is invariant within scope of the loop. LI is invariant if
967 // CurLoop is dominated by an invariant.start representing the same memory
968 // location and size as the memory location LI loads from, and also the
969 // invariant.start has no uses.
971  Loop *CurLoop) {
972  Value *Addr = LI->getOperand(0);
973  const DataLayout &DL = LI->getModule()->getDataLayout();
974  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
975  cast<PointerType>(Addr->getType())->getElementType());
976 
977  // if the type is i8 addrspace(x)*, we know this is the type of
978  // llvm.invariant.start operand
979  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
980  LI->getPointerAddressSpace());
981  unsigned BitcastsVisited = 0;
982  // Look through bitcasts until we reach the i8* type (this is invariant.start
983  // operand type).
984  while (Addr->getType() != PtrInt8Ty) {
985  auto *BC = dyn_cast<BitCastInst>(Addr);
986  // Avoid traversing high number of bitcast uses.
987  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
988  return false;
989  Addr = BC->getOperand(0);
990  }
991 
992  unsigned UsesVisited = 0;
993  // Traverse all uses of the load operand value, to see if invariant.start is
994  // one of the uses, and whether it dominates the load instruction.
995  for (auto *U : Addr->users()) {
996  // Avoid traversing for Load operand with high number of users.
997  if (++UsesVisited > MaxNumUsesTraversed)
998  return false;
1000  // If there are escaping uses of invariant.start instruction, the load maybe
1001  // non-invariant.
1002  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1003  !II->use_empty())
1004  continue;
1005  unsigned InvariantSizeInBits =
1006  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
1007  // Confirm the invariant.start location size contains the load operand size
1008  // in bits. Also, the invariant.start should dominate the load, and we
1009  // should not hoist the load out of a loop that contains this dominating
1010  // invariant.start.
1011  if (LocSizeInBits <= InvariantSizeInBits &&
1012  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1013  return true;
1014  }
1015 
1016  return false;
1017 }
1018 
1019 namespace {
1020 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1021 /// sink a given instruction out of a loop. Does not address legality
1022 /// concerns such as aliasing or speculation safety.
1023 bool isHoistableAndSinkableInst(Instruction &I) {
1024  // Only these instructions are hoistable/sinkable.
1025  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1026  isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
1027  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1028  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1029  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1030  isa<InsertValueInst>(I));
1031 }
1032 /// Return true if all of the alias sets within this AST are known not to
1033 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1034 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1035  const Loop *L) {
1036  if (CurAST) {
1037  for (AliasSet &AS : *CurAST) {
1038  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1039  return false;
1040  }
1041  }
1042  return true;
1043  } else { /*MSSAU*/
1044  for (auto *BB : L->getBlocks())
1045  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1046  return false;
1047  return true;
1048  }
1049 }
1050 
1051 /// Return true if I is the only Instruction with a MemoryAccess in L.
1052 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1053  const MemorySSAUpdater *MSSAU) {
1054  for (auto *BB : L->getBlocks())
1055  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1056  int NotAPhi = 0;
1057  for (const auto &Acc : *Accs) {
1058  if (isa<MemoryPhi>(&Acc))
1059  continue;
1060  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1061  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1062  return false;
1063  }
1064  }
1065  return true;
1066 }
1067 }
1068 
1070  Loop *CurLoop, AliasSetTracker *CurAST,
1071  MemorySSAUpdater *MSSAU,
1072  bool TargetExecutesOncePerLoop,
1073  SinkAndHoistLICMFlags *Flags,
1075  // If we don't understand the instruction, bail early.
1076  if (!isHoistableAndSinkableInst(I))
1077  return false;
1078 
1079  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1080  if (MSSA)
1081  assert(Flags != nullptr && "Flags cannot be null.");
1082 
1083  // Loads have extra constraints we have to verify before we can hoist them.
1084  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1085  if (!LI->isUnordered())
1086  return false; // Don't sink/hoist volatile or ordered atomic loads!
1087 
1088  // Loads from constant memory are always safe to move, even if they end up
1089  // in the same alias set as something that ends up being modified.
1090  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1091  return true;
1092  if (LI->getMetadata(LLVMContext::MD_invariant_load))
1093  return true;
1094 
1095  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1096  return false; // Don't risk duplicating unordered loads
1097 
1098  // This checks for an invariant.start dominating the load.
1099  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1100  return true;
1101 
1102  bool Invalidated;
1103  if (CurAST)
1104  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1105  CurLoop, AA);
1106  else
1107  Invalidated = pointerInvalidatedByLoopWithMSSA(
1108  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1109  // Check loop-invariant address because this may also be a sinkable load
1110  // whose address is not necessarily loop-invariant.
1111  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1112  ORE->emit([&]() {
1113  return OptimizationRemarkMissed(
1114  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1115  << "failed to move load with loop-invariant address "
1116  "because the loop may invalidate its value";
1117  });
1118 
1119  return !Invalidated;
1120  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1121  // Don't sink or hoist dbg info; it's legal, but not useful.
1122  if (isa<DbgInfoIntrinsic>(I))
1123  return false;
1124 
1125  // Don't sink calls which can throw.
1126  if (CI->mayThrow())
1127  return false;
1128 
1129  using namespace PatternMatch;
1130  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1131  // Assumes don't actually alias anything or throw
1132  return true;
1133 
1134  // Handle simple cases by querying alias analysis.
1135  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1136  if (Behavior == FMRB_DoesNotAccessMemory)
1137  return true;
1138  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1139  // A readonly argmemonly function only reads from memory pointed to by
1140  // it's arguments with arbitrary offsets. If we can prove there are no
1141  // writes to this memory in the loop, we can hoist or sink.
1143  // TODO: expand to writeable arguments
1144  for (Value *Op : CI->arg_operands())
1145  if (Op->getType()->isPointerTy()) {
1146  bool Invalidated;
1147  if (CurAST)
1148  Invalidated = pointerInvalidatedByLoop(
1150  CurAST, CurLoop, AA);
1151  else
1152  Invalidated = pointerInvalidatedByLoopWithMSSA(
1153  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1154  *Flags);
1155  if (Invalidated)
1156  return false;
1157  }
1158  return true;
1159  }
1160 
1161  // If this call only reads from memory and there are no writes to memory
1162  // in the loop, we can hoist or sink the call as appropriate.
1163  if (isReadOnly(CurAST, MSSAU, CurLoop))
1164  return true;
1165  }
1166 
1167  // FIXME: This should use mod/ref information to see if we can hoist or
1168  // sink the call.
1169 
1170  return false;
1171  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1172  // Fences alias (most) everything to provide ordering. For the moment,
1173  // just give up if there are any other memory operations in the loop.
1174  if (CurAST) {
1175  auto Begin = CurAST->begin();
1176  assert(Begin != CurAST->end() && "must contain FI");
1177  if (std::next(Begin) != CurAST->end())
1178  // constant memory for instance, TODO: handle better
1179  return false;
1180  auto *UniqueI = Begin->getUniqueInstruction();
1181  if (!UniqueI)
1182  // other memory op, give up
1183  return false;
1184  (void)FI; // suppress unused variable warning
1185  assert(UniqueI == FI && "AS must contain FI");
1186  return true;
1187  } else // MSSAU
1188  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1189  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1190  if (!SI->isUnordered())
1191  return false; // Don't sink/hoist volatile or ordered atomic store!
1192 
1193  // We can only hoist a store that we can prove writes a value which is not
1194  // read or overwritten within the loop. For those cases, we fallback to
1195  // load store promotion instead. TODO: We can extend this to cases where
1196  // there is exactly one write to the location and that write dominates an
1197  // arbitrary number of reads in the loop.
1198  if (CurAST) {
1199  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1200 
1201  if (AS.isRef() || !AS.isMustAlias())
1202  // Quick exit test, handled by the full path below as well.
1203  return false;
1204  auto *UniqueI = AS.getUniqueInstruction();
1205  if (!UniqueI)
1206  // other memory op, give up
1207  return false;
1208  assert(UniqueI == SI && "AS must contain SI");
1209  return true;
1210  } else { // MSSAU
1211  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1212  return true;
1213  // If there are more accesses than the Promotion cap, give up, we're not
1214  // walking a list that long.
1215  if (Flags->NoOfMemAccTooLarge)
1216  return false;
1217  // Check store only if there's still "quota" to check clobber.
1218  if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
1219  return false;
1220  // If there are interfering Uses (i.e. their defining access is in the
1221  // loop), or ordered loads (stored as Defs!), don't move this store.
1222  // Could do better here, but this is conservatively correct.
1223  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1224  // moving accesses. Can also extend to dominating uses.
1225  for (auto *BB : CurLoop->getBlocks())
1226  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1227  for (const auto &MA : *Accesses)
1228  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1229  auto *MD = MU->getDefiningAccess();
1230  if (!MSSA->isLiveOnEntryDef(MD) &&
1231  CurLoop->contains(MD->getBlock()))
1232  return false;
1233  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
1234  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1235  (void)LI; // Silence warning.
1236  assert(!LI->isUnordered() && "Expected unordered load");
1237  return false;
1238  }
1239  }
1240 
1242  Flags->LicmMssaOptCounter++;
1243  // If there are no clobbering Defs in the loop, store is safe to hoist.
1244  return MSSA->isLiveOnEntryDef(Source) ||
1245  !CurLoop->contains(Source->getBlock());
1246  }
1247  }
1248 
1249  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1250 
1251  // We've established mechanical ability and aliasing, it's up to the caller
1252  // to check fault safety
1253  return true;
1254 }
1255 
1256 /// Returns true if a PHINode is a trivially replaceable with an
1257 /// Instruction.
1258 /// This is true when all incoming values are that instruction.
1259 /// This pattern occurs most often with LCSSA PHI nodes.
1260 ///
1261 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1262  for (const Value *IncValue : PN.incoming_values())
1263  if (IncValue != &I)
1264  return false;
1265 
1266  return true;
1267 }
1268 
1269 /// Return true if the instruction is free in the loop.
1270 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1271  const TargetTransformInfo *TTI) {
1272 
1273  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1275  return false;
1276  // For a GEP, we cannot simply use getUserCost because currently it
1277  // optimistically assume that a GEP will fold into addressing mode
1278  // regardless of its users.
1279  const BasicBlock *BB = GEP->getParent();
1280  for (const User *U : GEP->users()) {
1281  const Instruction *UI = cast<Instruction>(U);
1282  if (CurLoop->contains(UI) &&
1283  (BB != UI->getParent() ||
1284  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1285  return false;
1286  }
1287  return true;
1288  } else
1289  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1290 }
1291 
1292 /// Return true if the only users of this instruction are outside of
1293 /// the loop. If this is true, we can sink the instruction to the exit
1294 /// blocks of the loop.
1295 ///
1296 /// We also return true if the instruction could be folded away in lowering.
1297 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1298 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1299  const LoopSafetyInfo *SafetyInfo,
1300  TargetTransformInfo *TTI, bool &FreeInLoop) {
1301  const auto &BlockColors = SafetyInfo->getBlockColors();
1302  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1303  for (const User *U : I.users()) {
1304  const Instruction *UI = cast<Instruction>(U);
1305  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1306  const BasicBlock *BB = PN->getParent();
1307  // We cannot sink uses in catchswitches.
1308  if (isa<CatchSwitchInst>(BB->getTerminator()))
1309  return false;
1310 
1311  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1312  // phi use is too muddled.
1313  if (isa<CallInst>(I))
1314  if (!BlockColors.empty() &&
1315  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1316  return false;
1317  }
1318 
1319  if (CurLoop->contains(UI)) {
1320  if (IsFree) {
1321  FreeInLoop = true;
1322  continue;
1323  }
1324  return false;
1325  }
1326  }
1327  return true;
1328 }
1329 
1331  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1332  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1333  Instruction *New;
1334  if (auto *CI = dyn_cast<CallInst>(&I)) {
1335  const auto &BlockColors = SafetyInfo->getBlockColors();
1336 
1337  // Sinking call-sites need to be handled differently from other
1338  // instructions. The cloned call-site needs a funclet bundle operand
1339  // appropriate for its location in the CFG.
1341  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1342  BundleIdx != BundleEnd; ++BundleIdx) {
1343  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1344  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1345  continue;
1346 
1347  OpBundles.emplace_back(Bundle);
1348  }
1349 
1350  if (!BlockColors.empty()) {
1351  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1352  assert(CV.size() == 1 && "non-unique color for exit block!");
1353  BasicBlock *BBColor = CV.front();
1354  Instruction *EHPad = BBColor->getFirstNonPHI();
1355  if (EHPad->isEHPad())
1356  OpBundles.emplace_back("funclet", EHPad);
1357  }
1358 
1359  New = CallInst::Create(CI, OpBundles);
1360  } else {
1361  New = I.clone();
1362  }
1363 
1364  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1365  if (!I.getName().empty())
1366  New->setName(I.getName() + ".le");
1367 
1368  MemoryAccess *OldMemAcc;
1369  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1370  // Create a new MemoryAccess and let MemorySSA set its defining access.
1371  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1372  New, nullptr, New->getParent(), MemorySSA::Beginning);
1373  if (NewMemAcc) {
1374  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1375  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1376  else {
1377  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1378  MSSAU->insertUse(MemUse);
1379  }
1380  }
1381  }
1382 
1383  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1384  // particularly cheap because we can rip off the PHI node that we're
1385  // replacing for the number and blocks of the predecessors.
1386  // OPT: If this shows up in a profile, we can instead finish sinking all
1387  // invariant instructions, and then walk their operands to re-establish
1388  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1389  // sinking bottom-up.
1390  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1391  ++OI)
1392  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1393  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1394  if (!OLoop->contains(&PN)) {
1395  PHINode *OpPN =
1396  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1397  OInst->getName() + ".lcssa", &ExitBlock.front());
1398  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1399  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1400  *OI = OpPN;
1401  }
1402  return New;
1403 }
1404 
1406  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1407  if (AST)
1408  AST->deleteValue(&I);
1409  if (MSSAU)
1410  MSSAU->removeMemoryAccess(&I);
1411  SafetyInfo.removeInstruction(&I);
1412  I.eraseFromParent();
1413 }
1414 
1416  ICFLoopSafetyInfo &SafetyInfo,
1417  MemorySSAUpdater *MSSAU) {
1418  SafetyInfo.removeInstruction(&I);
1419  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1420  I.moveBefore(&Dest);
1421  if (MSSAU)
1422  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1423  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1424  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1425 }
1426 
1428  PHINode *TPN, Instruction *I, LoopInfo *LI,
1430  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1431  MemorySSAUpdater *MSSAU) {
1432  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1433  "Expect only trivially replaceable PHI");
1434  BasicBlock *ExitBlock = TPN->getParent();
1435  Instruction *New;
1436  auto It = SunkCopies.find(ExitBlock);
1437  if (It != SunkCopies.end())
1438  New = It->second;
1439  else
1440  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1441  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1442  return New;
1443 }
1444 
1445 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1446  BasicBlock *BB = PN->getParent();
1447  if (!BB->canSplitPredecessors())
1448  return false;
1449  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1450  // it require updating BlockColors for all offspring blocks accordingly. By
1451  // skipping such corner case, we can make updating BlockColors after splitting
1452  // predecessor fairly simple.
1453  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1454  return false;
1455  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1456  BasicBlock *BBPred = *PI;
1457  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1458  return false;
1459  }
1460  return true;
1461 }
1462 
1464  LoopInfo *LI, const Loop *CurLoop,
1465  LoopSafetyInfo *SafetyInfo,
1466  MemorySSAUpdater *MSSAU) {
1467 #ifndef NDEBUG
1468  SmallVector<BasicBlock *, 32> ExitBlocks;
1469  CurLoop->getUniqueExitBlocks(ExitBlocks);
1470  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1471  ExitBlocks.end());
1472 #endif
1473  BasicBlock *ExitBB = PN->getParent();
1474  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1475 
1476  // Split predecessors of the loop exit to make instructions in the loop are
1477  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1478  // loop in the canonical form where each predecessor of each exit block should
1479  // be contained within the loop. For example, this will convert the loop below
1480  // from
1481  //
1482  // LB1:
1483  // %v1 =
1484  // br %LE, %LB2
1485  // LB2:
1486  // %v2 =
1487  // br %LE, %LB1
1488  // LE:
1489  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1490  //
1491  // to
1492  //
1493  // LB1:
1494  // %v1 =
1495  // br %LE.split, %LB2
1496  // LB2:
1497  // %v2 =
1498  // br %LE.split2, %LB1
1499  // LE.split:
1500  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1501  // br %LE
1502  // LE.split2:
1503  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1504  // br %LE
1505  // LE:
1506  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1507  //
1508  const auto &BlockColors = SafetyInfo->getBlockColors();
1509  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1510  while (!PredBBs.empty()) {
1511  BasicBlock *PredBB = *PredBBs.begin();
1512  assert(CurLoop->contains(PredBB) &&
1513  "Expect all predecessors are in the loop");
1514  if (PN->getBasicBlockIndex(PredBB) >= 0) {
1516  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1517  // Since we do not allow splitting EH-block with BlockColors in
1518  // canSplitPredecessors(), we can simply assign predecessor's color to
1519  // the new block.
1520  if (!BlockColors.empty())
1521  // Grab a reference to the ColorVector to be inserted before getting the
1522  // reference to the vector we are copying because inserting the new
1523  // element in BlockColors might cause the map to be reallocated.
1524  SafetyInfo->copyColors(NewPred, PredBB);
1525  }
1526  PredBBs.remove(PredBB);
1527  }
1528 }
1529 
1530 /// When an instruction is found to only be used outside of the loop, this
1531 /// function moves it to the exit blocks and patches up SSA form as needed.
1532 /// This method is guaranteed to remove the original instruction from its
1533 /// position, and may either delete it or move it to outside of the loop.
1534 ///
1535 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1536  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1538  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1539  ORE->emit([&]() {
1540  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1541  << "sinking " << ore::NV("Inst", &I);
1542  });
1543  bool Changed = false;
1544  if (isa<LoadInst>(I))
1545  ++NumMovedLoads;
1546  else if (isa<CallInst>(I))
1547  ++NumMovedCalls;
1548  ++NumSunk;
1549 
1550  // Iterate over users to be ready for actual sinking. Replace users via
1551  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1552  SmallPtrSet<Instruction *, 8> VisitedUsers;
1553  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1554  auto *User = cast<Instruction>(*UI);
1555  Use &U = UI.getUse();
1556  ++UI;
1557 
1558  if (VisitedUsers.count(User) || CurLoop->contains(User))
1559  continue;
1560 
1561  if (!DT->isReachableFromEntry(User->getParent())) {
1562  U = UndefValue::get(I.getType());
1563  Changed = true;
1564  continue;
1565  }
1566 
1567  // The user must be a PHI node.
1568  PHINode *PN = cast<PHINode>(User);
1569 
1570  // Surprisingly, instructions can be used outside of loops without any
1571  // exits. This can only happen in PHI nodes if the incoming block is
1572  // unreachable.
1573  BasicBlock *BB = PN->getIncomingBlock(U);
1574  if (!DT->isReachableFromEntry(BB)) {
1575  U = UndefValue::get(I.getType());
1576  Changed = true;
1577  continue;
1578  }
1579 
1580  VisitedUsers.insert(PN);
1581  if (isTriviallyReplaceablePHI(*PN, I))
1582  continue;
1583 
1584  if (!canSplitPredecessors(PN, SafetyInfo))
1585  return Changed;
1586 
1587  // Split predecessors of the PHI so that we can make users trivially
1588  // replaceable.
1589  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1590 
1591  // Should rebuild the iterators, as they may be invalidated by
1592  // splitPredecessorsOfLoopExit().
1593  UI = I.user_begin();
1594  UE = I.user_end();
1595  }
1596 
1597  if (VisitedUsers.empty())
1598  return Changed;
1599 
1600 #ifndef NDEBUG
1601  SmallVector<BasicBlock *, 32> ExitBlocks;
1602  CurLoop->getUniqueExitBlocks(ExitBlocks);
1603  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1604  ExitBlocks.end());
1605 #endif
1606 
1607  // Clones of this instruction. Don't create more than one per exit block!
1609 
1610  // If this instruction is only used outside of the loop, then all users are
1611  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1612  // the instruction.
1614  for (auto *UI : Users) {
1615  auto *User = cast<Instruction>(UI);
1616 
1617  if (CurLoop->contains(User))
1618  continue;
1619 
1620  PHINode *PN = cast<PHINode>(User);
1621  assert(ExitBlockSet.count(PN->getParent()) &&
1622  "The LCSSA PHI is not in an exit block!");
1623  // The PHI must be trivially replaceable.
1625  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1626  PN->replaceAllUsesWith(New);
1627  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1628  Changed = true;
1629  }
1630  return Changed;
1631 }
1632 
1633 /// When an instruction is found to only use loop invariant operands that
1634 /// is safe to hoist, this instruction is called to do the dirty work.
1635 ///
1636 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1637  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1639  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1640  << "\n");
1641  ORE->emit([&]() {
1642  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1643  << ore::NV("Inst", &I);
1644  });
1645 
1646  // Metadata can be dependent on conditions we are hoisting above.
1647  // Conservatively strip all metadata on the instruction unless we were
1648  // guaranteed to execute I if we entered the loop, in which case the metadata
1649  // is valid in the loop preheader.
1650  if (I.hasMetadataOtherThanDebugLoc() &&
1651  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1652  // time in isGuaranteedToExecute if we don't actually have anything to
1653  // drop. It is a compile time optimization, not required for correctness.
1654  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1656 
1657  if (isa<PHINode>(I))
1658  // Move the new node to the end of the phi list in the destination block.
1659  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1660  else
1661  // Move the new node to the destination block, before its terminator.
1662  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1663 
1664  // Apply line 0 debug locations when we are moving instructions to different
1665  // basic blocks because we want to avoid jumpy line tables.
1666  if (const DebugLoc &DL = I.getDebugLoc())
1667  I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
1668 
1669  if (isa<LoadInst>(I))
1670  ++NumMovedLoads;
1671  else if (isa<CallInst>(I))
1672  ++NumMovedCalls;
1673  ++NumHoisted;
1674 }
1675 
1676 /// Only sink or hoist an instruction if it is not a trapping instruction,
1677 /// or if the instruction is known not to trap when moved to the preheader.
1678 /// or if it is a trapping instruction and is guaranteed to execute.
1680  const DominatorTree *DT,
1681  const Loop *CurLoop,
1682  const LoopSafetyInfo *SafetyInfo,
1684  const Instruction *CtxI) {
1685  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1686  return true;
1687 
1688  bool GuaranteedToExecute =
1689  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1690 
1691  if (!GuaranteedToExecute) {
1692  auto *LI = dyn_cast<LoadInst>(&Inst);
1693  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1694  ORE->emit([&]() {
1695  return OptimizationRemarkMissed(
1696  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1697  << "failed to hoist load with loop-invariant address "
1698  "because load is conditionally executed";
1699  });
1700  }
1701 
1702  return GuaranteedToExecute;
1703 }
1704 
1705 namespace {
1706 class LoopPromoter : public LoadAndStorePromoter {
1707  Value *SomePtr; // Designated pointer to store to.
1708  const SmallSetVector<Value *, 8> &PointerMustAliases;
1709  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1710  SmallVectorImpl<Instruction *> &LoopInsertPts;
1711  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1712  PredIteratorCache &PredCache;
1713  AliasSetTracker &AST;
1714  MemorySSAUpdater *MSSAU;
1715  LoopInfo &LI;
1716  DebugLoc DL;
1717  int Alignment;
1718  bool UnorderedAtomic;
1719  AAMDNodes AATags;
1720  ICFLoopSafetyInfo &SafetyInfo;
1721 
1722  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1723  if (Instruction *I = dyn_cast<Instruction>(V))
1724  if (Loop *L = LI.getLoopFor(I->getParent()))
1725  if (!L->contains(BB)) {
1726  // We need to create an LCSSA PHI node for the incoming value and
1727  // store that.
1728  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1729  I->getName() + ".lcssa", &BB->front());
1730  for (BasicBlock *Pred : PredCache.get(BB))
1731  PN->addIncoming(I, Pred);
1732  return PN;
1733  }
1734  return V;
1735  }
1736 
1737 public:
1738  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1739  const SmallSetVector<Value *, 8> &PMA,
1743  AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1744  DebugLoc dl, int alignment, bool UnorderedAtomic,
1745  const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1746  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1747  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1748  PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1749  Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1750  SafetyInfo(SafetyInfo) {}
1751 
1752  bool isInstInList(Instruction *I,
1753  const SmallVectorImpl<Instruction *> &) const override {
1754  Value *Ptr;
1755  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1756  Ptr = LI->getOperand(0);
1757  else
1758  Ptr = cast<StoreInst>(I)->getPointerOperand();
1759  return PointerMustAliases.count(Ptr);
1760  }
1761 
1762  void doExtraRewritesBeforeFinalDeletion() override {
1763  // Insert stores after in the loop exit blocks. Each exit block gets a
1764  // store of the live-out values that feed them. Since we've already told
1765  // the SSA updater about the defs in the loop and the preheader
1766  // definition, it is all set and we can start using it.
1767  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1768  BasicBlock *ExitBlock = LoopExitBlocks[i];
1769  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1770  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1771  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1772  Instruction *InsertPos = LoopInsertPts[i];
1773  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1774  if (UnorderedAtomic)
1776  NewSI->setAlignment(Alignment);
1777  NewSI->setDebugLoc(DL);
1778  if (AATags)
1779  NewSI->setAAMetadata(AATags);
1780 
1781  if (MSSAU) {
1782  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1783  MemoryAccess *NewMemAcc;
1784  if (!MSSAInsertPoint) {
1785  NewMemAcc = MSSAU->createMemoryAccessInBB(
1786  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1787  } else {
1788  NewMemAcc =
1789  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1790  }
1791  MSSAInsertPts[i] = NewMemAcc;
1792  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1793  // FIXME: true for safety, false may still be correct.
1794  }
1795  }
1796  }
1797 
1798  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1799  // Update alias analysis.
1800  AST.copyValue(LI, V);
1801  }
1802  void instructionDeleted(Instruction *I) const override {
1803  SafetyInfo.removeInstruction(I);
1804  AST.deleteValue(I);
1805  if (MSSAU)
1806  MSSAU->removeMemoryAccess(I);
1807  }
1808 };
1809 
1810 
1811 /// Return true iff we can prove that a caller of this function can not inspect
1812 /// the contents of the provided object in a well defined program.
1813 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1814  if (isa<AllocaInst>(Object))
1815  // Since the alloca goes out of scope, we know the caller can't retain a
1816  // reference to it and be well defined. Thus, we don't need to check for
1817  // capture.
1818  return true;
1819 
1820  // For all other objects we need to know that the caller can't possibly
1821  // have gotten a reference to the object. There are two components of
1822  // that:
1823  // 1) Object can't be escaped by this function. This is what
1824  // PointerMayBeCaptured checks.
1825  // 2) Object can't have been captured at definition site. For this, we
1826  // need to know the return value is noalias. At the moment, we use a
1827  // weaker condition and handle only AllocLikeFunctions (which are
1828  // known to be noalias). TODO
1829  return isAllocLikeFn(Object, TLI) &&
1830  !PointerMayBeCaptured(Object, true, true);
1831 }
1832 
1833 } // namespace
1834 
1835 /// Try to promote memory values to scalars by sinking stores out of the
1836 /// loop and moving loads to before the loop. We do this by looping over
1837 /// the stores in the loop, looking for stores to Must pointers which are
1838 /// loop invariant.
1839 ///
1841  const SmallSetVector<Value *, 8> &PointerMustAliases,
1842  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1843  SmallVectorImpl<Instruction *> &InsertPts,
1845  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1846  Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1847  ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1848  // Verify inputs.
1849  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1850  CurAST != nullptr && SafetyInfo != nullptr &&
1851  "Unexpected Input to promoteLoopAccessesToScalars");
1852 
1853  Value *SomePtr = *PointerMustAliases.begin();
1854  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1855 
1856  // It is not safe to promote a load/store from the loop if the load/store is
1857  // conditional. For example, turning:
1858  //
1859  // for () { if (c) *P += 1; }
1860  //
1861  // into:
1862  //
1863  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1864  //
1865  // is not safe, because *P may only be valid to access if 'c' is true.
1866  //
1867  // The safety property divides into two parts:
1868  // p1) The memory may not be dereferenceable on entry to the loop. In this
1869  // case, we can't insert the required load in the preheader.
1870  // p2) The memory model does not allow us to insert a store along any dynamic
1871  // path which did not originally have one.
1872  //
1873  // If at least one store is guaranteed to execute, both properties are
1874  // satisfied, and promotion is legal.
1875  //
1876  // This, however, is not a necessary condition. Even if no store/load is
1877  // guaranteed to execute, we can still establish these properties.
1878  // We can establish (p1) by proving that hoisting the load into the preheader
1879  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1880  // can use any access within the alias set to prove dereferenceability,
1881  // since they're all must alias.
1882  //
1883  // There are two ways establish (p2):
1884  // a) Prove the location is thread-local. In this case the memory model
1885  // requirement does not apply, and stores are safe to insert.
1886  // b) Prove a store dominates every exit block. In this case, if an exit
1887  // blocks is reached, the original dynamic path would have taken us through
1888  // the store, so inserting a store into the exit block is safe. Note that this
1889  // is different from the store being guaranteed to execute. For instance,
1890  // if an exception is thrown on the first iteration of the loop, the original
1891  // store is never executed, but the exit blocks are not executed either.
1892 
1893  bool DereferenceableInPH = false;
1894  bool SafeToInsertStore = false;
1895 
1897 
1898  // We start with an alignment of one and try to find instructions that allow
1899  // us to prove better alignment.
1900  unsigned Alignment = 1;
1901  // Keep track of which types of access we see
1902  bool SawUnorderedAtomic = false;
1903  bool SawNotAtomic = false;
1904  AAMDNodes AATags;
1905 
1906  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1907 
1908  bool IsKnownThreadLocalObject = false;
1909  if (SafetyInfo->anyBlockMayThrow()) {
1910  // If a loop can throw, we have to insert a store along each unwind edge.
1911  // That said, we can't actually make the unwind edge explicit. Therefore,
1912  // we have to prove that the store is dead along the unwind edge. We do
1913  // this by proving that the caller can't have a reference to the object
1914  // after return and thus can't possibly load from the object.
1915  Value *Object = GetUnderlyingObject(SomePtr, MDL);
1916  if (!isKnownNonEscaping(Object, TLI))
1917  return false;
1918  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1919  // visible to other threads if captured and used during their lifetimes.
1920  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1921  }
1922 
1923  // Check that all of the pointers in the alias set have the same type. We
1924  // cannot (yet) promote a memory location that is loaded and stored in
1925  // different sizes. While we are at it, collect alignment and AA info.
1926  for (Value *ASIV : PointerMustAliases) {
1927  // Check that all of the pointers in the alias set have the same type. We
1928  // cannot (yet) promote a memory location that is loaded and stored in
1929  // different sizes.
1930  if (SomePtr->getType() != ASIV->getType())
1931  return false;
1932 
1933  for (User *U : ASIV->users()) {
1934  // Ignore instructions that are outside the loop.
1935  Instruction *UI = dyn_cast<Instruction>(U);
1936  if (!UI || !CurLoop->contains(UI))
1937  continue;
1938 
1939  // If there is an non-load/store instruction in the loop, we can't promote
1940  // it.
1941  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1942  if (!Load->isUnordered())
1943  return false;
1944 
1945  SawUnorderedAtomic |= Load->isAtomic();
1946  SawNotAtomic |= !Load->isAtomic();
1947 
1948  unsigned InstAlignment = Load->getAlignment();
1949  if (!InstAlignment)
1950  InstAlignment =
1951  MDL.getABITypeAlignment(Load->getType());
1952 
1953  // Note that proving a load safe to speculate requires proving
1954  // sufficient alignment at the target location. Proving it guaranteed
1955  // to execute does as well. Thus we can increase our guaranteed
1956  // alignment as well.
1957  if (!DereferenceableInPH || (InstAlignment > Alignment))
1958  if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
1959  ORE, Preheader->getTerminator())) {
1960  DereferenceableInPH = true;
1961  Alignment = std::max(Alignment, InstAlignment);
1962  }
1963  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1964  // Stores *of* the pointer are not interesting, only stores *to* the
1965  // pointer.
1966  if (UI->getOperand(1) != ASIV)
1967  continue;
1968  if (!Store->isUnordered())
1969  return false;
1970 
1971  SawUnorderedAtomic |= Store->isAtomic();
1972  SawNotAtomic |= !Store->isAtomic();
1973 
1974  // If the store is guaranteed to execute, both properties are satisfied.
1975  // We may want to check if a store is guaranteed to execute even if we
1976  // already know that promotion is safe, since it may have higher
1977  // alignment than any other guaranteed stores, in which case we can
1978  // raise the alignment on the promoted store.
1979  unsigned InstAlignment = Store->getAlignment();
1980  if (!InstAlignment)
1981  InstAlignment =
1982  MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1983 
1984  if (!DereferenceableInPH || !SafeToInsertStore ||
1985  (InstAlignment > Alignment)) {
1986  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
1987  DereferenceableInPH = true;
1988  SafeToInsertStore = true;
1989  Alignment = std::max(Alignment, InstAlignment);
1990  }
1991  }
1992 
1993  // If a store dominates all exit blocks, it is safe to sink.
1994  // As explained above, if an exit block was executed, a dominating
1995  // store must have been executed at least once, so we are not
1996  // introducing stores on paths that did not have them.
1997  // Note that this only looks at explicit exit blocks. If we ever
1998  // start sinking stores into unwind edges (see above), this will break.
1999  if (!SafeToInsertStore)
2000  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2001  return DT->dominates(Store->getParent(), Exit);
2002  });
2003 
2004  // If the store is not guaranteed to execute, we may still get
2005  // deref info through it.
2006  if (!DereferenceableInPH) {
2007  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2008  Store->getPointerOperand(), Store->getAlignment(), MDL,
2009  Preheader->getTerminator(), DT);
2010  }
2011  } else
2012  return false; // Not a load or store.
2013 
2014  // Merge the AA tags.
2015  if (LoopUses.empty()) {
2016  // On the first load/store, just take its AA tags.
2017  UI->getAAMetadata(AATags);
2018  } else if (AATags) {
2019  UI->getAAMetadata(AATags, /* Merge = */ true);
2020  }
2021 
2022  LoopUses.push_back(UI);
2023  }
2024  }
2025 
2026  // If we found both an unordered atomic instruction and a non-atomic memory
2027  // access, bail. We can't blindly promote non-atomic to atomic since we
2028  // might not be able to lower the result. We can't downgrade since that
2029  // would violate memory model. Also, align 0 is an error for atomics.
2030  if (SawUnorderedAtomic && SawNotAtomic)
2031  return false;
2032 
2033  // If we're inserting an atomic load in the preheader, we must be able to
2034  // lower it. We're only guaranteed to be able to lower naturally aligned
2035  // atomics.
2036  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2037  if (SawUnorderedAtomic &&
2038  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2039  return false;
2040 
2041  // If we couldn't prove we can hoist the load, bail.
2042  if (!DereferenceableInPH)
2043  return false;
2044 
2045  // We know we can hoist the load, but don't have a guaranteed store.
2046  // Check whether the location is thread-local. If it is, then we can insert
2047  // stores along paths which originally didn't have them without violating the
2048  // memory model.
2049  if (!SafeToInsertStore) {
2050  if (IsKnownThreadLocalObject)
2051  SafeToInsertStore = true;
2052  else {
2053  Value *Object = GetUnderlyingObject(SomePtr, MDL);
2054  SafeToInsertStore =
2055  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2056  !PointerMayBeCaptured(Object, true, true);
2057  }
2058  }
2059 
2060  // If we've still failed to prove we can sink the store, give up.
2061  if (!SafeToInsertStore)
2062  return false;
2063 
2064  // Otherwise, this is safe to promote, lets do it!
2065  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2066  << '\n');
2067  ORE->emit([&]() {
2068  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2069  LoopUses[0])
2070  << "Moving accesses to memory location out of the loop";
2071  });
2072  ++NumPromoted;
2073 
2074  // Grab a debug location for the inserted loads/stores; given that the
2075  // inserted loads/stores have little relation to the original loads/stores,
2076  // this code just arbitrarily picks a location from one, since any debug
2077  // location is better than none.
2078  DebugLoc DL = LoopUses[0]->getDebugLoc();
2079 
2080  // We use the SSAUpdater interface to insert phi nodes as required.
2082  SSAUpdater SSA(&NewPHIs);
2083  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2084  InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2085  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2086 
2087  // Set up the preheader to have a definition of the value. It is the live-out
2088  // value from the preheader that uses in the loop will use.
2089  LoadInst *PreheaderLoad = new LoadInst(
2090  SomePtr->getType()->getPointerElementType(), SomePtr,
2091  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2092  if (SawUnorderedAtomic)
2093  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2094  PreheaderLoad->setAlignment(Alignment);
2095  PreheaderLoad->setDebugLoc(DL);
2096  if (AATags)
2097  PreheaderLoad->setAAMetadata(AATags);
2098  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2099 
2100  MemoryAccess *PreheaderLoadMemoryAccess;
2101  if (MSSAU) {
2102  PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2103  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2104  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2105  MSSAU->insertUse(NewMemUse);
2106  }
2107 
2108  // Rewrite all the loads in the loop and remember all the definitions from
2109  // stores in the loop.
2110  Promoter.run(LoopUses);
2111 
2112  if (MSSAU && VerifyMemorySSA)
2113  MSSAU->getMemorySSA()->verifyMemorySSA();
2114  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2115  if (PreheaderLoad->use_empty())
2116  eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2117 
2118  return true;
2119 }
2120 
2121 /// Returns an owning pointer to an alias set which incorporates aliasing info
2122 /// from L and all subloops of L.
2123 /// FIXME: In new pass manager, there is no helper function to handle loop
2124 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2125 /// from scratch for every loop. Hook up with the helper functions when
2126 /// available in the new pass manager to avoid redundant computation.
2127 std::unique_ptr<AliasSetTracker>
2128 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2129  AliasAnalysis *AA) {
2130  std::unique_ptr<AliasSetTracker> CurAST;
2131  SmallVector<Loop *, 4> RecomputeLoops;
2132  for (Loop *InnerL : L->getSubLoops()) {
2133  auto MapI = LoopToAliasSetMap.find(InnerL);
2134  // If the AST for this inner loop is missing it may have been merged into
2135  // some other loop's AST and then that loop unrolled, and so we need to
2136  // recompute it.
2137  if (MapI == LoopToAliasSetMap.end()) {
2138  RecomputeLoops.push_back(InnerL);
2139  continue;
2140  }
2141  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2142 
2143  if (CurAST) {
2144  // What if InnerLoop was modified by other passes ?
2145  // Once we've incorporated the inner loop's AST into ours, we don't need
2146  // the subloop's anymore.
2147  CurAST->add(*InnerAST);
2148  } else {
2149  CurAST = std::move(InnerAST);
2150  }
2151  LoopToAliasSetMap.erase(MapI);
2152  }
2153  if (!CurAST)
2154  CurAST = make_unique<AliasSetTracker>(*AA);
2155 
2156  // Add everything from the sub loops that are no longer directly available.
2157  for (Loop *InnerL : RecomputeLoops)
2158  for (BasicBlock *BB : InnerL->blocks())
2159  CurAST->add(*BB);
2160 
2161  // And merge in this loop (without anything from inner loops).
2162  for (BasicBlock *BB : L->blocks())
2163  if (LI->getLoopFor(BB) == L)
2164  CurAST->add(*BB);
2165 
2166  return CurAST;
2167 }
2168 
2169 std::unique_ptr<AliasSetTracker>
2170 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2171  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2172  auto *MSSA = MSSAU->getMemorySSA();
2173  auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
2174  CurAST->addAllInstructionsInLoopUsingMSSA();
2175  return CurAST;
2176 }
2177 
2178 /// Simple analysis hook. Clone alias set info.
2179 ///
2180 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2181  Loop *L) {
2182  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2183  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2184  return;
2185 
2186  ASTIt->second->copyValue(From, To);
2187 }
2188 
2189 /// Simple Analysis hook. Delete value V from alias set
2190 ///
2191 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2192  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2193  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2194  return;
2195 
2196  ASTIt->second->deleteValue(V);
2197 }
2198 
2199 /// Simple Analysis hook. Delete value L from alias set map.
2200 ///
2201 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2202  if (!LICM.getLoopToAliasSetMap().count(L))
2203  return;
2204 
2205  LICM.getLoopToAliasSetMap().erase(L);
2206 }
2207 
2209  AliasSetTracker *CurAST, Loop *CurLoop,
2210  AliasAnalysis *AA) {
2211  // First check to see if any of the basic blocks in CurLoop invalidate *V.
2212  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2213 
2214  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2215  return isInvalidatedAccordingToAST;
2216 
2217  // Check with a diagnostic analysis if we can refine the information above.
2218  // This is to identify the limitations of using the AST.
2219  // The alias set mechanism used by LICM has a major weakness in that it
2220  // combines all things which may alias into a single set *before* asking
2221  // modref questions. As a result, a single readonly call within a loop will
2222  // collapse all loads and stores into a single alias set and report
2223  // invalidation if the loop contains any store. For example, readonly calls
2224  // with deopt states have this form and create a general alias set with all
2225  // loads and stores. In order to get any LICM in loops containing possible
2226  // deopt states we need a more precise invalidation of checking the mod ref
2227  // info of each instruction within the loop and LI. This has a complexity of
2228  // O(N^2), so currently, it is used only as a diagnostic tool since the
2229  // default value of LICMN2Threshold is zero.
2230 
2231  // Don't look at nested loops.
2232  if (CurLoop->begin() != CurLoop->end())
2233  return true;
2234 
2235  int N = 0;
2236  for (BasicBlock *BB : CurLoop->getBlocks())
2237  for (Instruction &I : *BB) {
2238  if (N >= LICMN2Theshold) {
2239  LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2240  << *(MemLoc.Ptr) << "\n");
2241  return true;
2242  }
2243  N++;
2244  auto Res = AA->getModRefInfo(&I, MemLoc);
2245  if (isModSet(Res)) {
2246  LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2247  << *(MemLoc.Ptr) << "\n");
2248  return true;
2249  }
2250  }
2251  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2252  return false;
2253 }
2254 
2256  Loop *CurLoop,
2257  SinkAndHoistLICMFlags &Flags) {
2259  // See declaration of SetLicmMssaOptCap for usage details.
2260  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2261  Source = MU->getDefiningAccess();
2262  else {
2263  Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2264  Flags.LicmMssaOptCounter++;
2265  }
2266  return !MSSA->isLiveOnEntryDef(Source) &&
2267  CurLoop->contains(Source->getBlock());
2268 }
2269 
2270 /// Little predicate that returns true if the specified basic block is in
2271 /// a subloop of the current one, not the current one itself.
2272 ///
2273 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2274  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2275  return LI->getLoopFor(BB) != CurLoop;
2276 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1405
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:371
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:85
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:645
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1840
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1261
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:82
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1195
iterator end()
Definition: Function.h:663
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:483
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:195
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess&#39;s for a given basic block.
Definition: MemorySSA.h:758
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1463
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:253
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1624
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
void insertUse(MemoryUse *Use)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1270
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
iv Induction Variable Users
Definition: IVUsers.cpp:51
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:784
void reserve(size_type N)
Definition: SmallVector.h:369
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:65
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:703
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2208
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
Loop Invariant Code Motion
Definition: LICM.cpp:308
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1218
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
Represents read-only accesses to memory.
Definition: MemorySSA.h:319
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:963
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:71
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:375
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:697
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:123
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1005
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef&#39;s and MemoryPhi&#39;s for a given basic block.
Definition: MemorySSA.h:766
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:952
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:67
BlockT * getHeader() const
Definition: LoopInfo.h:100
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2255
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref&#39;ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:64
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:29
iterator begin()
Definition: Function.h:661
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1427
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:318
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1564
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1261
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:311
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
cl::opt< unsigned > SetLicmMssaOptCap
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:276
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1445
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1213
bool isMod() const
void setAlignment(unsigned Align)
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:970
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:59
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1636
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:91
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:79
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1848
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:142
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1298
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:247
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:977
iterator_range< user_iterator > users()
Definition: Value.h:399
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1679
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:593
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:131
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:378
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:136
licm
Definition: LICM.cpp:308
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:738
Captures loop safety information.
Definition: MustExecute.h:47
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:432
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef&#39;ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1023
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2273
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:353
LLVM Value Representation.
Definition: Value.h:72
void setAlignment(unsigned Align)
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:583
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1069
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:977
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:99
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1535
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1330
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:128
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:120
bool use_empty() const
Definition: Value.h:322
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1415
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:383
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:532
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:25