LLVM  9.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/DerivedTypes.h"
58 #include "llvm/IR/Dominators.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/IR/Metadata.h"
63 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/Support/Debug.h"
68 #include "llvm/Transforms/Scalar.h"
74 #include <algorithm>
75 #include <utility>
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "licm"
79 
// Pass-wide statistics, reported with -stats.
80 STATISTIC(NumCreatedBlocks, "Number of blocks created");
81 STATISTIC(NumClonedBranches, "Number of branches cloned");
82 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
83 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
84 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
85 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
86 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
87 
88 /// Memory promotion is enabled by default.
89 static cl::opt<bool>
90  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
91  cl::desc("Disable memory promotion in LICM pass"));
92 
// NOTE(review): the declaration header for this option is elided in this
// listing -- presumably "static cl::opt<bool> ControlFlowHoisting(" given the
// uses of ControlFlowHoisting below; confirm against upstream LICM.cpp.
94  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
95  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
96 
// NOTE(review): declaration header elided here as well (an option capping use
// traversal for invariant-start based load invariance); confirm upstream.
98  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
99  cl::desc("Max num uses visited for identifying load "
100  "invariance in loop using invariant start (default = 8)"));
101 
102 // Default value of zero implies we use the regular alias set tracker mechanism
103 // instead of the cross product using AA to identify aliasing of the memory
104 // location we are interested in.
// Note: the identifier below is spelled "Theshold" (sic); renaming it would
// touch every use site, so it is deliberately left unchanged here.
105 static cl::opt<int>
106 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
107  cl::desc("How many instruction to cross product using AA"));
108 
109 // Experimental option to allow imprecision in LICM in pathological cases, in
110 // exchange for faster compile. This is to be removed if MemorySSA starts to
111 // address the same issue. This flag applies only when LICM uses MemorySSA
112 // instead of AliasSetTracker. LICM calls MemorySSAWalker's
113 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
114 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
115 // which may not be precise, since optimizeUses is capped. The result is
116 // correct, but we may not get as "far up" as possible to get which access is
117 // clobbering the one queried.
// NOTE(review): declaration header elided (an int/unsigned cl::opt used as
// the MemorySSA clobber-walk cap); confirm upstream.
119  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
120  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
121  "for faster compile. Caps the MemorySSA clobbering calls."))
122 
123 // Experimentally, memory promotion carries less importance than sinking and
124 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
125 // compile time.
// NOTE(review): declaration header elided; the option is referenced below as
// AccessCapForMSSAPromotion -- confirm upstream.
127  "max-acc-licm-promotion", cl::init(250), cl::Hidden,
128  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
129  "effect. When MSSA in LICM is enabled, then this is the maximum "
130  "number of accesses allowed to be present in a loop in order to "
131  "enable memory promotion."));
132 
// Forward declarations of the file-local helpers that implement LICM below.
// Several declarations are only partially visible in this listing (their
// header or parameter lines are elided); hedged notes mark those spots.
133 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
134 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
135  const LoopSafetyInfo *SafetyInfo,
136  TargetTransformInfo *TTI, bool &FreeInLoop);
137 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
138  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): the trailing parameter line of hoist() is elided in this
// listing (calls below pass MSSAU and ORE as the final arguments).
140 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
141  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): a parameter line of sink() is elided here (calls below pass
// MSSAU and ORE before FreeInLoop).
143  bool FreeInLoop);
// NOTE(review): the declaration these parameter lines belong to is elided in
// this listing; by the parameter list and the CtxI default it is likely
// isSafeToExecuteUnconditionally -- confirm upstream.
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
// AliasSetTracker-based aliasing query used when MemorySSA is not in use.
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
// NOTE(review): the MemorySSA-based variant's declaration header is elided
// here (these orphaned parameter lines carry its tail) -- confirm upstream.
154  Loop *CurLoop,
155  int &LicmMssaOptCounter);
// NOTE(review): declaration header elided; by the parameter list this is
// likely the exit-block instruction-cloning helper -- confirm upstream.
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
// Helper taking the analyses (safety info, AST, MemorySSA) that must be kept
// in sync when I is erased.
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
// Helper taking the analyses that must be kept in sync when I is moved.
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
// Core LICM implementation, shared by the legacy and new pass managers.
// NOTE(review): this listing elides a line here that should declare
// ASTrackerMapTy (a map from Loop* to its owned AliasSetTracker), which the
// members below rely on -- confirm against upstream.
168 struct LoopInvariantCodeMotion {
// Runs sinking, hoisting and scalar promotion on L; returns true on change.
// DeleteAST requests that L's alias set not be cached for the parent loop.
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
// NOTE(review): the parameter line carrying TLI/TTI is elided in this
// listing (runOnLoop is invoked below with TLI and TTI arguments).
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176 
177 private:
// Alias information saved per loop so outer loops can reuse it.
178  ASTrackerMapTy LoopToAliasSetMap;
179 
// Builds the loop's AliasSetTracker directly from alias analysis.
180  std::unique_ptr<AliasSetTracker>
181  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
// Builds the loop's AliasSetTracker from MemorySSA instead.
182  std::unique_ptr<AliasSetTracker>
183  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
184  MemorySSAUpdater *MSSAU);
185 };
186 
// Legacy pass-manager wrapper around LoopInvariantCodeMotion.
187 struct LegacyLICMPass : public LoopPass {
188  static char ID; // Pass identification, replacement for typeid
189  LegacyLICMPass() : LoopPass(ID) {
// NOTE(review): the constructor body line is elided in this listing
// (presumably the initializeLegacyLICMPassPass(...) registration call).
191  }
192 
193  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
194  if (skipLoop(L)) {
195  // If we have run LICM on a previous loop but now we are skipping
196  // (because we've hit the opt-bisect limit), we need to clear the
197  // loop alias information.
198  LICM.getLoopToAliasSetMap().clear();
199  return false;
200  }
201 
202  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
// NOTE(review): the line declaring MSSA and the condition of this ternary
// are elided in this listing -- confirm against upstream.
204  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
205  : nullptr;
206  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
207  // pass. Function analyses need to be preserved across loop transformations
208  // but ORE cannot be preserved (see comment before the pass definition).
// NOTE(review): the line constructing the local ORE is elided here; &ORE is
// passed below.
210  return LICM.runOnLoop(L,
211  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
212  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
213  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
214  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
215  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
216  *L->getHeader()->getParent()),
217  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
218  }
219 
220  /// This transformation requires natural loop information & requires that
221  /// loop preheaders be inserted into the CFG...
222  ///
223  void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the AU.addRequired<...>() / addPreserved<...>() body lines
// are elided in this listing.
230  }
233  }
234 
236 
237  bool doFinalization() override {
// Every cached per-loop alias set must have been handed off or freed by now.
238  assert(LICM.getLoopToAliasSetMap().empty() &&
239  "Didn't free loop alias sets");
240  return false;
241  }
242 
243 private:
244  LoopInvariantCodeMotion LICM;
245 
246  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
247  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
248  Loop *L) override;
249 
250  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
251  /// set.
252  void deleteAnalysisValue(Value *V, Loop *L) override;
253 
254  /// Simple Analysis hook. Delete loop L from alias set map.
255  void deleteAnalysisLoop(Loop *L) override;
256 };
257 } // namespace
258 
// New pass-manager entry point for LICM.
// NOTE(review): the function signature line(s) -- presumably
// "PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
//  LoopStandardAnalysisResults &AR, LPMUpdater &)" -- are elided in this
// listing; the body below opens mid-function. Confirm against upstream.
261  const auto &FAM =
262  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
263  Function *F = L.getHeader()->getParent();
264 
265  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
266  // FIXME: This should probably be optional rather than required.
267  if (!ORE)
268  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
269  "cached at a higher level");
270 
// DeleteAST=true: the new PM does not cache per-loop alias sets (see the
// comment on runOnLoop below about avoiding a memory leak).
271  LoopInvariantCodeMotion LICM;
272  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
273  AR.MSSA, ORE, true))
274  return PreservedAnalyses::all();
275 
276  auto PA = getLoopPassPreservedAnalyses();
277 
278  PA.preserve<DominatorTreeAnalysis>();
279  PA.preserve<LoopAnalysis>();
280 
281  return PA;
282 }
283 
284 char LegacyLICMPass::ID = 0;
// Register the legacy pass with the PassRegistry.
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY(...) lines between BEGIN and
// END are elided in this listing -- confirm against upstream.
285 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
286  false, false)
291 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
292  false)
293 
294 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
295 
296 /// Hoist expressions out of the specified loop. Note, alias info for inner
297 /// loop is not preserved so it is not a good idea to run LICM multiple
298 /// times on one loop.
299 /// We should delete AST for inner loops in the new pass manager to avoid
300 /// memory leak.
301 ///
302 bool LoopInvariantCodeMotion::runOnLoop(
303  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
// NOTE(review): the parameter line carrying TLI and TTI is elided in this
// listing (both are passed to sinkRegion/hoistRegion below).
305  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
306  bool Changed = false;
307 
308  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
309 
// Exactly one of CurAST / MSSAU is populated, depending on whether MemorySSA
// was provided; the helpers below assert this invariant.
310  std::unique_ptr<AliasSetTracker> CurAST;
311  std::unique_ptr<MemorySSAUpdater> MSSAU;
312  bool NoOfMemAccTooLarge = false;
313  int LicmMssaOptCounter = 0;
314 
315  if (!MSSA) {
316  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
317  CurAST = collectAliasInfoForLoop(L, LI, AA);
318  } else {
319  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
320  MSSAU = make_unique<MemorySSAUpdater>(MSSA);
321 
// Count the loop's memory accesses; past AccessCapForMSSAPromotion we give
// up on promotion (NoOfMemAccTooLarge) to bound compile time.
322  unsigned AccessCapCount = 0;
323  for (auto *BB : L->getBlocks()) {
324  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
325  for (const auto &MA : *Accesses) {
326  (void)MA;
327  AccessCapCount++;
328  if (AccessCapCount > AccessCapForMSSAPromotion) {
329  NoOfMemAccTooLarge = true;
330  break;
331  }
332  }
333  }
334  if (NoOfMemAccTooLarge)
335  break;
336  }
337  }
338 
339  // Get the preheader block to move instructions into...
340  BasicBlock *Preheader = L->getLoopPreheader();
341 
342  // Compute loop safety information.
343  ICFLoopSafetyInfo SafetyInfo(DT);
344  SafetyInfo.computeLoopSafetyInfo(L);
345 
346  // We want to visit all of the instructions in this loop... that are not parts
347  // of our subloops (they have already had their invariants hoisted out of
348  // their loop, into this loop, so there is no need to process the BODIES of
349  // the subloops).
350  //
351  // Traverse the body of the loop in depth first order on the dominator tree so
352  // that we are guaranteed to see definitions before we see uses. This allows
353  // us to sink instructions in one pass, without iteration. After sinking
354  // instructions, we perform another pass to hoist them out of the loop.
355  //
356  if (L->hasDedicatedExits())
357  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
358  CurAST.get(), MSSAU.get(), &SafetyInfo,
359  NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
360  if (Preheader)
361  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
362  CurAST.get(), MSSAU.get(), &SafetyInfo,
363  NoOfMemAccTooLarge, LicmMssaOptCounter, ORE);
364 
365  // Now that all loop invariants have been removed from the loop, promote any
366  // memory references to scalars that we can.
367  // Don't sink stores from loops without dedicated block exits. Exits
368  // containing indirect branches are not transformed by loop simplify,
369  // make sure we catch that. An additional load may be generated in the
370  // preheader for SSA updater, so also avoid sinking when no preheader
371  // is available.
372  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
373  !NoOfMemAccTooLarge) {
374  // Figure out the loop exits and their insertion points
375  SmallVector<BasicBlock *, 8> ExitBlocks;
376  L->getUniqueExitBlocks(ExitBlocks);
377 
378  // We can't insert into a catchswitch.
379  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
380  return isa<CatchSwitchInst>(Exit->getTerminator());
381  });
382 
383  if (!HasCatchSwitch) {
// NOTE(review): the declaration of InsertPts (used below; presumably a
// SmallVector<Instruction *, 8>) is elided in this listing -- confirm
// against upstream.
385  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
386  InsertPts.reserve(ExitBlocks.size());
387  if (MSSAU)
388  MSSAInsertPts.reserve(ExitBlocks.size());
389  for (BasicBlock *ExitBlock : ExitBlocks) {
390  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
391  if (MSSAU)
392  MSSAInsertPts.push_back(nullptr);
393  }
394 
395  PredIteratorCache PIC;
396 
397  bool Promoted = false;
398 
399  // Build an AST using MSSA.
400  if (!CurAST.get())
401  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
402 
403  // Loop over all of the alias sets in the tracker object.
404  for (AliasSet &AS : *CurAST) {
405  // We can promote this alias set if it has a store, if it is a "Must"
406  // alias set, if the pointer is loop invariant, and if we are not
407  // eliminating any volatile loads or stores.
408  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
409  !L->isLoopInvariant(AS.begin()->getValue()))
410  continue;
411 
412  assert(
413  !AS.empty() &&
414  "Must alias set should have at least one pointer element in it!");
415 
416  SmallSetVector<Value *, 8> PointerMustAliases;
417  for (const auto &ASI : AS)
418  PointerMustAliases.insert(ASI.getValue());
419 
420  Promoted |= promoteLoopAccessesToScalars(
421  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
422  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
423  }
424 
425  // Once we have promoted values across the loop body we have to
426  // recursively reform LCSSA as any nested loop may now have values defined
427  // within the loop used in the outer loop.
428  // FIXME: This is really heavy handed. It would be a bit better to use an
429  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
430  // it as it went.
431  if (Promoted)
432  formLCSSARecursively(*L, *DT, LI, SE);
433 
434  Changed |= Promoted;
435  }
436  }
437 
438  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
439  // specifically moving instructions across the loop boundary and so it is
440  // especially in need of sanity checking here.
441  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
442  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
443  "Parent loop not left in LCSSA form after LICM!");
444 
445  // If this loop is nested inside of another one, save the alias information
446  // for when we process the outer loop.
447  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
448  LoopToAliasSetMap[L] = std::move(CurAST);
449 
450  if (MSSAU.get() && VerifyMemorySSA)
451  MSSAU->getMemorySSA()->verifyMemorySSA();
452 
// Scalar-evolution dispositions cached for this loop may now be stale.
453  if (Changed && SE)
454  SE->forgetLoopDispositions(L);
455  return Changed;
456 }
457 
458 /// Walk the specified region of the CFG (defined by all blocks dominated by
459 /// the specified block, and that are in the current loop) in reverse depth
460 /// first order w.r.t the DominatorTree. This allows us to visit uses before
461 /// definitions, allowing us to sink a loop body in one pass without iteration.
462 ///
// NOTE(review): the signature head is elided in this listing -- presumably
// "bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//  DominatorTree *DT, TargetLibraryInfo *TLI," given the asserts on N/AA/LI/DT
// and the uses of TLI below; confirm against upstream.
465  TargetTransformInfo *TTI, Loop *CurLoop,
466  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
467  ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
468  int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) {
469 
470  // Verify inputs.
471  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
472  CurLoop != nullptr && SafetyInfo != nullptr &&
473  "Unexpected input to sinkRegion.");
// Exactly one aliasing mechanism must be active (see runOnLoop).
474  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
475  "Either AliasSetTracker or MemorySSA should be initialized.");
476 
477  // We want to visit children before parents. We will enqueue all the parents
478  // before their children in the worklist and process the worklist in reverse
479  // order.
// NOTE(review): the declaration of Worklist (iterated below; presumably
// built by collectChildrenInLoop(N, CurLoop)) is elided in this listing --
// confirm against upstream.
481 
482  bool Changed = false;
483  for (DomTreeNode *DTN : reverse(Worklist)) {
484  BasicBlock *BB = DTN->getBlock();
485  // Only need to process the contents of this block if it is not part of a
486  // subloop (which would already have been processed).
487  if (inSubLoop(BB, CurLoop, LI))
488  continue;
489 
// Iterate the block bottom-up so uses are visited before definitions.
490  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
491  Instruction &I = *--II;
492 
493  // If the instruction is dead, we would try to sink it because it isn't
494  // used in the loop, instead, just delete it.
495  if (isInstructionTriviallyDead(&I, TLI)) {
496  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
497  salvageDebugInfo(I);
// Advance the iterator past I before erasing so the loop stays valid.
498  ++II;
499  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
500  Changed = true;
501  continue;
502  }
503 
504  // Check to see if we can sink this instruction to the exit blocks
505  // of the loop. We can do this if the all users of the instruction are
506  // outside of the loop. In this case, it doesn't even matter if the
507  // operands of the instruction are loop invariant.
508  //
509  bool FreeInLoop = false;
510  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
511  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
512  NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
513  !I.mayHaveSideEffects()) {
514  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
// A "free" instruction is left in place as well as sunk; only erase the
// original when it is not free in the loop.
515  if (!FreeInLoop) {
516  ++II;
517  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
518  }
519  Changed = true;
520  }
521  }
522  }
523  }
524  if (MSSAU && VerifyMemorySSA)
525  MSSAU->getMemorySSA()->verifyMemorySSA();
526  return Changed;
527 }
528 
529 namespace {
530 // This is a helper class for hoistRegion to make it able to hoist control flow
531 // in order to be able to hoist phis. The way this works is that we initially
532 // start hoisting to the loop preheader, and when we see a loop invariant branch
533 // we make note of this. When we then come to hoist an instruction that's
534 // conditional on such a branch we duplicate the branch and the relevant control
535 // flow, then hoist the instruction into the block corresponding to its original
536 // block in the duplicated control flow.
537 class ControlFlowHoister {
538 private:
539  // Information about the loop we are hoisting from
540  LoopInfo *LI;
541  DominatorTree *DT;
542  Loop *CurLoop;
543  MemorySSAUpdater *MSSAU;
544 
545  // A map of blocks in the loop to the block their instructions will be hoisted
546  // to.
547  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
548 
549  // The branches that we can hoist, mapped to the block that marks a
550  // convergence point of their control flow.
551  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
552 
553 public:
554  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
555  MemorySSAUpdater *MSSAU)
556  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
557 
// Records BI in HoistableBranches when its control flow can be duplicated
// outside the loop (loop-invariant condition, in-loop successors, and a
// dominated convergence block).
558  void registerPossiblyHoistableBranch(BranchInst *BI) {
559  // We can only hoist conditional branches with loop invariant operands.
560  if (!ControlFlowHoisting || !BI->isConditional() ||
561  !CurLoop->hasLoopInvariantOperands(BI))
562  return;
563 
564  // The branch destinations need to be in the loop, and we don't gain
565  // anything by duplicating conditional branches with duplicate successors,
566  // as it's essentially the same as an unconditional branch.
567  BasicBlock *TrueDest = BI->getSuccessor(0);
568  BasicBlock *FalseDest = BI->getSuccessor(1);
569  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
570  TrueDest == FalseDest)
571  return;
572 
573  // We can hoist BI if one branch destination is the successor of the other,
574  // or both have common successor which we check by seeing if the
575  // intersection of their successors is non-empty.
576  // TODO: This could be expanded to allowing branches where both ends
577  // eventually converge to a single block.
578  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
579  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
580  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
581  BasicBlock *CommonSucc = nullptr;
582  if (TrueDestSucc.count(FalseDest)) {
583  CommonSucc = FalseDest;
584  } else if (FalseDestSucc.count(TrueDest)) {
585  CommonSucc = TrueDest;
586  } else {
587  set_intersect(TrueDestSucc, FalseDestSucc);
588  // If there's one common successor use that.
589  if (TrueDestSucc.size() == 1)
590  CommonSucc = *TrueDestSucc.begin();
591  // If there's more than one pick whichever appears first in the block list
592  // (we can't use the value returned by TrueDestSucc.begin() as it's
593  // unpredictable which element gets returned).
594  else if (!TrueDestSucc.empty()) {
595  Function *F = TrueDest->getParent();
596  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
597  auto It = std::find_if(F->begin(), F->end(), IsSucc);
598  assert(It != F->end() && "Could not find successor in function");
599  CommonSucc = &*It;
600  }
601  }
602  // The common successor has to be dominated by the branch, as otherwise
603  // there will be some other path to the successor that will not be
604  // controlled by this branch so any phi we hoist would be controlled by the
605  // wrong condition. This also takes care of avoiding hoisting of loop back
606  // edges.
607  // TODO: In some cases this could be relaxed if the successor is dominated
608  // by another block that's been hoisted and we can guarantee that the
609  // control flow has been replicated exactly.
610  if (CommonSucc && DT->dominates(BI, CommonSucc))
611  HoistableBranches[BI] = CommonSucc;
612  }
613 
// Returns true if PN's incoming edges are all covered by branches that were
// registered as hoistable above, so PN itself can be hoisted.
614  bool canHoistPHI(PHINode *PN) {
615  // The phi must have loop invariant operands.
616  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
617  return false;
618  // We can hoist phis if the block they are in is the target of hoistable
619  // branches which cover all of the predecessors of the block.
620  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
621  BasicBlock *BB = PN->getParent();
622  for (BasicBlock *PredBB : predecessors(BB))
623  PredecessorBlocks.insert(PredBB);
624  // If we have less predecessor blocks than predecessors then the phi will
625  // have more than one incoming value for the same block which we can't
626  // handle.
627  // TODO: This could be handled be erasing some of the duplicate incoming
628  // values.
629  if (PredecessorBlocks.size() != pred_size(BB))
630  return false;
631  for (auto &Pair : HoistableBranches) {
632  if (Pair.second == BB) {
633  // Which blocks are predecessors via this branch depends on if the
634  // branch is triangle-like or diamond-like.
635  if (Pair.first->getSuccessor(0) == BB) {
636  PredecessorBlocks.erase(Pair.first->getParent());
637  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
638  } else if (Pair.first->getSuccessor(1) == BB) {
639  PredecessorBlocks.erase(Pair.first->getParent());
640  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
641  } else {
642  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
643  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
644  }
645  }
646  }
647  // PredecessorBlocks will now be empty if for every predecessor of BB we
648  // found a hoistable branch source.
649  return PredecessorBlocks.empty();
650  }
651 
// Returns (creating on demand) the out-of-loop block that instructions from
// BB should be hoisted into, duplicating hoistable control flow as needed.
652  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
653  if (!ControlFlowHoisting)
654  return CurLoop->getLoopPreheader();
655  // If BB has already been hoisted, return that
656  if (HoistDestinationMap.count(BB))
657  return HoistDestinationMap[BB];
658 
659  // Check if this block is conditional based on a pending branch
660  auto HasBBAsSuccessor =
// NOTE(review): the lambda's capture/parameter line is elided in this
// listing (Pair is a HoistableBranches entry, i.e. a
// std::pair<BranchInst *, BasicBlock *>) -- confirm against upstream.
662  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
663  Pair.first->getSuccessor(1) == BB);
664  };
665  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
666  HasBBAsSuccessor);
667 
668  // If not involved in a pending branch, hoist to preheader
669  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
670  if (It == HoistableBranches.end()) {
671  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
672  << " as hoist destination for " << BB->getName()
673  << "\n");
674  HoistDestinationMap[BB] = InitialPreheader;
675  return InitialPreheader;
676  }
677  BranchInst *BI = It->first;
678  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
679  HoistableBranches.end() &&
680  "BB is expected to be the target of at most one branch");
681 
682  LLVMContext &C = BB->getContext();
683  BasicBlock *TrueDest = BI->getSuccessor(0);
684  BasicBlock *FalseDest = BI->getSuccessor(1);
685  BasicBlock *CommonSucc = HoistableBranches[BI];
// Recursive call: the branch's own block must have a hoist destination first.
686  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
687 
688  // Create hoisted versions of blocks that currently don't have them
689  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
690  if (HoistDestinationMap.count(Orig))
691  return HoistDestinationMap[Orig];
692  BasicBlock *New =
693  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
694  HoistDestinationMap[Orig] = New;
695  DT->addNewBlock(New, HoistTarget);
696  if (CurLoop->getParentLoop())
697  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
698  ++NumCreatedBlocks;
699  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
700  << " as hoist destination for " << Orig->getName()
701  << "\n");
702  return New;
703  };
704  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
705  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
706  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
707 
708  // Link up these blocks with branches.
709  if (!HoistCommonSucc->getTerminator()) {
710  // The new common successor we've generated will branch to whatever that
711  // hoist target branched to.
712  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
713  assert(TargetSucc && "Expected hoist target to have a single successor");
714  HoistCommonSucc->moveBefore(TargetSucc);
715  BranchInst::Create(TargetSucc, HoistCommonSucc);
716  }
717  if (!HoistTrueDest->getTerminator()) {
718  HoistTrueDest->moveBefore(HoistCommonSucc);
719  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
720  }
721  if (!HoistFalseDest->getTerminator()) {
722  HoistFalseDest->moveBefore(HoistCommonSucc);
723  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
724  }
725 
726  // If BI is being cloned to what was originally the preheader then
727  // HoistCommonSucc will now be the new preheader.
728  if (HoistTarget == InitialPreheader) {
729  // Phis in the loop header now need to use the new preheader.
730  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
731  if (MSSAU)
// NOTE(review): the MemorySSAUpdater call this argument list belongs to is
// elided in this listing (an update rewiring the old predecessor to the new
// immediate predecessor) -- confirm against upstream.
733  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
734  // The new preheader dominates the loop header.
735  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
736  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
737  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
738  // The preheader hoist destination is now the new preheader, with the
739  // exception of the hoist destination of this branch.
740  for (auto &Pair : HoistDestinationMap)
741  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
742  Pair.second = HoistCommonSucc;
743  }
744 
745  // Now finally clone BI.
// NOTE(review): the call these arguments belong to is elided in this listing
// (replacing HoistTarget's terminator with the cloned conditional branch,
// likely via ReplaceInstWithInst) -- confirm against upstream.
747  HoistTarget->getTerminator(),
748  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
749  ++NumClonedBranches;
750 
751  assert(CurLoop->getLoopPreheader() &&
752  "Hoisting blocks should not have destroyed preheader");
753  return HoistDestinationMap[BB];
754  }
755 };
756 } // namespace
757 
758 /// Walk the specified region of the CFG (defined by all blocks dominated by
759 /// the specified block, and that are in the current loop) in depth first
760 /// order w.r.t the DominatorTree. This allows us to visit definitions before
761 /// uses, allowing us to hoist a loop body in one pass without iteration.
762 ///
// NOTE(review): the embedded numbering jumps 762->764 — the opening line of
// this function's signature (return type + name) was lost in this extraction.
// Code below is left byte-identical; only comments are added.
764  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
765  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
766  ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge,
767  int &LicmMssaOptCounter,
// NOTE(review): numbering jumps 767->769 — the closing line of the parameter
// list is missing here.
769  // Verify inputs.
770  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
771  CurLoop != nullptr && SafetyInfo != nullptr &&
772  "Unexpected input to hoistRegion.");
// Exactly one of the two aliasing oracles (legacy AST vs. MemorySSA) must be
// provided; the XOR enforces mutual exclusivity.
773  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
774  "Either AliasSetTracker or MemorySSA should be initialized.");
775 
776  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
777 
778  // Keep track of instructions that have been hoisted, as they may need to be
779  // re-hoisted if they end up not dominating all of their uses.
780  SmallVector<Instruction *, 16> HoistedInstructions;
781 
782  // For PHI hoisting to work we need to hoist blocks before their successors.
783  // We can do this by iterating through the blocks in the loop in reverse
784  // post-order.
785  LoopBlocksRPO Worklist(CurLoop);
786  Worklist.perform(LI);
787  bool Changed = false;
788  for (BasicBlock *BB : Worklist) {
789  // Only need to process the contents of this block if it is not part of a
790  // subloop (which would already have been processed).
791  if (inSubLoop(BB, CurLoop, LI))
792  continue;
793 
// Iterator is advanced before the body runs so that erasing/moving I below
// does not invalidate the traversal.
794  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
795  Instruction &I = *II++;
796  // Try constant folding this instruction. If all the operands are
797  // constants, it is technically hoistable, but it would be better to
798  // just fold it.
// NOTE(review): numbering jumps 798->800 — the line opening this `if`
// (presumably the constant-folding call binding `C`) is missing here.
800  &I, I.getModule()->getDataLayout(), TLI)) {
801  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
802  << '\n');
803  if (CurAST)
804  CurAST->copyValue(&I, C);
805  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
// NOTE(review): numbering jumps 805->807 — a line (likely the RAUW of I with
// C) is missing here.
807  if (isInstructionTriviallyDead(&I, TLI))
808  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
809  Changed = true;
810  continue;
811  }
812 
813  // Try hoisting the instruction out to the preheader. We can only do
814  // this if all of the operands of the instruction are loop invariant and
815  // if it is safe to hoist the instruction.
816  // TODO: It may be safe to hoist if we are hoisting to a conditional block
817  // and we have accurately duplicated the control flow from the loop header
818  // to that block.
819  if (CurLoop->hasLoopInvariantOperands(&I) &&
820  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true,
821  NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) &&
// NOTE(review): numbering jumps 821->823 — the call opening this third
// conjunct (a safety predicate taking the arguments below) is missing.
823  I, DT, CurLoop, SafetyInfo, ORE,
824  CurLoop->getLoopPreheader()->getTerminator())) {
825  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
826  MSSAU, ORE);
827  HoistedInstructions.push_back(&I);
828  Changed = true;
829  continue;
830  }
831 
832  // Attempt to remove floating point division out of the loop by
833  // converting it to a reciprocal multiplication.
// Only legal when the divisor is loop invariant and the instruction carries
// the 'arcp' (allow-reciprocal) fast-math flag.
834  if (I.getOpcode() == Instruction::FDiv &&
835  CurLoop->isLoopInvariant(I.getOperand(1)) &&
836  I.hasAllowReciprocal()) {
837  auto Divisor = I.getOperand(1);
838  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
839  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
840  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
841  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
842  ReciprocalDivisor->insertBefore(&I);
843 
844  auto Product =
845  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
846  Product->setFastMathFlags(I.getFastMathFlags());
847  SafetyInfo->insertInstructionTo(Product, I.getParent());
848  Product->insertAfter(&I);
849  I.replaceAllUsesWith(Product);
850  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
851 
// Only the (now loop-invariant) reciprocal is hoisted; the multiply stays in
// the loop body.
852  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
853  SafetyInfo, MSSAU, ORE);
854  HoistedInstructions.push_back(ReciprocalDivisor);
855  Changed = true;
856  continue;
857  }
858 
859  auto IsInvariantStart = [&](Instruction &I) {
860  using namespace PatternMatch;
861  return I.use_empty() &&
862  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
863  };
864  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
865  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
866  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
867  };
// invariant.start intrinsics and guards may be hoisted when their operands
// are invariant and no preceding write in the loop could affect them.
868  if ((IsInvariantStart(I) || isGuard(&I)) &&
869  CurLoop->hasLoopInvariantOperands(&I) &&
870  MustExecuteWithoutWritesBefore(I)) {
871  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
872  MSSAU, ORE);
873  HoistedInstructions.push_back(&I);
874  Changed = true;
875  continue;
876  }
877 
878  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
879  if (CFH.canHoistPHI(PN)) {
880  // Redirect incoming blocks first to ensure that we create hoisted
881  // versions of those blocks before we hoist the phi.
882  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
883  PN->setIncomingBlock(
884  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
885  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
886  MSSAU, ORE);
887  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
888  Changed = true;
889  continue;
890  }
891  }
892 
893  // Remember possibly hoistable branches so we can actually hoist them
894  // later if needed.
895  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
896  CFH.registerPossiblyHoistableBranch(BI);
897  }
898  }
899 
900  // If we hoisted instructions to a conditional block they may not dominate
901  // their uses that weren't hoisted (such as phis where some operands are not
902  // loop invariant). If so make them unconditional by moving them to their
903  // immediate dominator. We iterate through the instructions in reverse order
904  // which ensures that when we rehoist an instruction we rehoist its operands,
905  // and also keep track of where in the block we are rehoisting to to make sure
906  // that we rehoist instructions before the instructions that use them.
907  Instruction *HoistPoint = nullptr;
908  if (ControlFlowHoisting) {
909  for (Instruction *I : reverse(HoistedInstructions)) {
910  if (!llvm::all_of(I->uses(),
911  [&](Use &U) { return DT->dominates(I, U); })) {
912  BasicBlock *Dominator =
913  DT->getNode(I->getParent())->getIDom()->getBlock();
914  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
915  if (HoistPoint)
916  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
917  "New hoist point expected to dominate old hoist point");
918  HoistPoint = Dominator->getTerminator();
919  }
920  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
921  << HoistPoint->getParent()->getName()
922  << ": " << *I << "\n");
923  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
924  HoistPoint = I;
925  Changed = true;
926  }
927  }
928  }
929  if (MSSAU && VerifyMemorySSA)
930  MSSAU->getMemorySSA()->verifyMemorySSA();
931 
932  // Now that we've finished hoisting make sure that LI and DT are still
933  // valid.
934 #ifndef NDEBUG
935  if (Changed) {
// NOTE(review): numbering jumps 935->937 — the DT verification call whose
// message appears on the next line is missing from this extraction.
937  "Dominator tree verification failed");
938  LI->verify(*DT);
939  }
940 #endif
941 
942  return Changed;
943 }
944 
945 // Return true if LI is invariant within scope of the loop. LI is invariant if
946 // CurLoop is dominated by an invariant.start representing the same memory
947 // location and size as the memory location LI loads from, and also the
948 // invariant.start has no uses.
// NOTE(review): numbering jumps 948->950 — the function's signature line
// (declaring LI and DT parameters) is missing from this extraction.
950  Loop *CurLoop) {
951  Value *Addr = LI->getOperand(0);
952  const DataLayout &DL = LI->getModule()->getDataLayout();
// Size in bits of the loaded location, taken from the pointee type.
953  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
954  cast<PointerType>(Addr->getType())->getElementType());
955 
956  // if the type is i8 addrspace(x)*, we know this is the type of
957  // llvm.invariant.start operand
958  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
959  LI->getPointerAddressSpace());
960  unsigned BitcastsVisited = 0;
961  // Look through bitcasts until we reach the i8* type (this is invariant.start
962  // operand type).
963  while (Addr->getType() != PtrInt8Ty) {
964  auto *BC = dyn_cast<BitCastInst>(Addr);
965  // Avoid traversing high number of bitcast uses.
966  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
967  return false;
968  Addr = BC->getOperand(0);
969  }
970 
971  unsigned UsesVisited = 0;
972  // Traverse all uses of the load operand value, to see if invariant.start is
973  // one of the uses, and whether it dominates the load instruction.
974  for (auto *U : Addr->users()) {
975  // Avoid traversing for Load operand with high number of users.
976  if (++UsesVisited > MaxNumUsesTraversed)
977  return false;
// NOTE(review): numbering jumps 977->979 — the line binding `II` (a cast of
// U to an intrinsic instruction, judging by its uses below) is missing.
979  // If there are escaping uses of invariant.start instruction, the load maybe
980  // non-invariant.
981  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
982  !II->use_empty())
983  continue;
// invariant.start's first argument is the invariant region's size in bytes;
// scale to bits for comparison against LocSizeInBits.
984  unsigned InvariantSizeInBits =
985  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
986  // Confirm the invariant.start location size contains the load operand size
987  // in bits. Also, the invariant.start should dominate the load, and we
988  // should not hoist the load out of a loop that contains this dominating
989  // invariant.start.
990  if (LocSizeInBits <= InvariantSizeInBits &&
991  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
992  return true;
993  }
994 
995  return false;
996 }
997 
998 namespace {
999 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1000 /// sink a given instruction out of a loop. Does not address legality
1001 /// concerns such as aliasing or speculation safety.
1002 bool isHoistableAndSinkableInst(Instruction &I) {
1003  // Only these instructions are hoistable/sinkable.
1004  return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
1005  isa<CallInst>(I) || isa<FenceInst>(I) ||
1006  isa<BinaryOperator>(I) || isa<CastInst>(I) ||
1007  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
1008  isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
1009  isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
1010  isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
1011 }
1012 /// Return true if all of the alias sets within this AST are known not to
1013 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1014 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1015  const Loop *L) {
1016  if (CurAST) {
1017  for (AliasSet &AS : *CurAST) {
1018  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1019  return false;
1020  }
1021  }
1022  return true;
1023  } else { /*MSSAU*/
1024  for (auto *BB : L->getBlocks())
1025  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1026  return false;
1027  return true;
1028  }
1029 }
1030 
1031 /// Return true if I is the only Instruction with a MemoryAccess in L.
1032 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1033  const MemorySSAUpdater *MSSAU) {
1034  for (auto *BB : L->getBlocks())
1035  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1036  int NotAPhi = 0;
1037  for (const auto &Acc : *Accs) {
1038  if (isa<MemoryPhi>(&Acc))
1039  continue;
1040  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1041  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1042  return false;
1043  }
1044  }
1045  return true;
1046 }
1047 }
1048 
// NOTE(review): the embedded numbering starts at 1050 here — the opening line
// of this function's signature (return type, name, and first parameters) was
// lost in this extraction. Code below is left byte-identical.
1050  Loop *CurLoop, AliasSetTracker *CurAST,
1051  MemorySSAUpdater *MSSAU,
1052  bool TargetExecutesOncePerLoop,
1053  bool NoOfMemAccTooLarge, int *LicmMssaOptCounter,
// NOTE(review): numbering jumps 1053->1055 — the closing line of the
// parameter list is missing.
1055  // If we don't understand the instruction, bail early.
1056  if (!isHoistableAndSinkableInst(I))
1057  return false;
1058 
1059  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1060  if (MSSA)
1061  assert(LicmMssaOptCounter != nullptr && "Counter cannot be null.");
1062 
1063  // Loads have extra constraints we have to verify before we can hoist them.
1064  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1065  if (!LI->isUnordered())
1066  return false; // Don't sink/hoist volatile or ordered atomic loads!
1067 
1068  // Loads from constant memory are always safe to move, even if they end up
1069  // in the same alias set as something that ends up being modified.
1070  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1071  return true;
1072  if (LI->getMetadata(LLVMContext::MD_invariant_load))
1073  return true;
1074 
1075  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1076  return false; // Don't risk duplicating unordered loads
1077 
1078  // This checks for an invariant.start dominating the load.
1079  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1080  return true;
1081 
// Dispatch to whichever aliasing oracle (legacy AST or MemorySSA) is active.
1082  bool Invalidated;
1083  if (CurAST)
1084  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1085  CurLoop, AA);
1086  else
1087  Invalidated = pointerInvalidatedByLoopWithMSSA(
1088  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop,
1089  *LicmMssaOptCounter);
1090  // Check loop-invariant address because this may also be a sinkable load
1091  // whose address is not necessarily loop-invariant.
1092  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1093  ORE->emit([&]() {
1094  return OptimizationRemarkMissed(
1095  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1096  << "failed to move load with loop-invariant address "
1097  "because the loop may invalidate its value";
1098  });
1099 
1100  return !Invalidated;
1101  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1102  // Don't sink or hoist dbg info; it's legal, but not useful.
1103  if (isa<DbgInfoIntrinsic>(I))
1104  return false;
1105 
1106  // Don't sink calls which can throw.
1107  if (CI->mayThrow())
1108  return false;
1109 
1110  using namespace PatternMatch;
1111  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1112  // Assumes don't actually alias anything or throw
1113  return true;
1114 
1115  // Handle simple cases by querying alias analysis.
1116  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1117  if (Behavior == FMRB_DoesNotAccessMemory)
1118  return true;
1119  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1120  // A readonly argmemonly function only reads from memory pointed to by
1121  // it's arguments with arbitrary offsets. If we can prove there are no
1122  // writes to this memory in the loop, we can hoist or sink.
// NOTE(review): numbering jumps 1122->1124 — a guard line (presumably
// restricting this branch to argmemonly behaviors) is missing here.
1124  // TODO: expand to writeable arguments
1125  for (Value *Op : CI->arg_operands())
1126  if (Op->getType()->isPointerTy()) {
1127  bool Invalidated;
1128  if (CurAST)
1129  Invalidated = pointerInvalidatedByLoop(
// NOTE(review): numbering jumps 1129->1131 — the MemoryLocation argument
// built from Op is missing here.
1131  CurAST, CurLoop, AA);
1132  else
1133  Invalidated = pointerInvalidatedByLoopWithMSSA(
1134  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1135  *LicmMssaOptCounter);
1136  if (Invalidated)
1137  return false;
1138  }
1139  return true;
1140  }
1141 
1142  // If this call only reads from memory and there are no writes to memory
1143  // in the loop, we can hoist or sink the call as appropriate.
1144  if (isReadOnly(CurAST, MSSAU, CurLoop))
1145  return true;
1146  }
1147 
1148  // FIXME: This should use mod/ref information to see if we can hoist or
1149  // sink the call.
1150 
1151  return false;
1152  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1153  // Fences alias (most) everything to provide ordering. For the moment,
1154  // just give up if there are any other memory operations in the loop.
1155  if (CurAST) {
1156  auto Begin = CurAST->begin();
1157  assert(Begin != CurAST->end() && "must contain FI");
1158  if (std::next(Begin) != CurAST->end())
1159  // constant memory for instance, TODO: handle better
1160  return false;
1161  auto *UniqueI = Begin->getUniqueInstruction();
1162  if (!UniqueI)
1163  // other memory op, give up
1164  return false;
1165  (void)FI; // suppress unused variable warning
1166  assert(UniqueI == FI && "AS must contain FI");
1167  return true;
1168  } else // MSSAU
1169  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1170  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1171  if (!SI->isUnordered())
1172  return false; // Don't sink/hoist volatile or ordered atomic store!
1173 
1174  // We can only hoist a store that we can prove writes a value which is not
1175  // read or overwritten within the loop. For those cases, we fallback to
1176  // load store promotion instead. TODO: We can extend this to cases where
1177  // there is exactly one write to the location and that write dominates an
1178  // arbitrary number of reads in the loop.
1179  if (CurAST) {
1180  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1181 
1182  if (AS.isRef() || !AS.isMustAlias())
1183  // Quick exit test, handled by the full path below as well.
1184  return false;
1185  auto *UniqueI = AS.getUniqueInstruction();
1186  if (!UniqueI)
1187  // other memory op, give up
1188  return false;
1189  assert(UniqueI == SI && "AS must contain SI");
1190  return true;
1191  } else { // MSSAU
1192  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1193  return true;
1194  // If there are more accesses than the Promotion cap, give up, we're not
1195  // walking a list that long.
1196  if (NoOfMemAccTooLarge)
1197  return false;
1198  // Check store only if there's still "quota" to check clobber.
1199  if (*LicmMssaOptCounter >= LicmMssaOptCap)
1200  return false;
1201  // If there are interfering Uses (i.e. their defining access is in the
1202  // loop), or ordered loads (stored as Defs!), don't move this store.
1203  // Could do better here, but this is conservatively correct.
1204  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1205  // moving accesses. Can also extend to dominating uses.
1206  for (auto *BB : CurLoop->getBlocks())
1207  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1208  for (const auto &MA : *Accesses)
1209  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1210  auto *MD = MU->getDefiningAccess();
1211  if (!MSSA->isLiveOnEntryDef(MD) &&
1212  CurLoop->contains(MD->getBlock()))
1213  return false;
1214  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
1215  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1216  (void)LI; // Silence warning.
1217  assert(!LI->isUnordered() && "Expected unordered load");
1218  return false;
1219  }
1220  }
1221 
// NOTE(review): numbering jumps 1221->1223 — the line binding `Source`
// (used two lines below) is missing from this extraction.
1223  (*LicmMssaOptCounter)++;
1224  // If there are no clobbering Defs in the loop, store is safe to hoist.
1225  return MSSA->isLiveOnEntryDef(Source) ||
1226  !CurLoop->contains(Source->getBlock());
1227  }
1228  }
1229 
1230  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1231 
1232  // We've established mechanical ability and aliasing, it's up to the caller
1233  // to check fault safety
1234  return true;
1235 }
1236 
1237 /// Returns true if a PHINode is a trivially replaceable with an
1238 /// Instruction.
1239 /// This is true when all incoming values are that instruction.
1240 /// This pattern occurs most often with LCSSA PHI nodes.
1241 ///
1242 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1243  for (const Value *IncValue : PN.incoming_values())
1244  if (IncValue != &I)
1245  return false;
1246 
1247  return true;
1248 }
1249 
1250 /// Return true if the instruction is free in the loop.
1251 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1252  const TargetTransformInfo *TTI) {
1253 
1254  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
// NOTE(review): numbering jumps 1254->1256 — the condition guarding this
// early `return false` (presumably a TTI cost check on the GEP) is missing
// from this extraction.
1256  return false;
1257  // For a GEP, we cannot simply use getUserCost because currently it
1258  // optimistically assume that a GEP will fold into addressing mode
1259  // regardless of its users.
1260  const BasicBlock *BB = GEP->getParent();
// A GEP is only "free" if every in-loop user is a load/store in the same
// block, where it can fold into the addressing mode.
1261  for (const User *U : GEP->users()) {
1262  const Instruction *UI = cast<Instruction>(U);
1263  if (CurLoop->contains(UI) &&
1264  (BB != UI->getParent() ||
1265  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1266  return false;
1267  }
1268  return true;
1269  } else
1270  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1271 }
1272 
1273 /// Return true if the only users of this instruction are outside of
1274 /// the loop. If this is true, we can sink the instruction to the exit
1275 /// blocks of the loop.
1276 ///
1277 /// We also return true if the instruction could be folded away in lowering.
1278 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1279 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1280  const LoopSafetyInfo *SafetyInfo,
1281  TargetTransformInfo *TTI, bool &FreeInLoop) {
1282  const auto &BlockColors = SafetyInfo->getBlockColors();
1283  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1284  for (const User *U : I.users()) {
1285  const Instruction *UI = cast<Instruction>(U);
1286  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1287  const BasicBlock *BB = PN->getParent();
1288  // We cannot sink uses in catchswitches.
1289  if (isa<CatchSwitchInst>(BB->getTerminator()))
1290  return false;
1291 
1292  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1293  // phi use is too muddled.
1294  if (isa<CallInst>(I))
1295  if (!BlockColors.empty() &&
1296  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1297  return false;
1298  }
1299 
1300  if (CurLoop->contains(UI)) {
1301  if (IsFree) {
1302  FreeInLoop = true;
1303  continue;
1304  }
1305  return false;
1306  }
1307  }
1308  return true;
1309 }
1310 
// NOTE(review): the embedded numbering starts at 1312 here — the opening line
// of this function's signature (return type and name) was lost in this
// extraction. Code below is left byte-identical; only comments are added.
1312  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1313  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1314  Instruction *New;
1315  if (auto *CI = dyn_cast<CallInst>(&I)) {
1316  const auto &BlockColors = SafetyInfo->getBlockColors();
1317 
1318  // Sinking call-sites need to be handled differently from other
1319  // instructions. The cloned call-site needs a funclet bundle operand
1320  // appropriate for it's location in the CFG.
// NOTE(review): numbering jumps 1320->1322 — the declaration of `OpBundles`
// (populated below) is missing from this extraction.
1322  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1323  BundleIdx != BundleEnd; ++BundleIdx) {
// Copy all bundles except the funclet bundle, which is replaced below with
// one appropriate for the exit block's color.
1324  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1325  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1326  continue;
1327 
1328  OpBundles.emplace_back(Bundle);
1329  }
1330 
1331  if (!BlockColors.empty()) {
1332  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1333  assert(CV.size() == 1 && "non-unique color for exit block!");
1334  BasicBlock *BBColor = CV.front();
1335  Instruction *EHPad = BBColor->getFirstNonPHI();
1336  if (EHPad->isEHPad())
1337  OpBundles.emplace_back("funclet", EHPad);
1338  }
1339 
1340  New = CallInst::Create(CI, OpBundles);
1341  } else {
1342  New = I.clone();
1343  }
1344 
1345  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1346  if (!I.getName().empty())
1347  New->setName(I.getName() + ".le");
1348 
// Mirror the clone in MemorySSA if the original instruction had an access.
1349  MemoryAccess *OldMemAcc;
1350  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1351  // Create a new MemoryAccess and let MemorySSA set its defining access.
1352  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1353  New, nullptr, New->getParent(), MemorySSA::Beginning);
1354  if (NewMemAcc) {
1355  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1356  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1357  else {
1358  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1359  MSSAU->insertUse(MemUse);
1360  }
1361  }
1362  }
1363 
1364  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1365  // particularly cheap because we can rip off the PHI node that we're
1366  // replacing for the number and blocks of the predecessors.
1367  // OPT: If this shows up in a profile, we can instead finish sinking all
1368  // invariant instructions, and then walk their operands to re-establish
1369  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1370  // sinking bottom-up.
1371  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1372  ++OI)
1373  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1374  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1375  if (!OLoop->contains(&PN)) {
1376  PHINode *OpPN =
1377  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1378  OInst->getName() + ".lcssa", &ExitBlock.front());
1379  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1380  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1381  *OI = OpPN;
1382  }
1383  return New;
1384 }
1385 
// NOTE(review): the embedded numbering starts at 1387 here — the opening line
// of this function's signature (taking the Instruction and safety info) was
// lost in this extraction.
// Remove I from whichever aliasing/safety side tables are active, then delete
// it from its parent block.
1387  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1388  if (AST)
1389  AST->deleteValue(&I);
1390  if (MSSAU)
1391  MSSAU->removeMemoryAccess(&I);
1392  SafetyInfo.removeInstruction(&I);
1393  I.eraseFromParent();
1394 }
1395 
// NOTE(review): the embedded numbering starts at 1397 here — the opening line
// of this function's signature (taking I and Dest) was lost in this
// extraction.
// Move I immediately before Dest, keeping the ICF safety info and (when
// active) MemorySSA consistent with the move.
1397  ICFLoopSafetyInfo &SafetyInfo,
1398  MemorySSAUpdater *MSSAU) {
1399  SafetyInfo.removeInstruction(&I);
1400  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1401  I.moveBefore(&Dest);
1402  if (MSSAU)
1403  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1404  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1405  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1406 }
1407 
// NOTE(review): the embedded numbering starts at 1409 and jumps 1409->1411 —
// the opening line of this function's signature and the line declaring the
// `SunkCopies` parameter (used below) were lost in this extraction.
// Return the clone of I to use in TPN's exit block, creating and caching one
// per exit block on first request.
1409  PHINode *TPN, Instruction *I, LoopInfo *LI,
1411  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1412  MemorySSAUpdater *MSSAU) {
1413  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1414  "Expect only trivially replaceable PHI");
1415  BasicBlock *ExitBlock = TPN->getParent();
1416  Instruction *New;
1417  auto It = SunkCopies.find(ExitBlock);
1418  if (It != SunkCopies.end())
1419  New = It->second;
1420  else
1421  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1422  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1423  return New;
1424 }
1425 
1426 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1427  BasicBlock *BB = PN->getParent();
1428  if (!BB->canSplitPredecessors())
1429  return false;
1430  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1431  // it require updating BlockColors for all offspring blocks accordingly. By
1432  // skipping such corner case, we can make updating BlockColors after splitting
1433  // predecessor fairly simple.
1434  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1435  return false;
1436  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1437  BasicBlock *BBPred = *PI;
1438  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1439  return false;
1440  }
1441  return true;
1442 }
1443 
// NOTE(review): the embedded numbering starts at 1445 here — the opening line
// of this function's signature (taking PN and DT) was lost in this
// extraction. Code below is left byte-identical; only comments are added.
1445  LoopInfo *LI, const Loop *CurLoop,
1446  LoopSafetyInfo *SafetyInfo,
1447  MemorySSAUpdater *MSSAU) {
1448 #ifndef NDEBUG
1449  SmallVector<BasicBlock *, 32> ExitBlocks;
1450  CurLoop->getUniqueExitBlocks(ExitBlocks);
1451  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1452  ExitBlocks.end());
1453 #endif
1454  BasicBlock *ExitBB = PN->getParent();
1455  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1456 
1457  // Split predecessors of the loop exit to make instructions in the loop are
1458  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1459  // loop in the canonical form where each predecessor of each exit block should
1460  // be contained within the loop. For example, this will convert the loop below
1461  // from
1462  //
1463  // LB1:
1464  // %v1 =
1465  // br %LE, %LB2
1466  // LB2:
1467  // %v2 =
1468  // br %LE, %LB1
1469  // LE:
1470  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1471  //
1472  // to
1473  //
1474  // LB1:
1475  // %v1 =
1476  // br %LE.split, %LB2
1477  // LB2:
1478  // %v2 =
1479  // br %LE.split2, %LB1
1480  // LE.split:
1481  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1482  // br %LE
1483  // LE.split2:
1484  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1485  // br %LE
1486  // LE:
1487  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1488  //
1489  const auto &BlockColors = SafetyInfo->getBlockColors();
// Snapshot the predecessors up front: splitting mutates ExitBB's predecessor
// list, so iterating the live list directly would be unsafe.
1490  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1491  while (!PredBBs.empty()) {
1492  BasicBlock *PredBB = *PredBBs.begin();
1493  assert(CurLoop->contains(PredBB) &&
1494  "Expect all predecessors are in the loop");
1495  if (PN->getBasicBlockIndex(PredBB) >= 0) {
// NOTE(review): numbering jumps 1495->1497 — the line binding `NewPred`
// (the block-predecessor split call taking the arguments below) is missing
// from this extraction.
1497  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1498  // Since we do not allow splitting EH-block with BlockColors in
1499  // canSplitPredecessors(), we can simply assign predecessor's color to
1500  // the new block.
1501  if (!BlockColors.empty())
1502  // Grab a reference to the ColorVector to be inserted before getting the
1503  // reference to the vector we are copying because inserting the new
1504  // element in BlockColors might cause the map to be reallocated.
1505  SafetyInfo->copyColors(NewPred, PredBB);
1506  }
1507  PredBBs.remove(PredBB);
1508  }
1509 }
1510 
1511 /// When an instruction is found to only be used outside of the loop, this
1512 /// function moves it to the exit blocks and patches up SSA form as needed.
1513 /// This method is guaranteed to remove the original instruction from its
1514 /// position, and may either delete it or move it to outside of the loop.
1515 ///
1516 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1517  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): numbering jumps 1517->1519 — a signature line (declaring at
// least the MSSAU and ORE parameters used below) is missing from this
// extraction.
1519  bool FreeInLoop) {
1520  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1521  ORE->emit([&]() {
1522  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1523  << "sinking " << ore::NV("Inst", &I);
1524  });
1525  bool Changed = false;
1526  if (isa<LoadInst>(I))
1527  ++NumMovedLoads;
1528  else if (isa<CallInst>(I))
1529  ++NumMovedCalls;
1530  ++NumSunk;
1531 
1532  // Iterate over users to be ready for actual sinking. Replace users via
1533  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1534  SmallPtrSet<Instruction *, 8> VisitedUsers;
1535  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1536  auto *User = cast<Instruction>(*UI);
1537  Use &U = UI.getUse();
// Advance before mutating: assignments to U below would otherwise
// invalidate the iterator position.
1538  ++UI;
1539 
1540  if (VisitedUsers.count(User) || CurLoop->contains(User))
1541  continue;
1542 
1543  if (!DT->isReachableFromEntry(User->getParent())) {
1544  U = UndefValue::get(I.getType());
1545  Changed = true;
1546  continue;
1547  }
1548 
1549  // The user must be a PHI node.
1550  PHINode *PN = cast<PHINode>(User);
1551 
1552  // Surprisingly, instructions can be used outside of loops without any
1553  // exits. This can only happen in PHI nodes if the incoming block is
1554  // unreachable.
1555  BasicBlock *BB = PN->getIncomingBlock(U);
1556  if (!DT->isReachableFromEntry(BB)) {
1557  U = UndefValue::get(I.getType());
1558  Changed = true;
1559  continue;
1560  }
1561 
1562  VisitedUsers.insert(PN);
1563  if (isTriviallyReplaceablePHI(*PN, I))
1564  continue;
1565 
1566  if (!canSplitPredecessors(PN, SafetyInfo))
1567  return Changed;
1568 
1569  // Split predecessors of the PHI so that we can make users trivially
1570  // replaceable.
1571  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1572 
1573  // Should rebuild the iterators, as they may be invalidated by
1574  // splitPredecessorsOfLoopExit().
1575  UI = I.user_begin();
1576  UE = I.user_end();
1577  }
1578 
1579  if (VisitedUsers.empty())
1580  return Changed;
1581 
1582 #ifndef NDEBUG
1583  SmallVector<BasicBlock *, 32> ExitBlocks;
1584  CurLoop->getUniqueExitBlocks(ExitBlocks);
1585  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1586  ExitBlocks.end());
1587 #endif
1588 
1589  // Clones of this instruction. Don't create more than one per exit block!
// NOTE(review): numbering jumps 1589->1591 — the declaration of `SunkCopies`
// (the per-exit-block clone cache used below) is missing here.
1591 
1592  // If this instruction is only used outside of the loop, then all users are
1593  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1594  // the instruction.
// NOTE(review): numbering jumps 1594->1596 — the declaration of `Users`
// (a snapshot of I's users iterated below) is missing here.
1596  for (auto *UI : Users) {
1597  auto *User = cast<Instruction>(UI);
1598 
1599  if (CurLoop->contains(User))
1600  continue;
1601 
1602  PHINode *PN = cast<PHINode>(User);
1603  assert(ExitBlockSet.count(PN->getParent()) &&
1604  "The LCSSA PHI is not in an exit block!");
1605  // The PHI must be trivially replaceable.
// NOTE(review): numbering jumps 1605->1607 — the line binding `New` (the
// sink-through-PHI call taking the arguments below) is missing here.
1607  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1608  PN->replaceAllUsesWith(New);
1609  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1610  Changed = true;
1611  }
1612  return Changed;
1613 }
1614 
1615 /// When an instruction is found to only use loop invariant operands that
1616 /// is safe to hoist, this function is called to do the dirty work.
1617 ///
1618 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1619  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
// NOTE(review): numbering jumps 1619->1621 — the closing line of the
// parameter list (declaring at least the MSSAU and ORE parameters used
// below) is missing from this extraction.
1621  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1622  << "\n");
1623  ORE->emit([&]() {
1624  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1625  << ore::NV("Inst", &I);
1626  });
1627 
1628  // Metadata can be dependent on conditions we are hoisting above.
1629  // Conservatively strip all metadata on the instruction unless we were
1630  // guaranteed to execute I if we entered the loop, in which case the metadata
1631  // is valid in the loop preheader.
1632  if (I.hasMetadataOtherThanDebugLoc() &&
1633  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1634  // time in isGuaranteedToExecute if we don't actually have anything to
1635  // drop. It is a compile time optimization, not required for correctness.
1636  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
// NOTE(review): numbering jumps 1636->1638 — the body of this `if`
// (the metadata-dropping call) is missing from this extraction.
1638 
1639  if (isa<PHINode>(I))
1640  // Move the new node to the end of the phi list in the destination block.
1641  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1642  else
1643  // Move the new node to the destination block, before its terminator.
1644  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1645 
1646  // Do not retain debug locations when we are moving instructions to different
1647  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
1648  // need to retain their debug locs because they may be inlined.
1649  // FIXME: How do we retain source locations without causing poor debugging
1650  // behavior?
1651  if (!isa<CallInst>(I))
1652  I.setDebugLoc(DebugLoc());
1653 
1654  if (isa<LoadInst>(I))
1655  ++NumMovedLoads;
1656  else if (isa<CallInst>(I))
1657  ++NumMovedCalls;
1658  ++NumHoisted;
1659 }
1660 
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader,
/// or if it is a trapping instruction and is guaranteed to execute.
// NOTE(review): the opening line of this definition (the 'static bool
// isSafeToExecuteUnconditionally(Instruction &Inst,' header) and the
// OptimizationRemarkEmitter *ORE parameter line were lost in extraction --
// restore from the original file.
                                           const DominatorTree *DT,
                                           const Loop *CurLoop,
                                           const LoopSafetyInfo *SafetyInfo,
                                           const Instruction *CtxI) {
  // Easy case: the instruction cannot fault no matter where it executes.
  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
    return true;

  // Otherwise moving it is only legal if it would have executed
  // unconditionally anyway whenever the loop is entered.
  bool GuaranteedToExecute =
      SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);

  if (!GuaranteedToExecute) {
    // Emit a missed-optimization remark for the interesting diagnostic case:
    // a load from a loop-invariant address that still could not be hoisted
    // because it only executes conditionally.
    auto *LI = dyn_cast<LoadInst>(&Inst);
    if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit([&]() {
        return OptimizationRemarkMissed(
                   DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
               << "failed to hoist load with loop-invariant address "
                  "because load is conditionally executed";
      });
  }

  return GuaranteedToExecute;
}
1689 
namespace {
/// Helper used by promoteLoopAccessesToScalars: drives the SSAUpdater-based
/// rewrite of all loads/stores of a promoted location, inserts the live-out
/// stores in the loop exit blocks, and keeps the AliasSetTracker, MemorySSA
/// (when enabled) and the loop safety info in sync as instructions are
/// deleted.
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  const SmallSetVector<Value *, 8> &PointerMustAliases;
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
  SmallVectorImpl<Instruction *> &LoopInsertPts;
  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
  PredIteratorCache &PredCache;
  AliasSetTracker &AST;
  MemorySSAUpdater *MSSAU; // May be null when MemorySSA is not in use.
  LoopInfo &LI;
  DebugLoc DL;
  int Alignment;
  bool UnorderedAtomic;
  AAMDNodes AATags;
  ICFLoopSafetyInfo &SafetyInfo;

  // If V is defined inside a loop that does not contain BB, wrap it in a
  // fresh LCSSA PHI in BB so the value use outside that loop stays in LCSSA
  // form; otherwise return V unchanged.
  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (Instruction *I = dyn_cast<Instruction>(V))
      if (Loop *L = LI.getLoopFor(I->getParent()))
        if (!L->contains(BB)) {
          // We need to create an LCSSA PHI node for the incoming value and
          // store that.
          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                        I->getName() + ".lcssa", &BB->front());
          for (BasicBlock *Pred : PredCache.get(BB))
            PN->addIncoming(I, Pred);
          return PN;
        }
    return V;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               const SmallSetVector<Value *, 8> &PMA,
               // NOTE(review): the parameter lines binding LEB, LIP, MSSAIP
               // and PIC (all consumed in the init list below) were lost in
               // extraction -- restore from the original file.
               AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
               DebugLoc dl, int alignment, bool UnorderedAtomic,
               const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
        LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
        PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
        Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
        SafetyInfo(SafetyInfo) {}

  // Only rewrite accesses whose pointer belongs to the must-alias set that is
  // being promoted.
  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction *> &) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();
    return PointerMustAliases.count(Ptr);
  }

  void doExtraRewritesBeforeFinalDeletion() override {
    // Insert stores after in the loop exit blocks. Each exit block gets a
    // store of the live-out values that feed them. Since we've already told
    // the SSA updater about the defs in the loop and the preheader
    // definition, it is all set and we can start using it.
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      Instruction *InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      if (UnorderedAtomic)
        // NOTE(review): the guarded statement here (presumably
        // NewSI->setOrdering(AtomicOrdering::Unordered)) was lost in
        // extraction; as written this 'if' would guard setAlignment instead.
        // Restore from the original file.
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      if (AATags)
        NewSI->setAAMetadata(AATags);

      if (MSSAU) {
        // Mirror the newly inserted store into MemorySSA, chaining the defs
        // per exit block via MSSAInsertPts.
        MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
        MemoryAccess *NewMemAcc;
        if (!MSSAInsertPoint) {
          NewMemAcc = MSSAU->createMemoryAccessInBB(
              NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
        } else {
          NewMemAcc =
              MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
        }
        MSSAInsertPts[i] = NewMemAcc;
        MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
        // FIXME: true for safety, false may still be correct.
      }
    }
  }

  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    // Update alias analysis.
    AST.copyValue(LI, V);
  }
  // Keep safety info, the AST and MemorySSA consistent when the promoter
  // deletes a rewritten load/store.
  void instructionDeleted(Instruction *I) const override {
    SafetyInfo.removeInstruction(I);
    AST.deleteValue(I);
    if (MSSAU)
      MSSAU->removeMemoryAccess(I);
  }
};
1794 
1795 
1796 /// Return true iff we can prove that a caller of this function can not inspect
1797 /// the contents of the provided object in a well defined program.
1798 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1799  if (isa<AllocaInst>(Object))
1800  // Since the alloca goes out of scope, we know the caller can't retain a
1801  // reference to it and be well defined. Thus, we don't need to check for
1802  // capture.
1803  return true;
1804 
1805  // For all other objects we need to know that the caller can't possibly
1806  // have gotten a reference to the object. There are two components of
1807  // that:
1808  // 1) Object can't be escaped by this function. This is what
1809  // PointerMayBeCaptured checks.
1810  // 2) Object can't have been captured at definition site. For this, we
1811  // need to know the return value is noalias. At the moment, we use a
1812  // weaker condition and handle only AllocLikeFunctions (which are
1813  // known to be noalias). TODO
1814  return isAllocLikeFn(Object, TLI) &&
1815  !PointerMayBeCaptured(Object, true, true);
1816 }
1817 
1818 } // namespace
1819 
/// Try to promote memory values to scalars by sinking stores out of the
/// loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant.
///
/// Returns true iff the location was promoted (all its loads/stores inside
/// the loop rewritten via the SSAUpdater); false if any legality or
/// profitability check failed.
// NOTE(review): the opening line of this definition (the
// 'bool llvm::promoteLoopAccessesToScalars(' header) and the parameter lines
// declaring MSSAInsertPts and PIC (both used below) were lost in
// extraction -- restore from the original file.
    const SmallSetVector<Value *, 8> &PointerMustAliases,
    SmallVectorImpl<BasicBlock *> &ExitBlocks,
    SmallVectorImpl<Instruction *> &InsertPts,
    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
    ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
  // Verify inputs.
  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
         CurAST != nullptr && SafetyInfo != nullptr &&
         "Unexpected Input to promoteLoopAccessesToScalars");

  // All pointers in the set must-alias each other, so any representative
  // serves as the promoted location.
  Value *SomePtr = *PointerMustAliases.begin();
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  // It is not safe to promote a load/store from the loop if the load/store is
  // conditional. For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  //
  // The safety property divides into two parts:
  // p1) The memory may not be dereferenceable on entry to the loop. In this
  //     case, we can't insert the required load in the preheader.
  // p2) The memory model does not allow us to insert a store along any dynamic
  //     path which did not originally have one.
  //
  // If at least one store is guaranteed to execute, both properties are
  // satisfied, and promotion is legal.
  //
  // This, however, is not a necessary condition. Even if no store/load is
  // guaranteed to execute, we can still establish these properties.
  // We can establish (p1) by proving that hoisting the load into the preheader
  // is safe (i.e. proving dereferenceability on all paths through the loop). We
  // can use any access within the alias set to prove dereferenceability,
  // since they're all must alias.
  //
  // There are two ways establish (p2):
  // a) Prove the location is thread-local. In this case the memory model
  // requirement does not apply, and stores are safe to insert.
  // b) Prove a store dominates every exit block. In this case, if an exit
  // blocks is reached, the original dynamic path would have taken us through
  // the store, so inserting a store into the exit block is safe. Note that this
  // is different from the store being guaranteed to execute. For instance,
  // if an exception is thrown on the first iteration of the loop, the original
  // store is never executed, but the exit blocks are not executed either.

  bool DereferenceableInPH = false;
  bool SafeToInsertStore = false;

  // NOTE(review): the declaration of LoopUses (the vector of loads/stores
  // collected below and later handed to the LoopPromoter) was lost in
  // extraction -- restore from the original file.

  // We start with an alignment of one and try to find instructions that allow
  // us to prove better alignment.
  unsigned Alignment = 1;
  // Keep track of which types of access we see
  bool SawUnorderedAtomic = false;
  bool SawNotAtomic = false;
  AAMDNodes AATags;

  const DataLayout &MDL = Preheader->getModule()->getDataLayout();

  bool IsKnownThreadLocalObject = false;
  if (SafetyInfo->anyBlockMayThrow()) {
    // If a loop can throw, we have to insert a store along each unwind edge.
    // That said, we can't actually make the unwind edge explicit. Therefore,
    // we have to prove that the store is dead along the unwind edge.  We do
    // this by proving that the caller can't have a reference to the object
    // after return and thus can't possibly load from the object.
    Value *Object = GetUnderlyingObject(SomePtr, MDL);
    if (!isKnownNonEscaping(Object, TLI))
      return false;
    // Subtlety: Alloca's aren't visible to callers, but *are* potentially
    // visible to other threads if captured and used during their lifetimes.
    IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
  }

  // Check that all of the pointers in the alias set have the same type. We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes. While we are at it, collect alignment and AA info.
  for (Value *ASIV : PointerMustAliases) {
    // Check that all of the pointers in the alias set have the same type. We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return false;

    for (User *U : ASIV->users()) {
      // Ignore instructions that are outside the loop.
      Instruction *UI = dyn_cast<Instruction>(U);
      if (!UI || !CurLoop->contains(UI))
        continue;

      // If there is an non-load/store instruction in the loop, we can't promote
      // it.
      if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
        // Ordered (acquire or stronger) loads cannot be promoted.
        if (!Load->isUnordered())
          return false;

        SawUnorderedAtomic |= Load->isAtomic();
        SawNotAtomic |= !Load->isAtomic();

        // An explicit alignment of 0 means "use the ABI alignment".
        unsigned InstAlignment = Load->getAlignment();
        if (!InstAlignment)
          InstAlignment =
              MDL.getABITypeAlignment(Load->getType());

        // Note that proving a load safe to speculate requires proving
        // sufficient alignment at the target location. Proving it guaranteed
        // to execute does as well. Thus we can increase our guaranteed
        // alignment as well.
        if (!DereferenceableInPH || (InstAlignment > Alignment))
          if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
                                             ORE, Preheader->getTerminator())) {
            DereferenceableInPH = true;
            Alignment = std::max(Alignment, InstAlignment);
          }
      } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
        // Stores *of* the pointer are not interesting, only stores *to* the
        // pointer.
        if (UI->getOperand(1) != ASIV)
          continue;
        if (!Store->isUnordered())
          return false;

        SawUnorderedAtomic |= Store->isAtomic();
        SawNotAtomic |= !Store->isAtomic();

        // If the store is guaranteed to execute, both properties are satisfied.
        // We may want to check if a store is guaranteed to execute even if we
        // already know that promotion is safe, since it may have higher
        // alignment than any other guaranteed stores, in which case we can
        // raise the alignment on the promoted store.
        unsigned InstAlignment = Store->getAlignment();
        if (!InstAlignment)
          InstAlignment =
              MDL.getABITypeAlignment(Store->getValueOperand()->getType());

        if (!DereferenceableInPH || !SafeToInsertStore ||
            (InstAlignment > Alignment)) {
          if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
            DereferenceableInPH = true;
            SafeToInsertStore = true;
            Alignment = std::max(Alignment, InstAlignment);
          }
        }

        // If a store dominates all exit blocks, it is safe to sink.
        // As explained above, if an exit block was executed, a dominating
        // store must have been executed at least once, so we are not
        // introducing stores on paths that did not have them.
        // Note that this only looks at explicit exit blocks. If we ever
        // start sinking stores into unwind edges (see above), this will break.
        if (!SafeToInsertStore)
          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
            return DT->dominates(Store->getParent(), Exit);
          });

        // If the store is not guaranteed to execute, we may still get
        // deref info through it.
        if (!DereferenceableInPH) {
          DereferenceableInPH = isDereferenceableAndAlignedPointer(
              Store->getPointerOperand(), Store->getAlignment(), MDL,
              Preheader->getTerminator(), DT);
        }
      } else
        return false; // Not a load or store.

      // Merge the AA tags.
      if (LoopUses.empty()) {
        // On the first load/store, just take its AA tags.
        UI->getAAMetadata(AATags);
      } else if (AATags) {
        UI->getAAMetadata(AATags, /* Merge = */ true);
      }

      LoopUses.push_back(UI);
    }
  }

  // If we found both an unordered atomic instruction and a non-atomic memory
  // access, bail.  We can't blindly promote non-atomic to atomic since we
  // might not be able to lower the result.  We can't downgrade since that
  // would violate memory model.  Also, align 0 is an error for atomics.
  if (SawUnorderedAtomic && SawNotAtomic)
    return false;

  // If we're inserting an atomic load in the preheader, we must be able to
  // lower it.  We're only guaranteed to be able to lower naturally aligned
  // atomics.
  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
  if (SawUnorderedAtomic &&
      Alignment < MDL.getTypeStoreSize(SomePtrElemType))
    return false;

  // If we couldn't prove we can hoist the load, bail.
  if (!DereferenceableInPH)
    return false;

  // We know we can hoist the load, but don't have a guaranteed store.
  // Check whether the location is thread-local. If it is, then we can insert
  // stores along paths which originally didn't have them without violating the
  // memory model.
  if (!SafeToInsertStore) {
    if (IsKnownThreadLocalObject)
      SafeToInsertStore = true;
    else {
      Value *Object = GetUnderlyingObject(SomePtr, MDL);
      SafeToInsertStore =
          (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
          !PointerMayBeCaptured(Object, true, true);
    }
  }

  // If we've still failed to prove we can sink the store, give up.
  if (!SafeToInsertStore)
    return false;

  // Otherwise, this is safe to promote, lets do it!
  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
                    << '\n');
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
                              LoopUses[0])
           << "Moving accesses to memory location out of the loop";
  });
  ++NumPromoted;

  // Grab a debug location for the inserted loads/stores; given that the
  // inserted loads/stores have little relation to the original loads/stores,
  // this code just arbitrarily picks a location from one, since any debug
  // location is better than none.
  DebugLoc DL = LoopUses[0]->getDebugLoc();

  // We use the SSAUpdater interface to insert phi nodes as required.
  // NOTE(review): the declaration of NewPHIs (the PHI vector handed to the
  // SSAUpdater below) was lost in extraction -- restore from the original
  // file.
  SSAUpdater SSA(&NewPHIs);
  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                        InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);

  // Set up the preheader to have a definition of the value.  It is the live-out
  // value from the preheader that uses in the loop will use.
  LoadInst *PreheaderLoad = new LoadInst(
      SomePtr->getType()->getPointerElementType(), SomePtr,
      SomePtr->getName() + ".promoted", Preheader->getTerminator());
  if (SawUnorderedAtomic)
    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
  PreheaderLoad->setAlignment(Alignment);
  PreheaderLoad->setDebugLoc(DL);
  if (AATags)
    PreheaderLoad->setAAMetadata(AATags);
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Mirror the preheader load into MemorySSA when it is in use.
  MemoryAccess *PreheaderLoadMemoryAccess;
  if (MSSAU) {
    PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
        PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
    MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
    MSSAU->insertUse(NewMemUse);
  }

  // Rewrite all the loads in the loop and remember all the definitions from
  // stores in the loop.
  Promoter.run(LoopUses);

  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
  if (PreheaderLoad->use_empty())
    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);

  return true;
}
2105 
2106 /// Returns an owning pointer to an alias set which incorporates aliasing info
2107 /// from L and all subloops of L.
2108 /// FIXME: In new pass manager, there is no helper function to handle loop
2109 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2110 /// from scratch for every loop. Hook up with the helper functions when
2111 /// available in the new pass manager to avoid redundant computation.
2112 std::unique_ptr<AliasSetTracker>
2113 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2114  AliasAnalysis *AA) {
2115  std::unique_ptr<AliasSetTracker> CurAST;
2116  SmallVector<Loop *, 4> RecomputeLoops;
2117  for (Loop *InnerL : L->getSubLoops()) {
2118  auto MapI = LoopToAliasSetMap.find(InnerL);
2119  // If the AST for this inner loop is missing it may have been merged into
2120  // some other loop's AST and then that loop unrolled, and so we need to
2121  // recompute it.
2122  if (MapI == LoopToAliasSetMap.end()) {
2123  RecomputeLoops.push_back(InnerL);
2124  continue;
2125  }
2126  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2127 
2128  if (CurAST) {
2129  // What if InnerLoop was modified by other passes ?
2130  // Once we've incorporated the inner loop's AST into ours, we don't need
2131  // the subloop's anymore.
2132  CurAST->add(*InnerAST);
2133  } else {
2134  CurAST = std::move(InnerAST);
2135  }
2136  LoopToAliasSetMap.erase(MapI);
2137  }
2138  if (!CurAST)
2139  CurAST = make_unique<AliasSetTracker>(*AA);
2140 
2141  // Add everything from the sub loops that are no longer directly available.
2142  for (Loop *InnerL : RecomputeLoops)
2143  for (BasicBlock *BB : InnerL->blocks())
2144  CurAST->add(*BB);
2145 
2146  // And merge in this loop (without anything from inner loops).
2147  for (BasicBlock *BB : L->blocks())
2148  if (LI->getLoopFor(BB) == L)
2149  CurAST->add(*BB);
2150 
2151  return CurAST;
2152 }
2153 
2154 std::unique_ptr<AliasSetTracker>
2155 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2156  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2157  auto *MSSA = MSSAU->getMemorySSA();
2158  auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
2159  CurAST->addAllInstructionsInLoopUsingMSSA();
2160  return CurAST;
2161 }
2162 
2163 /// Simple analysis hook. Clone alias set info.
2164 ///
2165 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2166  Loop *L) {
2167  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2168  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2169  return;
2170 
2171  ASTIt->second->copyValue(From, To);
2172 }
2173 
2174 /// Simple Analysis hook. Delete value V from alias set
2175 ///
2176 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2177  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2178  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2179  return;
2180 
2181  ASTIt->second->deleteValue(V);
2182 }
2183 
2184 /// Simple Analysis hook. Delete value L from alias set map.
2185 ///
2186 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2187  if (!LICM.getLoopToAliasSetMap().count(L))
2188  return;
2189 
2190  LICM.getLoopToAliasSetMap().erase(L);
2191 }
2192 
// NOTE(review): the opening line of this definition (the 'static bool
// pointerInvalidatedByLoop(MemoryLocation MemLoc,' header) was lost in
// extraction -- restore from the original file.
                                     AliasSetTracker *CurAST, Loop *CurLoop,
                                     AliasAnalysis *AA) {
  // First check to see if any of the basic blocks in CurLoop invalidate *V.
  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();

  // With the (default) zero threshold, or when the AST already says the
  // location is not invalidated, the AST answer is final.
  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
    return isInvalidatedAccordingToAST;

  // Check with a diagnostic analysis if we can refine the information above.
  // This is to identify the limitations of using the AST.
  // The alias set mechanism used by LICM has a major weakness in that it
  // combines all things which may alias into a single set *before* asking
  // modref questions. As a result, a single readonly call within a loop will
  // collapse all loads and stores into a single alias set and report
  // invalidation if the loop contains any store. For example, readonly calls
  // with deopt states have this form and create a general alias set with all
  // loads and stores.  In order to get any LICM in loops containing possible
  // deopt states we need a more precise invalidation of checking the mod ref
  // info of each instruction within the loop and LI. This has a complexity of
  // O(N^2), so currently, it is used only as a diagnostic tool since the
  // default value of LICMN2Threshold is zero.

  // Don't look at nested loops.
  if (CurLoop->begin() != CurLoop->end())
    return true;

  int N = 0;
  for (BasicBlock *BB : CurLoop->getBlocks())
    for (Instruction &I : *BB) {
      if (N >= LICMN2Theshold) {
        // ("Alasing" is a pre-existing typo in this debug string; left as-is
        // because debug output is runtime behavior.)
        LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
                          << *(MemLoc.Ptr) << "\n");
        return true;
      }
      N++;
      // Precise (but O(N^2) overall) per-instruction mod/ref query.
      auto Res = AA->getModRefInfo(&I, MemLoc);
      if (isModSet(Res)) {
        LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
                          << *(MemLoc.Ptr) << "\n");
        return true;
      }
    }
  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
  return false;
}
2239 
// NOTE(review): the opening line(s) of this definition (the 'static bool
// pointerInvalidatedByMSSAU(' header with the MemorySSA/MemoryUse parameters)
// and the local declaration of 'Source' assigned below were lost in
// extraction -- restore from the original file.
                                      Loop *CurLoop,
                                      int &LicmMssaOptCounter) {
  // See declaration of LicmMssaOptCap for usage details.
  if (LicmMssaOptCounter >= LicmMssaOptCap)
    // Over the optimization budget: conservatively use the defining access.
    Source = MU->getDefiningAccess();
  else {
    // Ask the skip-self walker for the true clobbering access, and charge
    // the query against the cap.
    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
    LicmMssaOptCounter++;
  }
  // The pointer is invalidated iff its clobbering access lies inside the
  // loop; a live-on-entry def means nothing in the function clobbers it.
  return !MSSA->isLiveOnEntryDef(Source) &&
         CurLoop->contains(Source->getBlock());
}
2254 
2255 /// Little predicate that returns true if the specified basic block is in
2256 /// a subloop of the current one, not the current one itself.
2257 ///
2258 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2259  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2260  return LI->getLoopFor(BB) != CurLoop;
2261 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1386
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:371
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:85
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:645
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1825
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1242
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:82
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:254
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1198
iterator end()
Definition: Function.h:657
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:176
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess&#39;s for a given basic block.
Definition: MemorySSA.h:755
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1444
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:253
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:463
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1603
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
void insertUse(MemoryUse *Use)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1251
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
iv Induction Variable Users
Definition: IVUsers.cpp:51
void reserve(size_type N)
Definition: SmallVector.h:369
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:63
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:703
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2193
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
Loop Invariant Code Motion
Definition: LICM.cpp:291
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1155
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:383
Represents read-only accesses to memory.
Definition: MemorySSA.h:316
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:957
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop: checks loop body & header for the possibility of may-throw exc...
Definition: MustExecute.cpp:71
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:375
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:689
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:123
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:942
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:763
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:944
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:67
BlockT * getHeader() const
Definition: LoopInfo.h:99
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref&#39;ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:64
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:29
iterator begin()
Definition: Function.h:655
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1408
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:321
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1508
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1264
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:575
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, int &LicmMssaOptCounter)
Definition: LICM.cpp:2240
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:294
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:259
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1426
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1213
bool isMod() const
void setAlignment(unsigned Align)
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:949
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
static cl::opt< unsigned > AccessCapForMSSAPromotion("max-acc-licm-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " "enable memory promotion."))
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1618
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:91
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:78
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1792
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1153
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:141
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
BasicBlock * getBlock() const
Definition: MemorySSA.h:156
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1279
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE, bool FreeInLoop)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1516
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:244
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:914
iterator_range< user_iterator > users()
Definition: Value.h:399
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1664
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:763
static cl::opt< int > LicmMssaOptCap("licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls."))
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:593
LoopT * getParentLoop() const
Definition: LoopInfo.h:100
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:130
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:378
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:148
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:368
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:136
licm
Definition: LICM.cpp:291
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:142
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:735
Captures loop safety information.
Definition: MustExecute.h:47
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:432
bool empty() const
Definition: LoopInfo.h:145
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef&#39;ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1017
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2258
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:375
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:349
LLVM Value Representation.
Definition: Value.h:72
void setAlignment(unsigned Align)
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, bool NoOfMemAccessesTooLarge, int *LicmMssaOptCap=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1049
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:586
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:969
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:99
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1311
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:128
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:155
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:120
bool use_empty() const
Definition: Value.h:322
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1396
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:383
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:535
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:25