LLVM  10.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/Metadata.h"
64 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <utility>
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "licm"
80 
81 STATISTIC(NumCreatedBlocks, "Number of blocks created");
82 STATISTIC(NumClonedBranches, "Number of branches cloned");
83 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
84 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
85 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
86 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
87 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
88 
89 /// Memory promotion is enabled by default.
90 static cl::opt<bool>
91  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
92  cl::desc("Disable memory promotion in LICM pass"));
93 
95  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
96  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
97 
99  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
100  cl::desc("Max num uses visited for identifying load "
101  "invariance in loop using invariant start (default = 8)"));
102 
103 // Default value of zero implies we use the regular alias set tracker mechanism
104 // instead of the cross product using AA to identify aliasing of the memory
105 // location we are interested in.
106 static cl::opt<int>
107 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
108  cl::desc("How many instruction to cross product using AA"));
109 
110 // Experimental option to allow imprecision in LICM in pathological cases, in
111 // exchange for faster compile. This is to be removed if MemorySSA starts to
112 // address the same issue. This flag applies only when LICM uses MemorySSA
113 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
114 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
115 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
116 // which may not be precise, since optimizeUses is capped. The result is
117 // correct, but we may not get as "far up" as possible to get which access is
118 // clobbering the one queried.
120  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
121  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
122  "for faster compile. Caps the MemorySSA clobbering calls."));
123 
124 // Experimentally, memory promotion carries less importance than sinking and
125 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
126 // compile time.
128  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
129  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
130  "effect. When MSSA in LICM is enabled, then this is the maximum "
131  "number of accesses allowed to be present in a loop in order to "
132  "enable memory promotion."));
133 
134 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
135 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
136  const LoopSafetyInfo *SafetyInfo,
137  TargetTransformInfo *TTI, bool &FreeInLoop);
138 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
139  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
141 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
142  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
154  Loop *CurLoop,
155  SinkAndHoistLICMFlags &Flags);
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
168 struct LoopInvariantCodeMotion {
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
177  unsigned LicmMssaNoAccForPromotionCap)
178  : LicmMssaOptCap(LicmMssaOptCap),
179  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
180 
181 private:
182  ASTrackerMapTy LoopToAliasSetMap;
183  unsigned LicmMssaOptCap;
184  unsigned LicmMssaNoAccForPromotionCap;
185 
186  std::unique_ptr<AliasSetTracker>
187  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
188  std::unique_ptr<AliasSetTracker>
189  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
190  MemorySSAUpdater *MSSAU);
191 };
192 
193 struct LegacyLICMPass : public LoopPass {
194  static char ID; // Pass identification, replacement for typeid
195  LegacyLICMPass(
196  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
197  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
198  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
200  }
201 
202  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
203  if (skipLoop(L)) {
204  // If we have run LICM on a previous loop but now we are skipping
205  // (because we've hit the opt-bisect limit), we need to clear the
206  // loop alias information.
207  LICM.getLoopToAliasSetMap().clear();
208  return false;
209  }
210 
211  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
213  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
214  : nullptr;
215  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
216  // pass. Function analyses need to be preserved across loop transformations
217  // but ORE cannot be preserved (see comment before the pass definition).
219  return LICM.runOnLoop(L,
220  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
221  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
222  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
223  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
224  *L->getHeader()->getParent()),
225  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
226  *L->getHeader()->getParent()),
227  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
228  }
229 
230  /// This transformation requires natural loop information & requires that
231  /// loop preheaders be inserted into the CFG...
232  ///
233  void getAnalysisUsage(AnalysisUsage &AU) const override {
240  }
243  }
244 
246 
247  bool doFinalization() override {
248  auto &AliasSetMap = LICM.getLoopToAliasSetMap();
249  // All loops in the AliasSetMap should be cleaned up already. The only case
250  // where we fail to do so is if an outer loop gets deleted before LICM
251  // visits it.
252  assert(all_of(AliasSetMap,
254  return !KV.first->getParentLoop();
255  }) &&
256  "Didn't free loop alias sets");
257  AliasSetMap.clear();
258  return false;
259  }
260 
261 private:
262  LoopInvariantCodeMotion LICM;
263 
264  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
265  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
266  Loop *L) override;
267 
268  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
269  /// set.
270  void deleteAnalysisValue(Value *V, Loop *L) override;
271 
272  /// Simple Analysis hook. Delete loop L from alias set map.
273  void deleteAnalysisLoop(Loop *L) override;
274 };
275 } // namespace
276 
279  const auto &FAM =
280  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
281  Function *F = L.getHeader()->getParent();
282 
283  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
284  // FIXME: This should probably be optional rather than required.
285  if (!ORE)
286  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
287  "cached at a higher level");
288 
289  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
290  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
291  AR.MSSA, ORE, true))
292  return PreservedAnalyses::all();
293 
294  auto PA = getLoopPassPreservedAnalyses();
295 
296  PA.preserve<DominatorTreeAnalysis>();
297  PA.preserve<LoopAnalysis>();
298  if (AR.MSSA)
299  PA.preserve<MemorySSAAnalysis>();
300 
301  return PA;
302 }
303 
304 char LegacyLICMPass::ID = 0;
305 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
306  false, false)
311 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
312  false)
313 
314 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
315 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
316  unsigned LicmMssaNoAccForPromotionCap) {
317  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
318 }
319 
320 /// Hoist expressions out of the specified loop. Note, alias info for inner
321 /// loop is not preserved so it is not a good idea to run LICM multiple
322 /// times on one loop.
323 /// We should delete AST for inner loops in the new pass manager to avoid
324 /// memory leak.
325 ///
326 bool LoopInvariantCodeMotion::runOnLoop(
327  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
329  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
330  bool Changed = false;
331 
332  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
333 
334  // If this loop has metadata indicating that LICM is not to be performed then
335  // just exit.
337  return false;
338  }
339 
340  std::unique_ptr<AliasSetTracker> CurAST;
341  std::unique_ptr<MemorySSAUpdater> MSSAU;
342  bool NoOfMemAccTooLarge = false;
343  unsigned LicmMssaOptCounter = 0;
344 
345  if (!MSSA) {
346  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
347  CurAST = collectAliasInfoForLoop(L, LI, AA);
348  } else {
349  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
350  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
351 
352  unsigned AccessCapCount = 0;
353  for (auto *BB : L->getBlocks()) {
354  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
355  for (const auto &MA : *Accesses) {
356  (void)MA;
357  AccessCapCount++;
358  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
359  NoOfMemAccTooLarge = true;
360  break;
361  }
362  }
363  }
364  if (NoOfMemAccTooLarge)
365  break;
366  }
367  }
368 
369  // Get the preheader block to move instructions into...
370  BasicBlock *Preheader = L->getLoopPreheader();
371 
372  // Compute loop safety information.
373  ICFLoopSafetyInfo SafetyInfo(DT);
374  SafetyInfo.computeLoopSafetyInfo(L);
375 
376  // We want to visit all of the instructions in this loop... that are not parts
377  // of our subloops (they have already had their invariants hoisted out of
378  // their loop, into this loop, so there is no need to process the BODIES of
379  // the subloops).
380  //
381  // Traverse the body of the loop in depth first order on the dominator tree so
382  // that we are guaranteed to see definitions before we see uses. This allows
383  // us to sink instructions in one pass, without iteration. After sinking
384  // instructions, we perform another pass to hoist them out of the loop.
385  SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
386  LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
387  /*IsSink=*/true};
388  if (L->hasDedicatedExits())
389  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
390  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
391  Flags.IsSink = false;
392  if (Preheader)
393  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
394  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
395 
396  // Now that all loop invariants have been removed from the loop, promote any
397  // memory references to scalars that we can.
398  // Don't sink stores from loops without dedicated block exits. Exits
399  // containing indirect branches are not transformed by loop simplify,
400  // make sure we catch that. An additional load may be generated in the
401  // preheader for SSA updater, so also avoid sinking when no preheader
402  // is available.
403  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
404  !NoOfMemAccTooLarge) {
405  // Figure out the loop exits and their insertion points
406  SmallVector<BasicBlock *, 8> ExitBlocks;
407  L->getUniqueExitBlocks(ExitBlocks);
408 
409  // We can't insert into a catchswitch.
410  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
411  return isa<CatchSwitchInst>(Exit->getTerminator());
412  });
413 
414  if (!HasCatchSwitch) {
416  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
417  InsertPts.reserve(ExitBlocks.size());
418  if (MSSAU)
419  MSSAInsertPts.reserve(ExitBlocks.size());
420  for (BasicBlock *ExitBlock : ExitBlocks) {
421  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
422  if (MSSAU)
423  MSSAInsertPts.push_back(nullptr);
424  }
425 
426  PredIteratorCache PIC;
427 
428  bool Promoted = false;
429 
430  // Build an AST using MSSA.
431  if (!CurAST.get())
432  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
433 
434  // Loop over all of the alias sets in the tracker object.
435  for (AliasSet &AS : *CurAST) {
436  // We can promote this alias set if it has a store, if it is a "Must"
437  // alias set, if the pointer is loop invariant, and if we are not
438  // eliminating any volatile loads or stores.
439  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
440  !L->isLoopInvariant(AS.begin()->getValue()))
441  continue;
442 
443  assert(
444  !AS.empty() &&
445  "Must alias set should have at least one pointer element in it!");
446 
447  SmallSetVector<Value *, 8> PointerMustAliases;
448  for (const auto &ASI : AS)
449  PointerMustAliases.insert(ASI.getValue());
450 
451  Promoted |= promoteLoopAccessesToScalars(
452  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
453  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
454  }
455 
456  // Once we have promoted values across the loop body we have to
457  // recursively reform LCSSA as any nested loop may now have values defined
458  // within the loop used in the outer loop.
459  // FIXME: This is really heavy handed. It would be a bit better to use an
460  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
461  // it as it went.
462  if (Promoted)
463  formLCSSARecursively(*L, *DT, LI, SE);
464 
465  Changed |= Promoted;
466  }
467  }
468 
469  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
470  // specifically moving instructions across the loop boundary and so it is
471  // especially in need of sanity checking here.
472  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
473  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
474  "Parent loop not left in LCSSA form after LICM!");
475 
476  // If this loop is nested inside of another one, save the alias information
477  // for when we process the outer loop.
478  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
479  LoopToAliasSetMap[L] = std::move(CurAST);
480 
481  if (MSSAU.get() && VerifyMemorySSA)
482  MSSAU->getMemorySSA()->verifyMemorySSA();
483 
484  if (Changed && SE)
485  SE->forgetLoopDispositions(L);
486  return Changed;
487 }
488 
489 /// Walk the specified region of the CFG (defined by all blocks dominated by
490 /// the specified block, and that are in the current loop) in reverse depth
491 /// first order w.r.t the DominatorTree. This allows us to visit uses before
492 /// definitions, allowing us to sink a loop body in one pass without iteration.
493 ///
496  TargetTransformInfo *TTI, Loop *CurLoop,
497  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
498  ICFLoopSafetyInfo *SafetyInfo,
499  SinkAndHoistLICMFlags &Flags,
501 
502  // Verify inputs.
503  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
504  CurLoop != nullptr && SafetyInfo != nullptr &&
505  "Unexpected input to sinkRegion.");
506  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
507  "Either AliasSetTracker or MemorySSA should be initialized.");
508 
509  // We want to visit children before parents. We will enque all the parents
510  // before their children in the worklist and process the worklist in reverse
511  // order.
513 
514  bool Changed = false;
515  for (DomTreeNode *DTN : reverse(Worklist)) {
516  BasicBlock *BB = DTN->getBlock();
517  // Only need to process the contents of this block if it is not part of a
518  // subloop (which would already have been processed).
519  if (inSubLoop(BB, CurLoop, LI))
520  continue;
521 
522  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
523  Instruction &I = *--II;
524 
525  // If the instruction is dead, we would try to sink it because it isn't
526  // used in the loop, instead, just delete it.
527  if (isInstructionTriviallyDead(&I, TLI)) {
528  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
529  salvageDebugInfo(I);
530  ++II;
531  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
532  Changed = true;
533  continue;
534  }
535 
536  // Check to see if we can sink this instruction to the exit blocks
537  // of the loop. We can do this if the all users of the instruction are
538  // outside of the loop. In this case, it doesn't even matter if the
539  // operands of the instruction are loop invariant.
540  //
541  bool FreeInLoop = false;
542  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
543  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
544  ORE) &&
545  !I.mayHaveSideEffects()) {
546  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
547  if (!FreeInLoop) {
548  ++II;
549  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
550  }
551  Changed = true;
552  }
553  }
554  }
555  }
556  if (MSSAU && VerifyMemorySSA)
557  MSSAU->getMemorySSA()->verifyMemorySSA();
558  return Changed;
559 }
560 
561 namespace {
562 // This is a helper class for hoistRegion to make it able to hoist control flow
563 // in order to be able to hoist phis. The way this works is that we initially
564 // start hoisting to the loop preheader, and when we see a loop invariant branch
565 // we make note of this. When we then come to hoist an instruction that's
566 // conditional on such a branch we duplicate the branch and the relevant control
567 // flow, then hoist the instruction into the block corresponding to its original
568 // block in the duplicated control flow.
569 class ControlFlowHoister {
570 private:
571  // Information about the loop we are hoisting from
572  LoopInfo *LI;
573  DominatorTree *DT;
574  Loop *CurLoop;
575  MemorySSAUpdater *MSSAU;
576 
577  // A map of blocks in the loop to the block their instructions will be hoisted
578  // to.
579  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
580 
581  // The branches that we can hoist, mapped to the block that marks a
582  // convergence point of their control flow.
583  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
584 
585 public:
586  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
587  MemorySSAUpdater *MSSAU)
588  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
589 
590  void registerPossiblyHoistableBranch(BranchInst *BI) {
591  // We can only hoist conditional branches with loop invariant operands.
592  if (!ControlFlowHoisting || !BI->isConditional() ||
593  !CurLoop->hasLoopInvariantOperands(BI))
594  return;
595 
596  // The branch destinations need to be in the loop, and we don't gain
597  // anything by duplicating conditional branches with duplicate successors,
598  // as it's essentially the same as an unconditional branch.
599  BasicBlock *TrueDest = BI->getSuccessor(0);
600  BasicBlock *FalseDest = BI->getSuccessor(1);
601  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
602  TrueDest == FalseDest)
603  return;
604 
605  // We can hoist BI if one branch destination is the successor of the other,
606  // or both have common successor which we check by seeing if the
607  // intersection of their successors is non-empty.
608  // TODO: This could be expanded to allowing branches where both ends
609  // eventually converge to a single block.
610  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
611  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
612  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
613  BasicBlock *CommonSucc = nullptr;
614  if (TrueDestSucc.count(FalseDest)) {
615  CommonSucc = FalseDest;
616  } else if (FalseDestSucc.count(TrueDest)) {
617  CommonSucc = TrueDest;
618  } else {
619  set_intersect(TrueDestSucc, FalseDestSucc);
620  // If there's one common successor use that.
621  if (TrueDestSucc.size() == 1)
622  CommonSucc = *TrueDestSucc.begin();
623  // If there's more than one pick whichever appears first in the block list
624  // (we can't use the value returned by TrueDestSucc.begin() as it's
625  // unpredicatable which element gets returned).
626  else if (!TrueDestSucc.empty()) {
627  Function *F = TrueDest->getParent();
628  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
629  auto It = std::find_if(F->begin(), F->end(), IsSucc);
630  assert(It != F->end() && "Could not find successor in function");
631  CommonSucc = &*It;
632  }
633  }
634  // The common successor has to be dominated by the branch, as otherwise
635  // there will be some other path to the successor that will not be
636  // controlled by this branch so any phi we hoist would be controlled by the
637  // wrong condition. This also takes care of avoiding hoisting of loop back
638  // edges.
639  // TODO: In some cases this could be relaxed if the successor is dominated
640  // by another block that's been hoisted and we can guarantee that the
641  // control flow has been replicated exactly.
642  if (CommonSucc && DT->dominates(BI, CommonSucc))
643  HoistableBranches[BI] = CommonSucc;
644  }
645 
646  bool canHoistPHI(PHINode *PN) {
647  // The phi must have loop invariant operands.
648  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
649  return false;
650  // We can hoist phis if the block they are in is the target of hoistable
651  // branches which cover all of the predecessors of the block.
652  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
653  BasicBlock *BB = PN->getParent();
654  for (BasicBlock *PredBB : predecessors(BB))
655  PredecessorBlocks.insert(PredBB);
656  // If we have less predecessor blocks than predecessors then the phi will
657  // have more than one incoming value for the same block which we can't
658  // handle.
659  // TODO: This could be handled be erasing some of the duplicate incoming
660  // values.
661  if (PredecessorBlocks.size() != pred_size(BB))
662  return false;
663  for (auto &Pair : HoistableBranches) {
664  if (Pair.second == BB) {
665  // Which blocks are predecessors via this branch depends on if the
666  // branch is triangle-like or diamond-like.
667  if (Pair.first->getSuccessor(0) == BB) {
668  PredecessorBlocks.erase(Pair.first->getParent());
669  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
670  } else if (Pair.first->getSuccessor(1) == BB) {
671  PredecessorBlocks.erase(Pair.first->getParent());
672  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
673  } else {
674  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
675  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
676  }
677  }
678  }
679  // PredecessorBlocks will now be empty if for every predecessor of BB we
680  // found a hoistable branch source.
681  return PredecessorBlocks.empty();
682  }
683 
684  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
685  if (!ControlFlowHoisting)
686  return CurLoop->getLoopPreheader();
687  // If BB has already been hoisted, return that
688  if (HoistDestinationMap.count(BB))
689  return HoistDestinationMap[BB];
690 
691  // Check if this block is conditional based on a pending branch
692  auto HasBBAsSuccessor =
694  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
695  Pair.first->getSuccessor(1) == BB);
696  };
697  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
698  HasBBAsSuccessor);
699 
700  // If not involved in a pending branch, hoist to preheader
701  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
702  if (It == HoistableBranches.end()) {
703  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
704  << " as hoist destination for " << BB->getName()
705  << "\n");
706  HoistDestinationMap[BB] = InitialPreheader;
707  return InitialPreheader;
708  }
709  BranchInst *BI = It->first;
710  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
711  HoistableBranches.end() &&
712  "BB is expected to be the target of at most one branch");
713 
714  LLVMContext &C = BB->getContext();
715  BasicBlock *TrueDest = BI->getSuccessor(0);
716  BasicBlock *FalseDest = BI->getSuccessor(1);
717  BasicBlock *CommonSucc = HoistableBranches[BI];
718  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
719 
720  // Create hoisted versions of blocks that currently don't have them
721  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
722  if (HoistDestinationMap.count(Orig))
723  return HoistDestinationMap[Orig];
724  BasicBlock *New =
725  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
726  HoistDestinationMap[Orig] = New;
727  DT->addNewBlock(New, HoistTarget);
728  if (CurLoop->getParentLoop())
729  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
730  ++NumCreatedBlocks;
731  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
732  << " as hoist destination for " << Orig->getName()
733  << "\n");
734  return New;
735  };
736  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
737  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
738  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
739 
740  // Link up these blocks with branches.
741  if (!HoistCommonSucc->getTerminator()) {
742  // The new common successor we've generated will branch to whatever that
743  // hoist target branched to.
744  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
745  assert(TargetSucc && "Expected hoist target to have a single successor");
746  HoistCommonSucc->moveBefore(TargetSucc);
747  BranchInst::Create(TargetSucc, HoistCommonSucc);
748  }
749  if (!HoistTrueDest->getTerminator()) {
750  HoistTrueDest->moveBefore(HoistCommonSucc);
751  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
752  }
753  if (!HoistFalseDest->getTerminator()) {
754  HoistFalseDest->moveBefore(HoistCommonSucc);
755  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
756  }
757 
758  // If BI is being cloned to what was originally the preheader then
759  // HoistCommonSucc will now be the new preheader.
760  if (HoistTarget == InitialPreheader) {
761  // Phis in the loop header now need to use the new preheader.
762  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
763  if (MSSAU)
765  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
766  // The new preheader dominates the loop header.
767  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
768  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
769  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
770  // The preheader hoist destination is now the new preheader, with the
771  // exception of the hoist destination of this branch.
772  for (auto &Pair : HoistDestinationMap)
773  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
774  Pair.second = HoistCommonSucc;
775  }
776 
777  // Now finally clone BI.
779  HoistTarget->getTerminator(),
780  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
781  ++NumClonedBranches;
782 
783  assert(CurLoop->getLoopPreheader() &&
784  "Hoisting blocks should not have destroyed preheader");
785  return HoistDestinationMap[BB];
786  }
787 };
788 } // namespace
789 
790 /// Walk the specified region of the CFG (defined by all blocks dominated by
791 /// the specified block, and that are in the current loop) in depth first
792 /// order w.r.t the DominatorTree. This allows us to visit definitions before
793 /// uses, allowing us to hoist a loop body in one pass without iteration.
794 ///
796  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
797  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
798  ICFLoopSafetyInfo *SafetyInfo,
799  SinkAndHoistLICMFlags &Flags,
801  // Verify inputs.
802  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
803  CurLoop != nullptr && SafetyInfo != nullptr &&
804  "Unexpected input to hoistRegion.");
805  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
806  "Either AliasSetTracker or MemorySSA should be initialized.");
807 
808  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
809 
810  // Keep track of instructions that have been hoisted, as they may need to be
811  // re-hoisted if they end up not dominating all of their uses.
812  SmallVector<Instruction *, 16> HoistedInstructions;
813 
814  // For PHI hoisting to work we need to hoist blocks before their successors.
815  // We can do this by iterating through the blocks in the loop in reverse
816  // post-order.
817  LoopBlocksRPO Worklist(CurLoop);
818  Worklist.perform(LI);
819  bool Changed = false;
820  for (BasicBlock *BB : Worklist) {
821  // Only need to process the contents of this block if it is not part of a
822  // subloop (which would already have been processed).
823  if (inSubLoop(BB, CurLoop, LI))
824  continue;
825 
826  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
827  Instruction &I = *II++;
828  // Try constant folding this instruction. If all the operands are
829  // constants, it is technically hoistable, but it would be better to
830  // just fold it.
832  &I, I.getModule()->getDataLayout(), TLI)) {
833  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
834  << '\n');
835  if (CurAST)
836  CurAST->copyValue(&I, C);
837  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
839  if (isInstructionTriviallyDead(&I, TLI))
840  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
841  Changed = true;
842  continue;
843  }
844 
845  // Try hoisting the instruction out to the preheader. We can only do
846  // this if all of the operands of the instruction are loop invariant and
847  // if it is safe to hoist the instruction.
848  // TODO: It may be safe to hoist if we are hoisting to a conditional block
849  // and we have accurately duplicated the control flow from the loop header
850  // to that block.
851  if (CurLoop->hasLoopInvariantOperands(&I) &&
852  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
853  ORE) &&
855  I, DT, CurLoop, SafetyInfo, ORE,
856  CurLoop->getLoopPreheader()->getTerminator())) {
857  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
858  MSSAU, ORE);
859  HoistedInstructions.push_back(&I);
860  Changed = true;
861  continue;
862  }
863 
864  // Attempt to remove floating point division out of the loop by
865  // converting it to a reciprocal multiplication.
866  if (I.getOpcode() == Instruction::FDiv &&
867  CurLoop->isLoopInvariant(I.getOperand(1)) &&
868  I.hasAllowReciprocal()) {
869  auto Divisor = I.getOperand(1);
870  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
871  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
872  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
873  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
874  ReciprocalDivisor->insertBefore(&I);
875 
876  auto Product =
877  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
878  Product->setFastMathFlags(I.getFastMathFlags());
879  SafetyInfo->insertInstructionTo(Product, I.getParent());
880  Product->insertAfter(&I);
881  I.replaceAllUsesWith(Product);
882  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
883 
884  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
885  SafetyInfo, MSSAU, ORE);
886  HoistedInstructions.push_back(ReciprocalDivisor);
887  Changed = true;
888  continue;
889  }
890 
891  auto IsInvariantStart = [&](Instruction &I) {
892  using namespace PatternMatch;
893  return I.use_empty() &&
894  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
895  };
896  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
897  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
898  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
899  };
900  if ((IsInvariantStart(I) || isGuard(&I)) &&
901  CurLoop->hasLoopInvariantOperands(&I) &&
902  MustExecuteWithoutWritesBefore(I)) {
903  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
904  MSSAU, ORE);
905  HoistedInstructions.push_back(&I);
906  Changed = true;
907  continue;
908  }
909 
910  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
911  if (CFH.canHoistPHI(PN)) {
912  // Redirect incoming blocks first to ensure that we create hoisted
913  // versions of those blocks before we hoist the phi.
914  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
915  PN->setIncomingBlock(
916  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
917  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
918  MSSAU, ORE);
919  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
920  Changed = true;
921  continue;
922  }
923  }
924 
925  // Remember possibly hoistable branches so we can actually hoist them
926  // later if needed.
927  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
928  CFH.registerPossiblyHoistableBranch(BI);
929  }
930  }
931 
932  // If we hoisted instructions to a conditional block they may not dominate
933  // their uses that weren't hoisted (such as phis where some operands are not
934  // loop invariant). If so make them unconditional by moving them to their
935  // immediate dominator. We iterate through the instructions in reverse order
936  // which ensures that when we rehoist an instruction we rehoist its operands,
937  // and also keep track of where in the block we are rehoisting to to make sure
938  // that we rehoist instructions before the instructions that use them.
939  Instruction *HoistPoint = nullptr;
940  if (ControlFlowHoisting) {
941  for (Instruction *I : reverse(HoistedInstructions)) {
942  if (!llvm::all_of(I->uses(),
943  [&](Use &U) { return DT->dominates(I, U); })) {
944  BasicBlock *Dominator =
945  DT->getNode(I->getParent())->getIDom()->getBlock();
946  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
947  if (HoistPoint)
948  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
949  "New hoist point expected to dominate old hoist point");
950  HoistPoint = Dominator->getTerminator();
951  }
952  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
953  << HoistPoint->getParent()->getName()
954  << ": " << *I << "\n");
955  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
956  HoistPoint = I;
957  Changed = true;
958  }
959  }
960  }
961  if (MSSAU && VerifyMemorySSA)
962  MSSAU->getMemorySSA()->verifyMemorySSA();
963 
964  // Now that we've finished hoisting make sure that LI and DT are still
965  // valid.
966 #ifndef NDEBUG
967  if (Changed) {
969  "Dominator tree verification failed");
970  LI->verify(*DT);
971  }
972 #endif
973 
974  return Changed;
975 }
976 
977 // Return true if LI is invariant within scope of the loop. LI is invariant if
978 // CurLoop is dominated by an invariant.start representing the same memory
979 // location and size as the memory location LI loads from, and also the
980 // invariant.start has no uses.
982  Loop *CurLoop) {
983  Value *Addr = LI->getOperand(0);
984  const DataLayout &DL = LI->getModule()->getDataLayout();
985  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
986 
987  // if the type is i8 addrspace(x)*, we know this is the type of
988  // llvm.invariant.start operand
989  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
990  LI->getPointerAddressSpace());
991  unsigned BitcastsVisited = 0;
992  // Look through bitcasts until we reach the i8* type (this is invariant.start
993  // operand type).
994  while (Addr->getType() != PtrInt8Ty) {
995  auto *BC = dyn_cast<BitCastInst>(Addr);
996  // Avoid traversing high number of bitcast uses.
997  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
998  return false;
999  Addr = BC->getOperand(0);
1000  }
1001 
1002  unsigned UsesVisited = 0;
1003  // Traverse all uses of the load operand value, to see if invariant.start is
1004  // one of the uses, and whether it dominates the load instruction.
1005  for (auto *U : Addr->users()) {
1006  // Avoid traversing for Load operand with high number of users.
1007  if (++UsesVisited > MaxNumUsesTraversed)
1008  return false;
1010  // If there are escaping uses of invariant.start instruction, the load maybe
1011  // non-invariant.
1012  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1013  !II->use_empty())
1014  continue;
1015  unsigned InvariantSizeInBits =
1016  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
1017  // Confirm the invariant.start location size contains the load operand size
1018  // in bits. Also, the invariant.start should dominate the load, and we
1019  // should not hoist the load out of a loop that contains this dominating
1020  // invariant.start.
1021  if (LocSizeInBits <= InvariantSizeInBits &&
1022  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1023  return true;
1024  }
1025 
1026  return false;
1027 }
1028 
1029 namespace {
1030 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1031 /// sink a given instruction out of a loop. Does not address legality
1032 /// concerns such as aliasing or speculation safety.
1033 bool isHoistableAndSinkableInst(Instruction &I) {
1034  // Only these instructions are hoistable/sinkable.
1035  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1036  isa<FenceInst>(I) || isa<CastInst>(I) ||
1037  isa<UnaryOperator>(I) || isa<BinaryOperator>(I) ||
1038  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1039  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1040  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1041  isa<InsertValueInst>(I));
1042 }
1043 /// Return true if all of the alias sets within this AST are known not to
1044 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1045 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1046  const Loop *L) {
1047  if (CurAST) {
1048  for (AliasSet &AS : *CurAST) {
1049  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1050  return false;
1051  }
1052  }
1053  return true;
1054  } else { /*MSSAU*/
1055  for (auto *BB : L->getBlocks())
1056  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1057  return false;
1058  return true;
1059  }
1060 }
1061 
1062 /// Return true if I is the only Instruction with a MemoryAccess in L.
1063 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1064  const MemorySSAUpdater *MSSAU) {
1065  for (auto *BB : L->getBlocks())
1066  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1067  int NotAPhi = 0;
1068  for (const auto &Acc : *Accs) {
1069  if (isa<MemoryPhi>(&Acc))
1070  continue;
1071  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1072  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1073  return false;
1074  }
1075  }
1076  return true;
1077 }
1078 }
1079 
1081  Loop *CurLoop, AliasSetTracker *CurAST,
1082  MemorySSAUpdater *MSSAU,
1083  bool TargetExecutesOncePerLoop,
1084  SinkAndHoistLICMFlags *Flags,
1086  // If we don't understand the instruction, bail early.
1087  if (!isHoistableAndSinkableInst(I))
1088  return false;
1089 
1090  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1091  if (MSSA)
1092  assert(Flags != nullptr && "Flags cannot be null.");
1093 
1094  // Loads have extra constraints we have to verify before we can hoist them.
1095  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1096  if (!LI->isUnordered())
1097  return false; // Don't sink/hoist volatile or ordered atomic loads!
1098 
1099  // Loads from constant memory are always safe to move, even if they end up
1100  // in the same alias set as something that ends up being modified.
1101  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1102  return true;
1103  if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1104  return true;
1105 
1106  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1107  return false; // Don't risk duplicating unordered loads
1108 
1109  // This checks for an invariant.start dominating the load.
1110  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1111  return true;
1112 
1113  bool Invalidated;
1114  if (CurAST)
1115  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1116  CurLoop, AA);
1117  else
1118  Invalidated = pointerInvalidatedByLoopWithMSSA(
1119  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1120  // Check loop-invariant address because this may also be a sinkable load
1121  // whose address is not necessarily loop-invariant.
1122  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1123  ORE->emit([&]() {
1124  return OptimizationRemarkMissed(
1125  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1126  << "failed to move load with loop-invariant address "
1127  "because the loop may invalidate its value";
1128  });
1129 
1130  return !Invalidated;
1131  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1132  // Don't sink or hoist dbg info; it's legal, but not useful.
1133  if (isa<DbgInfoIntrinsic>(I))
1134  return false;
1135 
1136  // Don't sink calls which can throw.
1137  if (CI->mayThrow())
1138  return false;
1139 
1140  using namespace PatternMatch;
1141  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1142  // Assumes don't actually alias anything or throw
1143  return true;
1144 
1145  // Handle simple cases by querying alias analysis.
1146  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1147  if (Behavior == FMRB_DoesNotAccessMemory)
1148  return true;
1149  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1150  // A readonly argmemonly function only reads from memory pointed to by
1151  // it's arguments with arbitrary offsets. If we can prove there are no
1152  // writes to this memory in the loop, we can hoist or sink.
1154  // TODO: expand to writeable arguments
1155  for (Value *Op : CI->arg_operands())
1156  if (Op->getType()->isPointerTy()) {
1157  bool Invalidated;
1158  if (CurAST)
1159  Invalidated = pointerInvalidatedByLoop(
1161  CurAST, CurLoop, AA);
1162  else
1163  Invalidated = pointerInvalidatedByLoopWithMSSA(
1164  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1165  *Flags);
1166  if (Invalidated)
1167  return false;
1168  }
1169  return true;
1170  }
1171 
1172  // If this call only reads from memory and there are no writes to memory
1173  // in the loop, we can hoist or sink the call as appropriate.
1174  if (isReadOnly(CurAST, MSSAU, CurLoop))
1175  return true;
1176  }
1177 
1178  // FIXME: This should use mod/ref information to see if we can hoist or
1179  // sink the call.
1180 
1181  return false;
1182  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1183  // Fences alias (most) everything to provide ordering. For the moment,
1184  // just give up if there are any other memory operations in the loop.
1185  if (CurAST) {
1186  auto Begin = CurAST->begin();
1187  assert(Begin != CurAST->end() && "must contain FI");
1188  if (std::next(Begin) != CurAST->end())
1189  // constant memory for instance, TODO: handle better
1190  return false;
1191  auto *UniqueI = Begin->getUniqueInstruction();
1192  if (!UniqueI)
1193  // other memory op, give up
1194  return false;
1195  (void)FI; // suppress unused variable warning
1196  assert(UniqueI == FI && "AS must contain FI");
1197  return true;
1198  } else // MSSAU
1199  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1200  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1201  if (!SI->isUnordered())
1202  return false; // Don't sink/hoist volatile or ordered atomic store!
1203 
1204  // We can only hoist a store that we can prove writes a value which is not
1205  // read or overwritten within the loop. For those cases, we fallback to
1206  // load store promotion instead. TODO: We can extend this to cases where
1207  // there is exactly one write to the location and that write dominates an
1208  // arbitrary number of reads in the loop.
1209  if (CurAST) {
1210  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1211 
1212  if (AS.isRef() || !AS.isMustAlias())
1213  // Quick exit test, handled by the full path below as well.
1214  return false;
1215  auto *UniqueI = AS.getUniqueInstruction();
1216  if (!UniqueI)
1217  // other memory op, give up
1218  return false;
1219  assert(UniqueI == SI && "AS must contain SI");
1220  return true;
1221  } else { // MSSAU
1222  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1223  return true;
1224  // If there are more accesses than the Promotion cap, give up, we're not
1225  // walking a list that long.
1226  if (Flags->NoOfMemAccTooLarge)
1227  return false;
1228  // Check store only if there's still "quota" to check clobber.
1229  if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
1230  return false;
1231  // If there are interfering Uses (i.e. their defining access is in the
1232  // loop), or ordered loads (stored as Defs!), don't move this store.
1233  // Could do better here, but this is conservatively correct.
1234  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1235  // moving accesses. Can also extend to dominating uses.
1236  auto *SIMD = MSSA->getMemoryAccess(SI);
1237  for (auto *BB : CurLoop->getBlocks())
1238  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1239  for (const auto &MA : *Accesses)
1240  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1241  auto *MD = MU->getDefiningAccess();
1242  if (!MSSA->isLiveOnEntryDef(MD) &&
1243  CurLoop->contains(MD->getBlock()))
1244  return false;
1245  // Disable hoisting past potentially interfering loads. Optimized
1246  // Uses may point to an access outside the loop, as getClobbering
1247  // checks the previous iteration when walking the backedge.
1248  // FIXME: More precise: no Uses that alias SI.
1249  if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
1250  return false;
1251  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
1252  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1253  (void)LI; // Silence warning.
1254  assert(!LI->isUnordered() && "Expected unordered load");
1255  return false;
1256  }
1257  }
1258 
1260  Flags->LicmMssaOptCounter++;
1261  // If there are no clobbering Defs in the loop, store is safe to hoist.
1262  return MSSA->isLiveOnEntryDef(Source) ||
1263  !CurLoop->contains(Source->getBlock());
1264  }
1265  }
1266 
1267  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1268 
1269  // We've established mechanical ability and aliasing, it's up to the caller
1270  // to check fault safety
1271  return true;
1272 }
1273 
1274 /// Returns true if a PHINode is a trivially replaceable with an
1275 /// Instruction.
1276 /// This is true when all incoming values are that instruction.
1277 /// This pattern occurs most often with LCSSA PHI nodes.
1278 ///
1279 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1280  for (const Value *IncValue : PN.incoming_values())
1281  if (IncValue != &I)
1282  return false;
1283 
1284  return true;
1285 }
1286 
1287 /// Return true if the instruction is free in the loop.
1288 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1289  const TargetTransformInfo *TTI) {
1290 
1291  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1293  return false;
1294  // For a GEP, we cannot simply use getUserCost because currently it
1295  // optimistically assume that a GEP will fold into addressing mode
1296  // regardless of its users.
1297  const BasicBlock *BB = GEP->getParent();
1298  for (const User *U : GEP->users()) {
1299  const Instruction *UI = cast<Instruction>(U);
1300  if (CurLoop->contains(UI) &&
1301  (BB != UI->getParent() ||
1302  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1303  return false;
1304  }
1305  return true;
1306  } else
1307  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1308 }
1309 
1310 /// Return true if the only users of this instruction are outside of
1311 /// the loop. If this is true, we can sink the instruction to the exit
1312 /// blocks of the loop.
1313 ///
1314 /// We also return true if the instruction could be folded away in lowering.
1315 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1316 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1317  const LoopSafetyInfo *SafetyInfo,
1318  TargetTransformInfo *TTI, bool &FreeInLoop) {
1319  const auto &BlockColors = SafetyInfo->getBlockColors();
1320  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1321  for (const User *U : I.users()) {
1322  const Instruction *UI = cast<Instruction>(U);
1323  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1324  const BasicBlock *BB = PN->getParent();
1325  // We cannot sink uses in catchswitches.
1326  if (isa<CatchSwitchInst>(BB->getTerminator()))
1327  return false;
1328 
1329  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1330  // phi use is too muddled.
1331  if (isa<CallInst>(I))
1332  if (!BlockColors.empty() &&
1333  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1334  return false;
1335  }
1336 
1337  if (CurLoop->contains(UI)) {
1338  if (IsFree) {
1339  FreeInLoop = true;
1340  continue;
1341  }
1342  return false;
1343  }
1344  }
1345  return true;
1346 }
1347 
1349  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1350  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1351  Instruction *New;
1352  if (auto *CI = dyn_cast<CallInst>(&I)) {
1353  const auto &BlockColors = SafetyInfo->getBlockColors();
1354 
1355  // Sinking call-sites need to be handled differently from other
1356  // instructions. The cloned call-site needs a funclet bundle operand
1357  // appropriate for its location in the CFG.
1359  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1360  BundleIdx != BundleEnd; ++BundleIdx) {
1361  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1362  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1363  continue;
1364 
1365  OpBundles.emplace_back(Bundle);
1366  }
1367 
1368  if (!BlockColors.empty()) {
1369  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1370  assert(CV.size() == 1 && "non-unique color for exit block!");
1371  BasicBlock *BBColor = CV.front();
1372  Instruction *EHPad = BBColor->getFirstNonPHI();
1373  if (EHPad->isEHPad())
1374  OpBundles.emplace_back("funclet", EHPad);
1375  }
1376 
1377  New = CallInst::Create(CI, OpBundles);
1378  } else {
1379  New = I.clone();
1380  }
1381 
1382  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1383  if (!I.getName().empty())
1384  New->setName(I.getName() + ".le");
1385 
1386  MemoryAccess *OldMemAcc;
1387  if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) {
1388  // Create a new MemoryAccess and let MemorySSA set its defining access.
1389  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1390  New, nullptr, New->getParent(), MemorySSA::Beginning);
1391  if (NewMemAcc) {
1392  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1393  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1394  else {
1395  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1396  MSSAU->insertUse(MemUse, /*RenameUses=*/true);
1397  }
1398  }
1399  }
1400 
1401  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1402  // particularly cheap because we can rip off the PHI node that we're
1403  // replacing for the number and blocks of the predecessors.
1404  // OPT: If this shows up in a profile, we can instead finish sinking all
1405  // invariant instructions, and then walk their operands to re-establish
1406  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1407  // sinking bottom-up.
1408  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1409  ++OI)
1410  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1411  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1412  if (!OLoop->contains(&PN)) {
1413  PHINode *OpPN =
1414  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1415  OInst->getName() + ".lcssa", &ExitBlock.front());
1416  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1417  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1418  *OI = OpPN;
1419  }
1420  return New;
1421 }
1422 
1424  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1425  if (AST)
1426  AST->deleteValue(&I);
1427  if (MSSAU)
1428  MSSAU->removeMemoryAccess(&I);
1429  SafetyInfo.removeInstruction(&I);
1430  I.eraseFromParent();
1431 }
1432 
1434  ICFLoopSafetyInfo &SafetyInfo,
1435  MemorySSAUpdater *MSSAU) {
1436  SafetyInfo.removeInstruction(&I);
1437  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1438  I.moveBefore(&Dest);
1439  if (MSSAU)
1440  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1441  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1442  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1443 }
1444 
1446  PHINode *TPN, Instruction *I, LoopInfo *LI,
1448  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1449  MemorySSAUpdater *MSSAU) {
1450  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1451  "Expect only trivially replaceable PHI");
1452  BasicBlock *ExitBlock = TPN->getParent();
1453  Instruction *New;
1454  auto It = SunkCopies.find(ExitBlock);
1455  if (It != SunkCopies.end())
1456  New = It->second;
1457  else
1458  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1459  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1460  return New;
1461 }
1462 
1463 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1464  BasicBlock *BB = PN->getParent();
1465  if (!BB->canSplitPredecessors())
1466  return false;
1467  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1468  // it require updating BlockColors for all offspring blocks accordingly. By
1469  // skipping such corner case, we can make updating BlockColors after splitting
1470  // predecessor fairly simple.
1471  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1472  return false;
1473  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1474  BasicBlock *BBPred = *PI;
1475  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1476  return false;
1477  }
1478  return true;
1479 }
1480 
1482  LoopInfo *LI, const Loop *CurLoop,
1483  LoopSafetyInfo *SafetyInfo,
1484  MemorySSAUpdater *MSSAU) {
1485 #ifndef NDEBUG
1486  SmallVector<BasicBlock *, 32> ExitBlocks;
1487  CurLoop->getUniqueExitBlocks(ExitBlocks);
1488  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1489  ExitBlocks.end());
1490 #endif
1491  BasicBlock *ExitBB = PN->getParent();
1492  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1493 
1494  // Split predecessors of the loop exit to make instructions in the loop are
1495  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1496  // loop in the canonical form where each predecessor of each exit block should
1497  // be contained within the loop. For example, this will convert the loop below
1498  // from
1499  //
1500  // LB1:
1501  // %v1 =
1502  // br %LE, %LB2
1503  // LB2:
1504  // %v2 =
1505  // br %LE, %LB1
1506  // LE:
1507  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1508  //
1509  // to
1510  //
1511  // LB1:
1512  // %v1 =
1513  // br %LE.split, %LB2
1514  // LB2:
1515  // %v2 =
1516  // br %LE.split2, %LB1
1517  // LE.split:
1518  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1519  // br %LE
1520  // LE.split2:
1521  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1522  // br %LE
1523  // LE:
1524  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1525  //
1526  const auto &BlockColors = SafetyInfo->getBlockColors();
1527  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1528  while (!PredBBs.empty()) {
1529  BasicBlock *PredBB = *PredBBs.begin();
1530  assert(CurLoop->contains(PredBB) &&
1531  "Expect all predecessors are in the loop");
1532  if (PN->getBasicBlockIndex(PredBB) >= 0) {
1534  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1535  // Since we do not allow splitting EH-block with BlockColors in
1536  // canSplitPredecessors(), we can simply assign predecessor's color to
1537  // the new block.
1538  if (!BlockColors.empty())
1539  // Grab a reference to the ColorVector to be inserted before getting the
1540  // reference to the vector we are copying because inserting the new
1541  // element in BlockColors might cause the map to be reallocated.
1542  SafetyInfo->copyColors(NewPred, PredBB);
1543  }
1544  PredBBs.remove(PredBB);
1545  }
1546 }
1547 
1548 /// When an instruction is found to only be used outside of the loop, this
1549 /// function moves it to the exit blocks and patches up SSA form as needed.
1550 /// This method is guaranteed to remove the original instruction from its
1551 /// position, and may either delete it or move it to outside of the loop.
1552 ///
1553 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1554  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1556  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1557  ORE->emit([&]() {
1558  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1559  << "sinking " << ore::NV("Inst", &I);
1560  });
1561  bool Changed = false;
1562  if (isa<LoadInst>(I))
1563  ++NumMovedLoads;
1564  else if (isa<CallInst>(I))
1565  ++NumMovedCalls;
1566  ++NumSunk;
1567 
1568  // Iterate over users to be ready for actual sinking. Replace users via
1569  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1570  SmallPtrSet<Instruction *, 8> VisitedUsers;
1571  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1572  auto *User = cast<Instruction>(*UI);
1573  Use &U = UI.getUse();
1574  ++UI;
1575 
1576  if (VisitedUsers.count(User) || CurLoop->contains(User))
1577  continue;
1578 
1579  if (!DT->isReachableFromEntry(User->getParent())) {
1580  U = UndefValue::get(I.getType());
1581  Changed = true;
1582  continue;
1583  }
1584 
1585  // The user must be a PHI node.
1586  PHINode *PN = cast<PHINode>(User);
1587 
1588  // Surprisingly, instructions can be used outside of loops without any
1589  // exits. This can only happen in PHI nodes if the incoming block is
1590  // unreachable.
1591  BasicBlock *BB = PN->getIncomingBlock(U);
1592  if (!DT->isReachableFromEntry(BB)) {
1593  U = UndefValue::get(I.getType());
1594  Changed = true;
1595  continue;
1596  }
1597 
1598  VisitedUsers.insert(PN);
1599  if (isTriviallyReplaceablePHI(*PN, I))
1600  continue;
1601 
1602  if (!canSplitPredecessors(PN, SafetyInfo))
1603  return Changed;
1604 
1605  // Split predecessors of the PHI so that we can make users trivially
1606  // replaceable.
1607  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1608 
1609  // Should rebuild the iterators, as they may be invalidated by
1610  // splitPredecessorsOfLoopExit().
1611  UI = I.user_begin();
1612  UE = I.user_end();
1613  }
1614 
1615  if (VisitedUsers.empty())
1616  return Changed;
1617 
1618 #ifndef NDEBUG
1619  SmallVector<BasicBlock *, 32> ExitBlocks;
1620  CurLoop->getUniqueExitBlocks(ExitBlocks);
1621  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1622  ExitBlocks.end());
1623 #endif
1624 
1625  // Clones of this instruction. Don't create more than one per exit block!
1627 
1628  // If this instruction is only used outside of the loop, then all users are
1629  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1630  // the instruction.
1632  for (auto *UI : Users) {
1633  auto *User = cast<Instruction>(UI);
1634 
1635  if (CurLoop->contains(User))
1636  continue;
1637 
1638  PHINode *PN = cast<PHINode>(User);
1639  assert(ExitBlockSet.count(PN->getParent()) &&
1640  "The LCSSA PHI is not in an exit block!");
1641  // The PHI must be trivially replaceable.
1643  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1644  PN->replaceAllUsesWith(New);
1645  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1646  Changed = true;
1647  }
1648  return Changed;
1649 }
1650 
1651 /// When an instruction is found to only use loop invariant operands that
1652 /// is safe to hoist, this instruction is called to do the dirty work.
1653 ///
1654 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1655  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1657  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1658  << "\n");
1659  ORE->emit([&]() {
1660  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1661  << ore::NV("Inst", &I);
1662  });
1663 
1664  // Metadata can be dependent on conditions we are hoisting above.
1665  // Conservatively strip all metadata on the instruction unless we were
1666  // guaranteed to execute I if we entered the loop, in which case the metadata
1667  // is valid in the loop preheader.
1668  if (I.hasMetadataOtherThanDebugLoc() &&
1669  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1670  // time in isGuaranteedToExecute if we don't actually have anything to
1671  // drop. It is a compile time optimization, not required for correctness.
1672  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1674 
1675  if (isa<PHINode>(I))
1676  // Move the new node to the end of the phi list in the destination block.
1677  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1678  else
1679  // Move the new node to the destination block, before its terminator.
1680  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1681 
1682  // Apply line 0 debug locations when we are moving instructions to different
1683  // basic blocks because we want to avoid jumpy line tables.
1684  if (const DebugLoc &DL = I.getDebugLoc())
1685  I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
1686 
1687  if (isa<LoadInst>(I))
1688  ++NumMovedLoads;
1689  else if (isa<CallInst>(I))
1690  ++NumMovedCalls;
1691  ++NumHoisted;
1692 }
1693 
1694 /// Only sink or hoist an instruction if it is not a trapping instruction,
1695 /// or if the instruction is known not to trap when moved to the preheader.
1696 /// or if it is a trapping instruction and is guaranteed to execute.
1698  const DominatorTree *DT,
1699  const Loop *CurLoop,
1700  const LoopSafetyInfo *SafetyInfo,
1702  const Instruction *CtxI) {
1703  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1704  return true;
1705 
1706  bool GuaranteedToExecute =
1707  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1708 
1709  if (!GuaranteedToExecute) {
1710  auto *LI = dyn_cast<LoadInst>(&Inst);
1711  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1712  ORE->emit([&]() {
1713  return OptimizationRemarkMissed(
1714  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1715  << "failed to hoist load with loop-invariant address "
1716  "because load is conditionally executed";
1717  });
1718  }
1719 
1720  return GuaranteedToExecute;
1721 }
1722 
1723 namespace {
1724 class LoopPromoter : public LoadAndStorePromoter {
1725  Value *SomePtr; // Designated pointer to store to.
1726  const SmallSetVector<Value *, 8> &PointerMustAliases;
1727  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1728  SmallVectorImpl<Instruction *> &LoopInsertPts;
1729  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1730  PredIteratorCache &PredCache;
1731  AliasSetTracker &AST;
1732  MemorySSAUpdater *MSSAU;
1733  LoopInfo &LI;
1734  DebugLoc DL;
1735  int Alignment;
1736  bool UnorderedAtomic;
1737  AAMDNodes AATags;
1738  ICFLoopSafetyInfo &SafetyInfo;
1739 
1740  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1741  if (Instruction *I = dyn_cast<Instruction>(V))
1742  if (Loop *L = LI.getLoopFor(I->getParent()))
1743  if (!L->contains(BB)) {
1744  // We need to create an LCSSA PHI node for the incoming value and
1745  // store that.
1746  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1747  I->getName() + ".lcssa", &BB->front());
1748  for (BasicBlock *Pred : PredCache.get(BB))
1749  PN->addIncoming(I, Pred);
1750  return PN;
1751  }
1752  return V;
1753  }
1754 
1755 public:
1756  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1757  const SmallSetVector<Value *, 8> &PMA,
1761  AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1762  DebugLoc dl, int alignment, bool UnorderedAtomic,
1763  const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1764  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1765  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1766  PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1767  Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1768  SafetyInfo(SafetyInfo) {}
1769 
1770  bool isInstInList(Instruction *I,
1771  const SmallVectorImpl<Instruction *> &) const override {
1772  Value *Ptr;
1773  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1774  Ptr = LI->getOperand(0);
1775  else
1776  Ptr = cast<StoreInst>(I)->getPointerOperand();
1777  return PointerMustAliases.count(Ptr);
1778  }
1779 
1780  void doExtraRewritesBeforeFinalDeletion() override {
1781  // Insert stores after in the loop exit blocks. Each exit block gets a
1782  // store of the live-out values that feed them. Since we've already told
1783  // the SSA updater about the defs in the loop and the preheader
1784  // definition, it is all set and we can start using it.
1785  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1786  BasicBlock *ExitBlock = LoopExitBlocks[i];
1787  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1788  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1789  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1790  Instruction *InsertPos = LoopInsertPts[i];
1791  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1792  if (UnorderedAtomic)
1794  NewSI->setAlignment(Alignment);
1795  NewSI->setDebugLoc(DL);
1796  if (AATags)
1797  NewSI->setAAMetadata(AATags);
1798 
1799  if (MSSAU) {
1800  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1801  MemoryAccess *NewMemAcc;
1802  if (!MSSAInsertPoint) {
1803  NewMemAcc = MSSAU->createMemoryAccessInBB(
1804  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1805  } else {
1806  NewMemAcc =
1807  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1808  }
1809  MSSAInsertPts[i] = NewMemAcc;
1810  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1811  // FIXME: true for safety, false may still be correct.
1812  }
1813  }
1814  }
1815 
1816  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1817  // Update alias analysis.
1818  AST.copyValue(LI, V);
1819  }
1820  void instructionDeleted(Instruction *I) const override {
1821  SafetyInfo.removeInstruction(I);
1822  AST.deleteValue(I);
1823  if (MSSAU)
1824  MSSAU->removeMemoryAccess(I);
1825  }
1826 };
1827 
1828 
1829 /// Return true iff we can prove that a caller of this function can not inspect
1830 /// the contents of the provided object in a well defined program.
1831 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1832  if (isa<AllocaInst>(Object))
1833  // Since the alloca goes out of scope, we know the caller can't retain a
1834  // reference to it and be well defined. Thus, we don't need to check for
1835  // capture.
1836  return true;
1837 
1838  // For all other objects we need to know that the caller can't possibly
1839  // have gotten a reference to the object. There are two components of
1840  // that:
1841  // 1) Object can't be escaped by this function. This is what
1842  // PointerMayBeCaptured checks.
1843  // 2) Object can't have been captured at definition site. For this, we
1844  // need to know the return value is noalias. At the moment, we use a
1845  // weaker condition and handle only AllocLikeFunctions (which are
1846  // known to be noalias). TODO
1847  return isAllocLikeFn(Object, TLI) &&
1848  !PointerMayBeCaptured(Object, true, true);
1849 }
1850 
1851 } // namespace
1852 
1853 /// Try to promote memory values to scalars by sinking stores out of the
1854 /// loop and moving loads to before the loop. We do this by looping over
1855 /// the stores in the loop, looking for stores to Must pointers which are
1856 /// loop invariant.
1857 ///
1859  const SmallSetVector<Value *, 8> &PointerMustAliases,
1860  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1861  SmallVectorImpl<Instruction *> &InsertPts,
1863  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1864  Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1865  ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1866  // Verify inputs.
1867  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1868  CurAST != nullptr && SafetyInfo != nullptr &&
1869  "Unexpected Input to promoteLoopAccessesToScalars");
1870 
1871  Value *SomePtr = *PointerMustAliases.begin();
1872  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1873 
1874  // It is not safe to promote a load/store from the loop if the load/store is
1875  // conditional. For example, turning:
1876  //
1877  // for () { if (c) *P += 1; }
1878  //
1879  // into:
1880  //
1881  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1882  //
1883  // is not safe, because *P may only be valid to access if 'c' is true.
1884  //
1885  // The safety property divides into two parts:
1886  // p1) The memory may not be dereferenceable on entry to the loop. In this
1887  // case, we can't insert the required load in the preheader.
1888  // p2) The memory model does not allow us to insert a store along any dynamic
1889  // path which did not originally have one.
1890  //
1891  // If at least one store is guaranteed to execute, both properties are
1892  // satisfied, and promotion is legal.
1893  //
1894  // This, however, is not a necessary condition. Even if no store/load is
1895  // guaranteed to execute, we can still establish these properties.
1896  // We can establish (p1) by proving that hoisting the load into the preheader
1897  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1898  // can use any access within the alias set to prove dereferenceability,
1899  // since they're all must alias.
1900  //
1901  // There are two ways establish (p2):
1902  // a) Prove the location is thread-local. In this case the memory model
1903  // requirement does not apply, and stores are safe to insert.
1904  // b) Prove a store dominates every exit block. In this case, if an exit
1905  // blocks is reached, the original dynamic path would have taken us through
1906  // the store, so inserting a store into the exit block is safe. Note that this
1907  // is different from the store being guaranteed to execute. For instance,
1908  // if an exception is thrown on the first iteration of the loop, the original
1909  // store is never executed, but the exit blocks are not executed either.
1910 
1911  bool DereferenceableInPH = false;
1912  bool SafeToInsertStore = false;
1913 
1915 
1916  // We start with an alignment of one and try to find instructions that allow
1917  // us to prove better alignment.
1918  unsigned Alignment = 1;
1919  // Keep track of which types of access we see
1920  bool SawUnorderedAtomic = false;
1921  bool SawNotAtomic = false;
1922  AAMDNodes AATags;
1923 
1924  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1925 
1926  bool IsKnownThreadLocalObject = false;
1927  if (SafetyInfo->anyBlockMayThrow()) {
1928  // If a loop can throw, we have to insert a store along each unwind edge.
1929  // That said, we can't actually make the unwind edge explicit. Therefore,
1930  // we have to prove that the store is dead along the unwind edge. We do
1931  // this by proving that the caller can't have a reference to the object
1932  // after return and thus can't possibly load from the object.
1933  Value *Object = GetUnderlyingObject(SomePtr, MDL);
1934  if (!isKnownNonEscaping(Object, TLI))
1935  return false;
1936  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1937  // visible to other threads if captured and used during their lifetimes.
1938  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1939  }
1940 
1941  // Check that all of the pointers in the alias set have the same type. We
1942  // cannot (yet) promote a memory location that is loaded and stored in
1943  // different sizes. While we are at it, collect alignment and AA info.
1944  for (Value *ASIV : PointerMustAliases) {
1945  // Check that all of the pointers in the alias set have the same type. We
1946  // cannot (yet) promote a memory location that is loaded and stored in
1947  // different sizes.
1948  if (SomePtr->getType() != ASIV->getType())
1949  return false;
1950 
1951  for (User *U : ASIV->users()) {
1952  // Ignore instructions that are outside the loop.
1953  Instruction *UI = dyn_cast<Instruction>(U);
1954  if (!UI || !CurLoop->contains(UI))
1955  continue;
1956 
1957  // If there is an non-load/store instruction in the loop, we can't promote
1958  // it.
1959  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1960  if (!Load->isUnordered())
1961  return false;
1962 
1963  SawUnorderedAtomic |= Load->isAtomic();
1964  SawNotAtomic |= !Load->isAtomic();
1965 
1966  unsigned InstAlignment = Load->getAlignment();
1967  if (!InstAlignment)
1968  InstAlignment =
1969  MDL.getABITypeAlignment(Load->getType());
1970 
1971  // Note that proving a load safe to speculate requires proving
1972  // sufficient alignment at the target location. Proving it guaranteed
1973  // to execute does as well. Thus we can increase our guaranteed
1974  // alignment as well.
1975  if (!DereferenceableInPH || (InstAlignment > Alignment))
1976  if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
1977  ORE, Preheader->getTerminator())) {
1978  DereferenceableInPH = true;
1979  Alignment = std::max(Alignment, InstAlignment);
1980  }
1981  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1982  // Stores *of* the pointer are not interesting, only stores *to* the
1983  // pointer.
1984  if (UI->getOperand(1) != ASIV)
1985  continue;
1986  if (!Store->isUnordered())
1987  return false;
1988 
1989  SawUnorderedAtomic |= Store->isAtomic();
1990  SawNotAtomic |= !Store->isAtomic();
1991 
1992  // If the store is guaranteed to execute, both properties are satisfied.
1993  // We may want to check if a store is guaranteed to execute even if we
1994  // already know that promotion is safe, since it may have higher
1995  // alignment than any other guaranteed stores, in which case we can
1996  // raise the alignment on the promoted store.
1997  unsigned InstAlignment = Store->getAlignment();
1998  if (!InstAlignment)
1999  InstAlignment =
2000  MDL.getABITypeAlignment(Store->getValueOperand()->getType());
2001 
2002  if (!DereferenceableInPH || !SafeToInsertStore ||
2003  (InstAlignment > Alignment)) {
2004  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2005  DereferenceableInPH = true;
2006  SafeToInsertStore = true;
2007  Alignment = std::max(Alignment, InstAlignment);
2008  }
2009  }
2010 
2011  // If a store dominates all exit blocks, it is safe to sink.
2012  // As explained above, if an exit block was executed, a dominating
2013  // store must have been executed at least once, so we are not
2014  // introducing stores on paths that did not have them.
2015  // Note that this only looks at explicit exit blocks. If we ever
2016  // start sinking stores into unwind edges (see above), this will break.
2017  if (!SafeToInsertStore)
2018  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2019  return DT->dominates(Store->getParent(), Exit);
2020  });
2021 
2022  // If the store is not guaranteed to execute, we may still get
2023  // deref info through it.
2024  if (!DereferenceableInPH) {
2025  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2026  Store->getPointerOperand(), Store->getValueOperand()->getType(),
2027  Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
2028  }
2029  } else
2030  return false; // Not a load or store.
2031 
2032  // Merge the AA tags.
2033  if (LoopUses.empty()) {
2034  // On the first load/store, just take its AA tags.
2035  UI->getAAMetadata(AATags);
2036  } else if (AATags) {
2037  UI->getAAMetadata(AATags, /* Merge = */ true);
2038  }
2039 
2040  LoopUses.push_back(UI);
2041  }
2042  }
2043 
2044  // If we found both an unordered atomic instruction and a non-atomic memory
2045  // access, bail. We can't blindly promote non-atomic to atomic since we
2046  // might not be able to lower the result. We can't downgrade since that
2047  // would violate memory model. Also, align 0 is an error for atomics.
2048  if (SawUnorderedAtomic && SawNotAtomic)
2049  return false;
2050 
2051  // If we're inserting an atomic load in the preheader, we must be able to
2052  // lower it. We're only guaranteed to be able to lower naturally aligned
2053  // atomics.
2054  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2055  if (SawUnorderedAtomic &&
2056  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2057  return false;
2058 
2059  // If we couldn't prove we can hoist the load, bail.
2060  if (!DereferenceableInPH)
2061  return false;
2062 
2063  // We know we can hoist the load, but don't have a guaranteed store.
2064  // Check whether the location is thread-local. If it is, then we can insert
2065  // stores along paths which originally didn't have them without violating the
2066  // memory model.
2067  if (!SafeToInsertStore) {
2068  if (IsKnownThreadLocalObject)
2069  SafeToInsertStore = true;
2070  else {
2071  Value *Object = GetUnderlyingObject(SomePtr, MDL);
2072  SafeToInsertStore =
2073  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2074  !PointerMayBeCaptured(Object, true, true);
2075  }
2076  }
2077 
2078  // If we've still failed to prove we can sink the store, give up.
2079  if (!SafeToInsertStore)
2080  return false;
2081 
2082  // Otherwise, this is safe to promote, lets do it!
2083  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2084  << '\n');
2085  ORE->emit([&]() {
2086  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2087  LoopUses[0])
2088  << "Moving accesses to memory location out of the loop";
2089  });
2090  ++NumPromoted;
2091 
2092  // Grab a debug location for the inserted loads/stores; given that the
2093  // inserted loads/stores have little relation to the original loads/stores,
2094  // this code just arbitrarily picks a location from one, since any debug
2095  // location is better than none.
2096  DebugLoc DL = LoopUses[0]->getDebugLoc();
2097 
2098  // We use the SSAUpdater interface to insert phi nodes as required.
2100  SSAUpdater SSA(&NewPHIs);
2101  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2102  InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2103  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2104 
2105  // Set up the preheader to have a definition of the value. It is the live-out
2106  // value from the preheader that uses in the loop will use.
2107  LoadInst *PreheaderLoad = new LoadInst(
2108  SomePtr->getType()->getPointerElementType(), SomePtr,
2109  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2110  if (SawUnorderedAtomic)
2111  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2112  PreheaderLoad->setAlignment(Alignment);
2113  PreheaderLoad->setDebugLoc(DL);
2114  if (AATags)
2115  PreheaderLoad->setAAMetadata(AATags);
2116  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2117 
2118  MemoryAccess *PreheaderLoadMemoryAccess;
2119  if (MSSAU) {
2120  PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2121  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2122  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2123  MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
2124  }
2125 
2126  if (MSSAU && VerifyMemorySSA)
2127  MSSAU->getMemorySSA()->verifyMemorySSA();
2128  // Rewrite all the loads in the loop and remember all the definitions from
2129  // stores in the loop.
2130  Promoter.run(LoopUses);
2131 
2132  if (MSSAU && VerifyMemorySSA)
2133  MSSAU->getMemorySSA()->verifyMemorySSA();
2134  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2135  if (PreheaderLoad->use_empty())
2136  eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2137 
2138  return true;
2139 }
2140 
2141 /// Returns an owning pointer to an alias set which incorporates aliasing info
2142 /// from L and all subloops of L.
2143 /// FIXME: In new pass manager, there is no helper function to handle loop
2144 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2145 /// from scratch for every loop. Hook up with the helper functions when
2146 /// available in the new pass manager to avoid redundant computation.
2147 std::unique_ptr<AliasSetTracker>
2148 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2149  AliasAnalysis *AA) {
2150  std::unique_ptr<AliasSetTracker> CurAST;
2151  SmallVector<Loop *, 4> RecomputeLoops;
2152  for (Loop *InnerL : L->getSubLoops()) {
2153  auto MapI = LoopToAliasSetMap.find(InnerL);
2154  // If the AST for this inner loop is missing it may have been merged into
2155  // some other loop's AST and then that loop unrolled, and so we need to
2156  // recompute it.
2157  if (MapI == LoopToAliasSetMap.end()) {
2158  RecomputeLoops.push_back(InnerL);
2159  continue;
2160  }
2161  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2162 
2163  if (CurAST) {
2164  // What if InnerLoop was modified by other passes ?
2165  // Once we've incorporated the inner loop's AST into ours, we don't need
2166  // the subloop's anymore.
2167  CurAST->add(*InnerAST);
2168  } else {
2169  CurAST = std::move(InnerAST);
2170  }
2171  LoopToAliasSetMap.erase(MapI);
2172  }
2173  if (!CurAST)
2174  CurAST = std::make_unique<AliasSetTracker>(*AA);
2175 
2176  // Add everything from the sub loops that are no longer directly available.
2177  for (Loop *InnerL : RecomputeLoops)
2178  for (BasicBlock *BB : InnerL->blocks())
2179  CurAST->add(*BB);
2180 
2181  // And merge in this loop (without anything from inner loops).
2182  for (BasicBlock *BB : L->blocks())
2183  if (LI->getLoopFor(BB) == L)
2184  CurAST->add(*BB);
2185 
2186  return CurAST;
2187 }
2188 
2189 std::unique_ptr<AliasSetTracker>
2190 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2191  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2192  auto *MSSA = MSSAU->getMemorySSA();
2193  auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L);
2194  CurAST->addAllInstructionsInLoopUsingMSSA();
2195  return CurAST;
2196 }
2197 
2198 /// Simple analysis hook. Clone alias set info.
2199 ///
2200 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2201  Loop *L) {
2202  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2203  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2204  return;
2205 
2206  ASTIt->second->copyValue(From, To);
2207 }
2208 
2209 /// Simple Analysis hook. Delete value V from alias set
2210 ///
2211 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2212  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2213  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2214  return;
2215 
2216  ASTIt->second->deleteValue(V);
2217 }
2218 
2219 /// Simple Analysis hook. Delete value L from alias set map.
2220 ///
2221 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2222  if (!LICM.getLoopToAliasSetMap().count(L))
2223  return;
2224 
2225  LICM.getLoopToAliasSetMap().erase(L);
2226 }
2227 
2229  AliasSetTracker *CurAST, Loop *CurLoop,
2230  AliasAnalysis *AA) {
2231  // First check to see if any of the basic blocks in CurLoop invalidate *V.
2232  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2233 
2234  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2235  return isInvalidatedAccordingToAST;
2236 
2237  // Check with a diagnostic analysis if we can refine the information above.
2238  // This is to identify the limitations of using the AST.
2239  // The alias set mechanism used by LICM has a major weakness in that it
2240  // combines all things which may alias into a single set *before* asking
2241  // modref questions. As a result, a single readonly call within a loop will
2242  // collapse all loads and stores into a single alias set and report
2243  // invalidation if the loop contains any store. For example, readonly calls
2244  // with deopt states have this form and create a general alias set with all
2245  // loads and stores. In order to get any LICM in loops containing possible
2246  // deopt states we need a more precise invalidation of checking the mod ref
2247  // info of each instruction within the loop and LI. This has a complexity of
2248  // O(N^2), so currently, it is used only as a diagnostic tool since the
2249  // default value of LICMN2Threshold is zero.
2250 
2251  // Don't look at nested loops.
2252  if (CurLoop->begin() != CurLoop->end())
2253  return true;
2254 
2255  int N = 0;
2256  for (BasicBlock *BB : CurLoop->getBlocks())
2257  for (Instruction &I : *BB) {
2258  if (N >= LICMN2Theshold) {
2259  LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2260  << *(MemLoc.Ptr) << "\n");
2261  return true;
2262  }
2263  N++;
2264  auto Res = AA->getModRefInfo(&I, MemLoc);
2265  if (isModSet(Res)) {
2266  LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2267  << *(MemLoc.Ptr) << "\n");
2268  return true;
2269  }
2270  }
2271  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2272  return false;
2273 }
2274 
2276  Loop *CurLoop,
2277  SinkAndHoistLICMFlags &Flags) {
2278  // For hoisting, use the walker to determine safety
2279  if (!Flags.IsSink) {
2281  // See declaration of SetLicmMssaOptCap for usage details.
2282  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2283  Source = MU->getDefiningAccess();
2284  else {
2285  Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2286  Flags.LicmMssaOptCounter++;
2287  }
2288  return !MSSA->isLiveOnEntryDef(Source) &&
2289  CurLoop->contains(Source->getBlock());
2290  }
2291 
2292  // For sinking, we'd need to check all Defs below this use. The getClobbering
2293  // call will look on the backedge of the loop, but will check aliasing with
2294  // the instructions on the previous iteration.
2295  // For example:
2296  // for (i ... )
2297  // load a[i] ( Use (LoE)
2298  // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2299  // i++;
2300  // The load sees no clobbering inside the loop, as the backedge alias check
2301  // does phi translation, and will check aliasing against store a[i-1].
2302  // However sinking the load outside the loop, below the store is incorrect.
2303 
2304  // For now, only sink if there are no Defs in the loop, and the existing ones
2305  // precede the use and are in the same block.
2306  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2307  // needs PostDominatorTreeAnalysis.
2308  // FIXME: More precise: no Defs that alias this Use.
2309  if (Flags.NoOfMemAccTooLarge)
2310  return true;
2311  for (auto *BB : CurLoop->getBlocks())
2312  if (auto *Accesses = MSSA->getBlockDefs(BB))
2313  for (const auto &MA : *Accesses)
2314  if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2315  if (MU->getBlock() != MD->getBlock() ||
2316  !MSSA->locallyDominates(MD, MU))
2317  return true;
2318  return false;
2319 }
2320 
2321 /// Little predicate that returns true if the specified basic block is in
2322 /// a subloop of the current one, not the current one itself.
2323 ///
2324 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2325  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2326  return LI->getLoopFor(BB) != CurLoop;
2327 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1423
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:371
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:90
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1858
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1279
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:387
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:776
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2132
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1195
iterator end()
Definition: Function.h:682
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:494
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:444
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess&#39;s for a given basic block.
Definition: MemorySSA.h:758
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1481
void insertUse(MemoryUse *Use, bool RenameUses=false)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:253
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1605
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:733
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1288
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
iv Induction Variable Users
Definition: IVUsers.cpp:51
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:795
void reserve(size_type N)
Definition: SmallVector.h:369
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, unsigned Align, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:135
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:683
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2228
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
Loop Invariant Code Motion
Definition: LICM.cpp:311
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1241
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
Represents read-only accesses to memory.
Definition: MemorySSA.h:319
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:965
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:76
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:130
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1028
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef&#39;s and MemoryPhi&#39;s for a given basic block.
Definition: MemorySSA.h:766
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:140
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:72
BlockT * getHeader() const
Definition: LoopInfo.h:105
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2275
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref&#39;ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:237
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:65
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:34
iterator begin()
Definition: Function.h:680
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1445
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:189
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1575
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1261
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:280
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:582
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:314
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
cl::opt< unsigned > SetLicmMssaOptCap
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:277
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1463
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
bool isMod() const
void setAlignment(unsigned Align)
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:981
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:333
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1654
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:96
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:79
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1863
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:270
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1160
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
Definition: MemorySSA.cpp:2101
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:147
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:752
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:926
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1316
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:716
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:247
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1000
iterator_range< user_iterator > users()
Definition: Value.h:419
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1697
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:598
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:136
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:378
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:388
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:138
licm
Definition: LICM.cpp:311
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:148
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:738
Captures loop safety information.
Definition: MustExecute.h:54
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:488
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:290
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef&#39;ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1025
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2324
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:395
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:445
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1080
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:1208
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1553
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1348
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:120
bool use_empty() const
Definition: Value.h:342
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1433
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:403
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:542
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:30