LLVM  10.0.0svn
LICM.cpp
Go to the documentation of this file.
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible. It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe. This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // This pass uses alias analysis for two purposes:
16 //
17 // 1. Moving loop invariant loads and calls out of loops. If we can determine
18 // that a load or call inside of a loop never aliases anything stored to,
19 // we can hoist it or sink it like any other instruction.
20 // 2. Scalar Promotion of Memory - If there is a store instruction inside of
21 // the loop, we try to move the store to happen AFTER the loop instead of
22 // inside of the loop. This can only happen if a few conditions are true:
23 // A. The pointer stored through is loop invariant
24 // B. There are no stores or loads in the loop which _may_ alias the
25 // pointer. There are no calls in the loop which mod/ref the pointer.
26 // If these conditions are true, we can promote the loads and stores in the
27 // loop of the pointer to use a temporary alloca'd variable. We then use
28 // the SSAUpdater to construct the appropriate SSA form for the value.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/SetOperations.h"
34 #include "llvm/ADT/Statistic.h"
42 #include "llvm/Analysis/Loads.h"
43 #include "llvm/Analysis/LoopInfo.h"
45 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/IR/CFG.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/IR/Metadata.h"
64 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/Support/Debug.h"
69 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <utility>
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "licm"
80 
// Pass-wide statistics (reported with -stats). Each counter is incremented at
// the point in this file where the corresponding transformation happens
// (e.g. ++NumCreatedBlocks / ++NumClonedBranches in ControlFlowHoister below).
81 STATISTIC(NumCreatedBlocks, "Number of blocks created");
82 STATISTIC(NumClonedBranches, "Number of branches cloned");
83 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
84 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
85 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
86 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
87 STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
88 
89 /// Memory promotion is enabled by default.
// Read in LoopInvariantCodeMotion::runOnLoop to gate the scalar-promotion
// phase (promoteLoopAccessesToScalars) that runs after sinking and hoisting.
90 static cl::opt<bool>
91  DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
92  cl::desc("Disable memory promotion in LICM pass"));
93 
95  "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
96  cl::desc("Enable control flow (and PHI) hoisting in LICM"));
97 
99  "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
100  cl::desc("Max num uses visited for identifying load "
101  "invariance in loop using invariant start (default = 8)"));
102 
103 // Default value of zero implies we use the regular alias set tracker mechanism
104 // instead of the cross product using AA to identify aliasing of the memory
105 // location we are interested in.
// NOTE(review): "Theshold" looks like a typo for "Threshold". The variable is
// not referenced in the visible portion of this file — presumably it is used
// by the AST-based pointerInvalidatedByLoop path; confirm all uses before
// renaming the variable or the flag string.
106 static cl::opt<int>
107 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
108  cl::desc("How many instruction to cross product using AA"));
109 
110 // Experimental option to allow imprecision in LICM in pathological cases, in
111 // exchange for faster compile. This is to be removed if MemorySSA starts to
112 // address the same issue. This flag applies only when LICM uses MemorySSA
113 // instead on AliasSetTracker. LICM calls MemorySSAWalker's
114 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
115 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
116 // which may not be precise, since optimizeUses is capped. The result is
117 // correct, but we may not get as "far up" as possible to get which access is
118 // clobbering the one queried.
120  "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
121  cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
122  "for faster compile. Caps the MemorySSA clobbering calls."));
123 
124 // Experimentally, memory promotion carries less importance than sinking and
125 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
126 // compile time.
128  "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
129  cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
130  "effect. When MSSA in LICM is enabled, then this is the maximum "
131  "number of accesses allowed to be present in a loop in order to "
132  "enable memory promotion."));
133 
134 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
135 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
136  const LoopSafetyInfo *SafetyInfo,
137  TargetTransformInfo *TTI, bool &FreeInLoop);
138 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
139  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
141 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
142  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
145  const DominatorTree *DT,
146  const Loop *CurLoop,
147  const LoopSafetyInfo *SafetyInfo,
149  const Instruction *CtxI = nullptr);
150 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
151  AliasSetTracker *CurAST, Loop *CurLoop,
152  AliasAnalysis *AA);
154  Loop *CurLoop,
155  SinkAndHoistLICMFlags &Flags);
157  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
158  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
159 
160 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
161  AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
162 
163 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
164  ICFLoopSafetyInfo &SafetyInfo,
165  MemorySSAUpdater *MSSAU);
166 
167 namespace {
// Implementation shared by the legacy-PM wrapper (LegacyLICMPass) and the new
// pass manager's LICMPass. Holds the per-loop AliasSetTracker cache and the
// two MemorySSA-related caps configured at construction time.
168 struct LoopInvariantCodeMotion {
// NOTE(review): this dump dropped source line 171 of the declaration below;
// judging by the call in LegacyLICMPass::runOnLoop, the missing parameters
// are TargetLibraryInfo* and TargetTransformInfo* (between DT and SE) —
// confirm against upstream.
170  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
172  ScalarEvolution *SE, MemorySSA *MSSA,
173  OptimizationRemarkEmitter *ORE, bool DeleteAST);
174 
// Exposes the loop -> AliasSetTracker cache so the legacy pass can clear or
// inspect it (skipLoop handling, doFinalization assertions).
175  ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
176  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
177  unsigned LicmMssaNoAccForPromotionCap)
178  : LicmMssaOptCap(LicmMssaOptCap),
179  LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
180 
181 private:
// Cached alias information for inner loops, consumed when the parent loop is
// processed (see the end of runOnLoop).
182  ASTrackerMapTy LoopToAliasSetMap;
// Cap on MemorySSA clobbering-walk calls (see -licm-mssa-optimization-cap).
183  unsigned LicmMssaOptCap;
// Max memory accesses in a loop before promotion is disabled under MemorySSA.
184  unsigned LicmMssaNoAccForPromotionCap;
185 
186  std::unique_ptr<AliasSetTracker>
187  collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
188  std::unique_ptr<AliasSetTracker>
189  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
190  MemorySSAUpdater *MSSAU);
191 };
192 
// Legacy pass manager wrapper: gathers the analyses the old PM provides and
// forwards to LoopInvariantCodeMotion::runOnLoop.
193 struct LegacyLICMPass : public LoopPass {
194  static char ID; // Pass identification, replacement for typeid
195  LegacyLICMPass(
196  unsigned LicmMssaOptCap = SetLicmMssaOptCap,
197  unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
198  : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
200  }
201 
202  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
203  if (skipLoop(L)) {
204  // If we have run LICM on a previous loop but now we are skipping
205  // (because we've hit the opt-bisect limit), we need to clear the
206  // loop alias information.
207  LICM.getLoopToAliasSetMap().clear();
208  return false;
209  }
210 
211  auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
// NOTE(review): source line 212 was dropped by the extraction; the dangling
// "?:" below is the tail of the MSSA initialization (presumably
// "MemorySSA *MSSA = EnableMSSALoopDependency ..." — confirm upstream).
213  ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
214  : nullptr;
215  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
216  // pass. Function analyses need to be preserved across loop transformations
217  // but ORE cannot be preserved (see comment before the pass definition).
// NOTE(review): source line 218, which constructs the local ORE passed as
// &ORE below, is missing from this dump.
219  return LICM.runOnLoop(L,
220  &getAnalysis<AAResultsWrapperPass>().getAAResults(),
221  &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
222  &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
223  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
224  *L->getHeader()->getParent()),
225  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
226  *L->getHeader()->getParent()),
227  SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
228  }
229 
230  /// This transformation requires natural loop information & requires that
231  /// loop preheaders be inserted into the CFG...
232  ///
233  void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the body of getAnalysisUsage (source lines 234-242, the
// AU.addRequired<...>/addPreserved<...> calls) was dropped by the extraction.
240  }
243  }
244 
246 
247  bool doFinalization() override {
248  auto &AliasSetMap = LICM.getLoopToAliasSetMap();
249  // All loops in the AliasSetMap should be cleaned up already. The only case
250  // where we fail to do so is if an outer loop gets deleted before LICM
251  // visits it.
// NOTE(review): the lambda header for this all_of predicate (source line 253)
// is missing from this dump.
252  assert(all_of(AliasSetMap,
254  return !KV.first->getParentLoop();
255  }) &&
256  "Didn't free loop alias sets");
257  AliasSetMap.clear();
258  return false;
259  }
260 
261 private:
262  LoopInvariantCodeMotion LICM;
263 
264  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
265  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
266  Loop *L) override;
267 
268  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
269  /// set.
270  void deleteAnalysisValue(Value *V, Loop *L) override;
271 
272  /// Simple Analysis hook. Delete loop L from alias set map.
273  void deleteAnalysisLoop(Loop *L) override;
274 };
275 } // namespace
276 
// NOTE(review): the enclosing function header (source lines 277-278,
// presumably "PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager
// &AM, LoopStandardAnalysisResults &AR, LPMUpdater &)") was dropped by the
// extraction; the body below is the new-pass-manager entry point.
279  const auto &FAM =
280  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
281  Function *F = L.getHeader()->getParent();
282 
283  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
284  // FIXME: This should probably be optional rather than required.
285  if (!ORE)
286  report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
287  "cached at a higher level");
288 
// DeleteAST=true: under the new PM the inner-loop alias sets are not cached,
// avoiding the leak described in the runOnLoop doc comment below.
289  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
290  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
291  AR.MSSA, ORE, true))
292  return PreservedAnalyses::all();
293 
294  auto PA = getLoopPassPreservedAnalyses();
295 
296  PA.preserve<DominatorTreeAnalysis>();
297  PA.preserve<LoopAnalysis>();
298  if (AR.MSSA)
299  PA.preserve<MemorySSAAnalysis>();
300 
301  return PA;
302 }
303 
// Legacy pass registration. NOTE(review): the INITIALIZE_PASS_DEPENDENCY
// lines between BEGIN and END (source lines 307-310) were dropped by the
// extraction.
304 char LegacyLICMPass::ID = 0;
305 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
306  false, false)
311 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
312  false)
313 
314 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
315 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
316  unsigned LicmMssaNoAccForPromotionCap) {
317  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
318 }
319 
320 /// Hoist expressions out of the specified loop. Note, alias info for inner
321 /// loop is not preserved so it is not a good idea to run LICM multiple
322 /// times on one loop.
323 /// We should delete AST for inner loops in the new pass manager to avoid
324 /// memory leak.
325 ///
// NOTE(review): source line 328 of the signature was dropped by the
// extraction; the function also takes TLI, TTI and SE parameters (all three
// are used in the body below) — confirm exact order against upstream.
326 bool LoopInvariantCodeMotion::runOnLoop(
327  Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
329  MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
330  bool Changed = false;
331 
332  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
333 
334  // If this loop has metadata indicating that LICM is not to be performed then
335  // just exit.
// NOTE(review): the guard condition itself (source line 336) is missing from
// this dump; the dangling "return false; }" below belongs to that dropped if.
337  return false;
338  }
339 
340  std::unique_ptr<AliasSetTracker> CurAST;
341  std::unique_ptr<MemorySSAUpdater> MSSAU;
342  bool NoOfMemAccTooLarge = false;
343  unsigned LicmMssaOptCounter = 0;
344 
345  if (!MSSA) {
346  LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
347  CurAST = collectAliasInfoForLoop(L, LI, AA);
348  } else {
349  LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
350  MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
351 
// Pre-count the memory accesses in the loop; once the count exceeds
// LicmMssaNoAccForPromotionCap, promotion is disabled for compile time
// (NoOfMemAccTooLarge is checked before the promotion phase below).
352  unsigned AccessCapCount = 0;
353  for (auto *BB : L->getBlocks()) {
354  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
355  for (const auto &MA : *Accesses) {
356  (void)MA;
357  AccessCapCount++;
358  if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
359  NoOfMemAccTooLarge = true;
360  break;
361  }
362  }
363  }
364  if (NoOfMemAccTooLarge)
365  break;
366  }
367  }
368 
369  // Get the preheader block to move instructions into...
370  BasicBlock *Preheader = L->getLoopPreheader();
371 
372  // Compute loop safety information.
373  ICFLoopSafetyInfo SafetyInfo(DT);
374  SafetyInfo.computeLoopSafetyInfo(L);
375 
376  // We want to visit all of the instructions in this loop... that are not parts
377  // of our subloops (they have already had their invariants hoisted out of
378  // their loop, into this loop, so there is no need to process the BODIES of
379  // the subloops).
380  //
381  // Traverse the body of the loop in depth first order on the dominator tree so
382  // that we are guaranteed to see definitions before we see uses. This allows
383  // us to sink instructions in one pass, without iteration. After sinking
384  // instructions, we perform another pass to hoist them out of the loop.
385  SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
386  LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
387  /*IsSink=*/true};
388  if (L->hasDedicatedExits())
389  Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
390  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
391  Flags.IsSink = false;
392  if (Preheader)
393  Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
394  CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
395 
396  // Now that all loop invariants have been removed from the loop, promote any
397  // memory references to scalars that we can.
398  // Don't sink stores from loops without dedicated block exits. Exits
399  // containing indirect branches are not transformed by loop simplify,
400  // make sure we catch that. An additional load may be generated in the
401  // preheader for SSA updater, so also avoid sinking when no preheader
402  // is available.
403  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
404  !NoOfMemAccTooLarge) {
405  // Figure out the loop exits and their insertion points
406  SmallVector<BasicBlock *, 8> ExitBlocks;
407  L->getUniqueExitBlocks(ExitBlocks);
408 
409  // We can't insert into a catchswitch.
410  bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
411  return isa<CatchSwitchInst>(Exit->getTerminator());
412  });
413 
414  if (!HasCatchSwitch) {
// NOTE(review): the declaration of InsertPts (source line 415) was dropped by
// the extraction — presumably a SmallVector<Instruction *, 8>, given the
// push_back of &*ExitBlock->getFirstInsertionPt() below; confirm upstream.
416  SmallVector<MemoryAccess *, 8> MSSAInsertPts;
417  InsertPts.reserve(ExitBlocks.size());
418  if (MSSAU)
419  MSSAInsertPts.reserve(ExitBlocks.size());
420  for (BasicBlock *ExitBlock : ExitBlocks) {
421  InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
422  if (MSSAU)
423  MSSAInsertPts.push_back(nullptr);
424  }
425 
426  PredIteratorCache PIC;
427 
428  bool Promoted = false;
429 
430  // Build an AST using MSSA.
431  if (!CurAST.get())
432  CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
433 
434  // Loop over all of the alias sets in the tracker object.
435  for (AliasSet &AS : *CurAST) {
436  // We can promote this alias set if it has a store, if it is a "Must"
437  // alias set, if the pointer is loop invariant, and if we are not
438  // eliminating any volatile loads or stores.
439  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
440  !L->isLoopInvariant(AS.begin()->getValue()))
441  continue;
442 
443  assert(
444  !AS.empty() &&
445  "Must alias set should have at least one pointer element in it!");
446 
447  SmallSetVector<Value *, 8> PointerMustAliases;
448  for (const auto &ASI : AS)
449  PointerMustAliases.insert(ASI.getValue());
450 
451  Promoted |= promoteLoopAccessesToScalars(
452  PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
453  DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
454  }
455 
456  // Once we have promoted values across the loop body we have to
457  // recursively reform LCSSA as any nested loop may now have values defined
458  // within the loop used in the outer loop.
459  // FIXME: This is really heavy handed. It would be a bit better to use an
460  // SSAUpdater strategy during promotion that was LCSSA aware and reformed
461  // it as it went.
462  if (Promoted)
463  formLCSSARecursively(*L, *DT, LI, SE);
464 
465  Changed |= Promoted;
466  }
467  }
468 
469  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
470  // specifically moving instructions across the loop boundary and so it is
471  // especially in need of sanity checking here.
472  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
473  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
474  "Parent loop not left in LCSSA form after LICM!");
475 
476  // If this loop is nested inside of another one, save the alias information
477  // for when we process the outer loop.
478  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
479  LoopToAliasSetMap[L] = std::move(CurAST);
480 
481  if (MSSAU.get() && VerifyMemorySSA)
482  MSSAU->getMemorySSA()->verifyMemorySSA();
483 
// Loop dispositions cached by SCEV may be stale after moving instructions.
484  if (Changed && SE)
485  SE->forgetLoopDispositions(L);
486  return Changed;
487 }
488 
489 /// Walk the specified region of the CFG (defined by all blocks dominated by
490 /// the specified block, and that are in the current loop) in reverse depth
491 /// first order w.r.t the DominatorTree. This allows us to visit uses before
492 /// definitions, allowing us to sink a loop body in one pass without iteration.
493 ///
// NOTE(review): the first lines of this signature (source lines 494-495) and
// the ORE parameter line (500) were dropped by the extraction; per the doc
// comment and the call site in runOnLoop this is llvm::sinkRegion(DomTreeNode
// *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo
// *TLI, ...) — confirm upstream.
496  TargetTransformInfo *TTI, Loop *CurLoop,
497  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
498  ICFLoopSafetyInfo *SafetyInfo,
499  SinkAndHoistLICMFlags &Flags,
501 
502  // Verify inputs.
503  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
504  CurLoop != nullptr && SafetyInfo != nullptr &&
505  "Unexpected input to sinkRegion.");
506  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
507  "Either AliasSetTracker or MemorySSA should be initialized.");
508 
509  // We want to visit children before parents. We will enque all the parents
510  // before their children in the worklist and process the worklist in reverse
511  // order.
// NOTE(review): the Worklist declaration (source line 512) is missing from
// this dump — presumably collectChildrenInLoop(N, CurLoop); confirm upstream.
513 
514  bool Changed = false;
515  for (DomTreeNode *DTN : reverse(Worklist)) {
516  BasicBlock *BB = DTN->getBlock();
517  // Only need to process the contents of this block if it is not part of a
518  // subloop (which would already have been processed).
519  if (inSubLoop(BB, CurLoop, LI))
520  continue;
521 
// Walk the block bottom-up so users are visited before definitions,
// matching the reverse-DFS guarantee described above.
522  for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
523  Instruction &I = *--II;
524 
525  // If the instruction is dead, we would try to sink it because it isn't
526  // used in the loop, instead, just delete it.
527  if (isInstructionTriviallyDead(&I, TLI)) {
528  LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
529  salvageDebugInfo(I);
// Advance the iterator past I before erasing so the loop stays valid.
530  ++II;
531  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
532  Changed = true;
533  continue;
534  }
535 
536  // Check to see if we can sink this instruction to the exit blocks
537  // of the loop. We can do this if the all users of the instruction are
538  // outside of the loop. In this case, it doesn't even matter if the
539  // operands of the instruction are loop invariant.
540  //
541  bool FreeInLoop = false;
542  if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
543  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
544  ORE) &&
545  !I.mayHaveSideEffects()) {
546  if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
547  if (!FreeInLoop) {
548  ++II;
549  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
550  }
551  Changed = true;
552  }
553  }
554  }
555  }
556  if (MSSAU && VerifyMemorySSA)
557  MSSAU->getMemorySSA()->verifyMemorySSA();
558  return Changed;
559 }
560 
561 namespace {
562 // This is a helper class for hoistRegion to make it able to hoist control flow
563 // in order to be able to hoist phis. The way this works is that we initially
564 // start hoisting to the loop preheader, and when we see a loop invariant branch
565 // we make note of this. When we then come to hoist an instruction that's
566 // conditional on such a branch we duplicate the branch and the relevant control
567 // flow, then hoist the instruction into the block corresponding to its original
568 // block in the duplicated control flow.
569 class ControlFlowHoister {
570 private:
571  // Information about the loop we are hoisting from
572  LoopInfo *LI;
573  DominatorTree *DT;
574  Loop *CurLoop;
575  MemorySSAUpdater *MSSAU;
576 
577  // A map of blocks in the loop to the block their instructions will be hoisted
578  // to.
579  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
580 
581  // The branches that we can hoist, mapped to the block that marks a
582  // convergence point of their control flow.
583  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
584 
585 public:
586  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
587  MemorySSAUpdater *MSSAU)
588  : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
589 
// Records BI in HoistableBranches when its control flow can be duplicated
// outside the loop (loop-invariant condition, both successors in the loop,
// and a common convergence block dominated by the branch).
590  void registerPossiblyHoistableBranch(BranchInst *BI) {
591  // We can only hoist conditional branches with loop invariant operands.
592  if (!ControlFlowHoisting || !BI->isConditional() ||
593  !CurLoop->hasLoopInvariantOperands(BI))
594  return;
595 
596  // The branch destinations need to be in the loop, and we don't gain
597  // anything by duplicating conditional branches with duplicate successors,
598  // as it's essentially the same as an unconditional branch.
599  BasicBlock *TrueDest = BI->getSuccessor(0);
600  BasicBlock *FalseDest = BI->getSuccessor(1);
601  if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
602  TrueDest == FalseDest)
603  return;
604 
605  // We can hoist BI if one branch destination is the successor of the other,
606  // or both have common successor which we check by seeing if the
607  // intersection of their successors is non-empty.
608  // TODO: This could be expanded to allowing branches where both ends
609  // eventually converge to a single block.
610  SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
611  TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
612  FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
613  BasicBlock *CommonSucc = nullptr;
614  if (TrueDestSucc.count(FalseDest)) {
615  CommonSucc = FalseDest;
616  } else if (FalseDestSucc.count(TrueDest)) {
617  CommonSucc = TrueDest;
618  } else {
619  set_intersect(TrueDestSucc, FalseDestSucc);
620  // If there's one common successor use that.
621  if (TrueDestSucc.size() == 1)
622  CommonSucc = *TrueDestSucc.begin();
623  // If there's more than one pick whichever appears first in the block list
624  // (we can't use the value returned by TrueDestSucc.begin() as it's
625  // unpredicatable which element gets returned).
626  else if (!TrueDestSucc.empty()) {
627  Function *F = TrueDest->getParent();
628  auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
629  auto It = std::find_if(F->begin(), F->end(), IsSucc);
630  assert(It != F->end() && "Could not find successor in function");
631  CommonSucc = &*It;
632  }
633  }
634  // The common successor has to be dominated by the branch, as otherwise
635  // there will be some other path to the successor that will not be
636  // controlled by this branch so any phi we hoist would be controlled by the
637  // wrong condition. This also takes care of avoiding hoisting of loop back
638  // edges.
639  // TODO: In some cases this could be relaxed if the successor is dominated
640  // by another block that's been hoisted and we can guarantee that the
641  // control flow has been replicated exactly.
642  if (CommonSucc && DT->dominates(BI, CommonSucc))
643  HoistableBranches[BI] = CommonSucc;
644  }
645 
// Returns true when PN's block is fully covered by registered hoistable
// branches, i.e. every predecessor reaches the block via a branch we can clone.
646  bool canHoistPHI(PHINode *PN) {
647  // The phi must have loop invariant operands.
648  if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
649  return false;
650  // We can hoist phis if the block they are in is the target of hoistable
651  // branches which cover all of the predecessors of the block.
652  SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
653  BasicBlock *BB = PN->getParent();
654  for (BasicBlock *PredBB : predecessors(BB))
655  PredecessorBlocks.insert(PredBB);
656  // If we have less predecessor blocks than predecessors then the phi will
657  // have more than one incoming value for the same block which we can't
658  // handle.
659  // TODO: This could be handled be erasing some of the duplicate incoming
660  // values.
661  if (PredecessorBlocks.size() != pred_size(BB))
662  return false;
663  for (auto &Pair : HoistableBranches) {
664  if (Pair.second == BB) {
665  // Which blocks are predecessors via this branch depends on if the
666  // branch is triangle-like or diamond-like.
667  if (Pair.first->getSuccessor(0) == BB) {
668  PredecessorBlocks.erase(Pair.first->getParent());
669  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
670  } else if (Pair.first->getSuccessor(1) == BB) {
671  PredecessorBlocks.erase(Pair.first->getParent());
672  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
673  } else {
674  PredecessorBlocks.erase(Pair.first->getSuccessor(0));
675  PredecessorBlocks.erase(Pair.first->getSuccessor(1));
676  }
677  }
678  }
679  // PredecessorBlocks will now be empty if for every predecessor of BB we
680  // found a hoistable branch source.
681  return PredecessorBlocks.empty();
682  }
683 
// Returns (creating on demand) the out-of-loop block that instructions from
// BB should be hoisted into; falls back to the preheader when control-flow
// hoisting is disabled or BB is not guarded by a registered branch.
684  BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
685  if (!ControlFlowHoisting)
686  return CurLoop->getLoopPreheader();
687  // If BB has already been hoisted, return that
688  if (HoistDestinationMap.count(BB))
689  return HoistDestinationMap[BB];
690 
691  // Check if this block is conditional based on a pending branch
// NOTE(review): the lambda parameter list for HasBBAsSuccessor (source line
// 693) was dropped by the extraction — presumably a std::pair<BranchInst *,
// BasicBlock *> & parameter capturing BB; confirm upstream.
692  auto HasBBAsSuccessor =
694  return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
695  Pair.first->getSuccessor(1) == BB);
696  };
697  auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
698  HasBBAsSuccessor);
699 
700  // If not involved in a pending branch, hoist to preheader
701  BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
702  if (It == HoistableBranches.end()) {
703  LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
704  << " as hoist destination for " << BB->getName()
705  << "\n");
706  HoistDestinationMap[BB] = InitialPreheader;
707  return InitialPreheader;
708  }
709  BranchInst *BI = It->first;
710  assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
711  HoistableBranches.end() &&
712  "BB is expected to be the target of at most one branch");
713 
714  LLVMContext &C = BB->getContext();
715  BasicBlock *TrueDest = BI->getSuccessor(0);
716  BasicBlock *FalseDest = BI->getSuccessor(1);
717  BasicBlock *CommonSucc = HoistableBranches[BI];
// Recurse: the block holding the branch must itself have a hoisted copy.
718  BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
719 
720  // Create hoisted versions of blocks that currently don't have them
721  auto CreateHoistedBlock = [&](BasicBlock *Orig) {
722  if (HoistDestinationMap.count(Orig))
723  return HoistDestinationMap[Orig];
724  BasicBlock *New =
725  BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
726  HoistDestinationMap[Orig] = New;
727  DT->addNewBlock(New, HoistTarget);
728  if (CurLoop->getParentLoop())
729  CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
730  ++NumCreatedBlocks;
731  LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
732  << " as hoist destination for " << Orig->getName()
733  << "\n");
734  return New;
735  };
736  BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
737  BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
738  BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
739 
740  // Link up these blocks with branches.
741  if (!HoistCommonSucc->getTerminator()) {
742  // The new common successor we've generated will branch to whatever that
743  // hoist target branched to.
744  BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
745  assert(TargetSucc && "Expected hoist target to have a single successor");
746  HoistCommonSucc->moveBefore(TargetSucc);
747  BranchInst::Create(TargetSucc, HoistCommonSucc);
748  }
749  if (!HoistTrueDest->getTerminator()) {
750  HoistTrueDest->moveBefore(HoistCommonSucc);
751  BranchInst::Create(HoistCommonSucc, HoistTrueDest);
752  }
753  if (!HoistFalseDest->getTerminator()) {
754  HoistFalseDest->moveBefore(HoistCommonSucc);
755  BranchInst::Create(HoistCommonSucc, HoistFalseDest);
756  }
757 
758  // If BI is being cloned to what was originally the preheader then
759  // HoistCommonSucc will now be the new preheader.
760  if (HoistTarget == InitialPreheader) {
761  // Phis in the loop header now need to use the new preheader.
762  InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
763  if (MSSAU)
// NOTE(review): the MemorySSA update call header (source lines 763-764) was
// dropped by the extraction; the argument list below belongs to it.
765  HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
766  // The new preheader dominates the loop header.
767  DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
768  DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
769  DT->changeImmediateDominator(HeaderNode, PreheaderNode);
770  // The preheader hoist destination is now the new preheader, with the
771  // exception of the hoist destination of this branch.
772  for (auto &Pair : HoistDestinationMap)
773  if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
774  Pair.second = HoistCommonSucc;
775  }
776 
777  // Now finally clone BI.
// NOTE(review): the call wrapping the two lines below (source line 778,
// presumably ReplaceInstWithInst replacing HoistTarget's terminator with the
// cloned conditional branch) was dropped by the extraction.
779  HoistTarget->getTerminator(),
780  BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
781  ++NumClonedBranches;
782 
783  assert(CurLoop->getLoopPreheader() &&
784  "Hoisting blocks should not have destroyed preheader");
785  return HoistDestinationMap[BB];
786  }
787 };
788 } // namespace
789 
790 /// Walk the specified region of the CFG (defined by all blocks dominated by
791 /// the specified block, and that are in the current loop) in depth first
792 /// order w.r.t the DominatorTree. This allows us to visit definitions before
793 /// uses, allowing us to hoist a loop body in one pass without iteration.
794 ///
796  DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
797  AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
798  ICFLoopSafetyInfo *SafetyInfo,
799  SinkAndHoistLICMFlags &Flags,
801  // Verify inputs.
802  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
803  CurLoop != nullptr && SafetyInfo != nullptr &&
804  "Unexpected input to hoistRegion.");
805  assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
806  "Either AliasSetTracker or MemorySSA should be initialized.");
807 
808  ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
809 
810  // Keep track of instructions that have been hoisted, as they may need to be
811  // re-hoisted if they end up not dominating all of their uses.
812  SmallVector<Instruction *, 16> HoistedInstructions;
813 
814  // For PHI hoisting to work we need to hoist blocks before their successors.
815  // We can do this by iterating through the blocks in the loop in reverse
816  // post-order.
817  LoopBlocksRPO Worklist(CurLoop);
818  Worklist.perform(LI);
819  bool Changed = false;
820  for (BasicBlock *BB : Worklist) {
821  // Only need to process the contents of this block if it is not part of a
822  // subloop (which would already have been processed).
823  if (inSubLoop(BB, CurLoop, LI))
824  continue;
825 
826  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
827  Instruction &I = *II++;
828  // Try constant folding this instruction. If all the operands are
829  // constants, it is technically hoistable, but it would be better to
830  // just fold it.
832  &I, I.getModule()->getDataLayout(), TLI)) {
833  LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
834  << '\n');
835  if (CurAST)
836  CurAST->copyValue(&I, C);
837  // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
839  if (isInstructionTriviallyDead(&I, TLI))
840  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
841  Changed = true;
842  continue;
843  }
844 
845  // Try hoisting the instruction out to the preheader. We can only do
846  // this if all of the operands of the instruction are loop invariant and
847  // if it is safe to hoist the instruction.
848  // TODO: It may be safe to hoist if we are hoisting to a conditional block
849  // and we have accurately duplicated the control flow from the loop header
850  // to that block.
851  if (CurLoop->hasLoopInvariantOperands(&I) &&
852  canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
853  ORE) &&
855  I, DT, CurLoop, SafetyInfo, ORE,
856  CurLoop->getLoopPreheader()->getTerminator())) {
857  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
858  MSSAU, ORE);
859  HoistedInstructions.push_back(&I);
860  Changed = true;
861  continue;
862  }
863 
864  // Attempt to remove floating point division out of the loop by
865  // converting it to a reciprocal multiplication.
866  if (I.getOpcode() == Instruction::FDiv &&
867  CurLoop->isLoopInvariant(I.getOperand(1)) &&
868  I.hasAllowReciprocal()) {
869  auto Divisor = I.getOperand(1);
870  auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
871  auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
872  ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
873  SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
874  ReciprocalDivisor->insertBefore(&I);
875 
876  auto Product =
877  BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
878  Product->setFastMathFlags(I.getFastMathFlags());
879  SafetyInfo->insertInstructionTo(Product, I.getParent());
880  Product->insertAfter(&I);
881  I.replaceAllUsesWith(Product);
882  eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
883 
884  hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
885  SafetyInfo, MSSAU, ORE);
886  HoistedInstructions.push_back(ReciprocalDivisor);
887  Changed = true;
888  continue;
889  }
890 
891  auto IsInvariantStart = [&](Instruction &I) {
892  using namespace PatternMatch;
893  return I.use_empty() &&
894  match(&I, m_Intrinsic<Intrinsic::invariant_start>());
895  };
896  auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
897  return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
898  SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
899  };
900  if ((IsInvariantStart(I) || isGuard(&I)) &&
901  CurLoop->hasLoopInvariantOperands(&I) &&
902  MustExecuteWithoutWritesBefore(I)) {
903  hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
904  MSSAU, ORE);
905  HoistedInstructions.push_back(&I);
906  Changed = true;
907  continue;
908  }
909 
910  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
911  if (CFH.canHoistPHI(PN)) {
912  // Redirect incoming blocks first to ensure that we create hoisted
913  // versions of those blocks before we hoist the phi.
914  for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
915  PN->setIncomingBlock(
916  i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
917  hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
918  MSSAU, ORE);
919  assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
920  Changed = true;
921  continue;
922  }
923  }
924 
925  // Remember possibly hoistable branches so we can actually hoist them
926  // later if needed.
927  if (BranchInst *BI = dyn_cast<BranchInst>(&I))
928  CFH.registerPossiblyHoistableBranch(BI);
929  }
930  }
931 
932  // If we hoisted instructions to a conditional block they may not dominate
933  // their uses that weren't hoisted (such as phis where some operands are not
934  // loop invariant). If so make them unconditional by moving them to their
935  // immediate dominator. We iterate through the instructions in reverse order
936  // which ensures that when we rehoist an instruction we rehoist its operands,
937  // and also keep track of where in the block we are rehoisting to to make sure
938  // that we rehoist instructions before the instructions that use them.
939  Instruction *HoistPoint = nullptr;
940  if (ControlFlowHoisting) {
941  for (Instruction *I : reverse(HoistedInstructions)) {
942  if (!llvm::all_of(I->uses(),
943  [&](Use &U) { return DT->dominates(I, U); })) {
944  BasicBlock *Dominator =
945  DT->getNode(I->getParent())->getIDom()->getBlock();
946  if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
947  if (HoistPoint)
948  assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
949  "New hoist point expected to dominate old hoist point");
950  HoistPoint = Dominator->getTerminator();
951  }
952  LLVM_DEBUG(dbgs() << "LICM rehoisting to "
953  << HoistPoint->getParent()->getName()
954  << ": " << *I << "\n");
955  moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
956  HoistPoint = I;
957  Changed = true;
958  }
959  }
960  }
961  if (MSSAU && VerifyMemorySSA)
962  MSSAU->getMemorySSA()->verifyMemorySSA();
963 
964  // Now that we've finished hoisting make sure that LI and DT are still
965  // valid.
966 #ifdef EXPENSIVE_CHECKS
967  if (Changed) {
969  "Dominator tree verification failed");
970  LI->verify(*DT);
971  }
972 #endif
973 
974  return Changed;
975 }
976 
977 // Return true if LI is invariant within scope of the loop. LI is invariant if
978 // CurLoop is dominated by an invariant.start representing the same memory
979 // location and size as the memory location LI loads from, and also the
980 // invariant.start has no uses.
982  Loop *CurLoop) {
983  Value *Addr = LI->getOperand(0);
984  const DataLayout &DL = LI->getModule()->getDataLayout();
985  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
986 
987  // if the type is i8 addrspace(x)*, we know this is the type of
988  // llvm.invariant.start operand
989  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
990  LI->getPointerAddressSpace());
991  unsigned BitcastsVisited = 0;
992  // Look through bitcasts until we reach the i8* type (this is invariant.start
993  // operand type).
994  while (Addr->getType() != PtrInt8Ty) {
995  auto *BC = dyn_cast<BitCastInst>(Addr);
996  // Avoid traversing high number of bitcast uses.
997  if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
998  return false;
999  Addr = BC->getOperand(0);
1000  }
1001 
1002  unsigned UsesVisited = 0;
1003  // Traverse all uses of the load operand value, to see if invariant.start is
1004  // one of the uses, and whether it dominates the load instruction.
1005  for (auto *U : Addr->users()) {
1006  // Avoid traversing for Load operand with high number of users.
1007  if (++UsesVisited > MaxNumUsesTraversed)
1008  return false;
1010  // If there are escaping uses of invariant.start instruction, the load maybe
1011  // non-invariant.
1012  if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1013  !II->use_empty())
1014  continue;
1015  unsigned InvariantSizeInBits =
1016  cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
1017  // Confirm the invariant.start location size contains the load operand size
1018  // in bits. Also, the invariant.start should dominate the load, and we
1019  // should not hoist the load out of a loop that contains this dominating
1020  // invariant.start.
1021  if (LocSizeInBits <= InvariantSizeInBits &&
1022  DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1023  return true;
1024  }
1025 
1026  return false;
1027 }
1028 
1029 namespace {
1030 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1031 /// sink a given instruction out of a loop. Does not address legality
1032 /// concerns such as aliasing or speculation safety.
1033 bool isHoistableAndSinkableInst(Instruction &I) {
1034  // Only these instructions are hoistable/sinkable.
1035  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1036  isa<FenceInst>(I) || isa<CastInst>(I) ||
1037  isa<UnaryOperator>(I) || isa<BinaryOperator>(I) ||
1038  isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1039  isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1040  isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1041  isa<InsertValueInst>(I));
1042 }
1043 /// Return true if all of the alias sets within this AST are known not to
1044 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
1045 bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
1046  const Loop *L) {
1047  if (CurAST) {
1048  for (AliasSet &AS : *CurAST) {
1049  if (!AS.isForwardingAliasSet() && AS.isMod()) {
1050  return false;
1051  }
1052  }
1053  return true;
1054  } else { /*MSSAU*/
1055  for (auto *BB : L->getBlocks())
1056  if (MSSAU->getMemorySSA()->getBlockDefs(BB))
1057  return false;
1058  return true;
1059  }
1060 }
1061 
1062 /// Return true if I is the only Instruction with a MemoryAccess in L.
1063 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1064  const MemorySSAUpdater *MSSAU) {
1065  for (auto *BB : L->getBlocks())
1066  if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
1067  int NotAPhi = 0;
1068  for (const auto &Acc : *Accs) {
1069  if (isa<MemoryPhi>(&Acc))
1070  continue;
1071  const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1072  if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1073  return false;
1074  }
1075  }
1076  return true;
1077 }
1078 }
1079 
1081  Loop *CurLoop, AliasSetTracker *CurAST,
1082  MemorySSAUpdater *MSSAU,
1083  bool TargetExecutesOncePerLoop,
1084  SinkAndHoistLICMFlags *Flags,
1086  // If we don't understand the instruction, bail early.
1087  if (!isHoistableAndSinkableInst(I))
1088  return false;
1089 
1090  MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
1091  if (MSSA)
1092  assert(Flags != nullptr && "Flags cannot be null.");
1093 
1094  // Loads have extra constraints we have to verify before we can hoist them.
1095  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1096  if (!LI->isUnordered())
1097  return false; // Don't sink/hoist volatile or ordered atomic loads!
1098 
1099  // Loads from constant memory are always safe to move, even if they end up
1100  // in the same alias set as something that ends up being modified.
1101  if (AA->pointsToConstantMemory(LI->getOperand(0)))
1102  return true;
1103  if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1104  return true;
1105 
1106  if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1107  return false; // Don't risk duplicating unordered loads
1108 
1109  // This checks for an invariant.start dominating the load.
1110  if (isLoadInvariantInLoop(LI, DT, CurLoop))
1111  return true;
1112 
1113  bool Invalidated;
1114  if (CurAST)
1115  Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
1116  CurLoop, AA);
1117  else
1118  Invalidated = pointerInvalidatedByLoopWithMSSA(
1119  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1120  // Check loop-invariant address because this may also be a sinkable load
1121  // whose address is not necessarily loop-invariant.
1122  if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1123  ORE->emit([&]() {
1124  return OptimizationRemarkMissed(
1125  DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1126  << "failed to move load with loop-invariant address "
1127  "because the loop may invalidate its value";
1128  });
1129 
1130  return !Invalidated;
1131  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1132  // Don't sink or hoist dbg info; it's legal, but not useful.
1133  if (isa<DbgInfoIntrinsic>(I))
1134  return false;
1135 
1136  // Don't sink calls which can throw.
1137  if (CI->mayThrow())
1138  return false;
1139 
1140  using namespace PatternMatch;
1141  if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1142  // Assumes don't actually alias anything or throw
1143  return true;
1144 
1145  // Handle simple cases by querying alias analysis.
1146  FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
1147  if (Behavior == FMRB_DoesNotAccessMemory)
1148  return true;
1149  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
1150  // A readonly argmemonly function only reads from memory pointed to by
1151  // it's arguments with arbitrary offsets. If we can prove there are no
1152  // writes to this memory in the loop, we can hoist or sink.
1154  // TODO: expand to writeable arguments
1155  for (Value *Op : CI->arg_operands())
1156  if (Op->getType()->isPointerTy()) {
1157  bool Invalidated;
1158  if (CurAST)
1159  Invalidated = pointerInvalidatedByLoop(
1161  CurAST, CurLoop, AA);
1162  else
1163  Invalidated = pointerInvalidatedByLoopWithMSSA(
1164  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1165  *Flags);
1166  if (Invalidated)
1167  return false;
1168  }
1169  return true;
1170  }
1171 
1172  // If this call only reads from memory and there are no writes to memory
1173  // in the loop, we can hoist or sink the call as appropriate.
1174  if (isReadOnly(CurAST, MSSAU, CurLoop))
1175  return true;
1176  }
1177 
1178  // FIXME: This should use mod/ref information to see if we can hoist or
1179  // sink the call.
1180 
1181  return false;
1182  } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1183  // Fences alias (most) everything to provide ordering. For the moment,
1184  // just give up if there are any other memory operations in the loop.
1185  if (CurAST) {
1186  auto Begin = CurAST->begin();
1187  assert(Begin != CurAST->end() && "must contain FI");
1188  if (std::next(Begin) != CurAST->end())
1189  // constant memory for instance, TODO: handle better
1190  return false;
1191  auto *UniqueI = Begin->getUniqueInstruction();
1192  if (!UniqueI)
1193  // other memory op, give up
1194  return false;
1195  (void)FI; // suppress unused variable warning
1196  assert(UniqueI == FI && "AS must contain FI");
1197  return true;
1198  } else // MSSAU
1199  return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1200  } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1201  if (!SI->isUnordered())
1202  return false; // Don't sink/hoist volatile or ordered atomic store!
1203 
1204  // We can only hoist a store that we can prove writes a value which is not
1205  // read or overwritten within the loop. For those cases, we fallback to
1206  // load store promotion instead. TODO: We can extend this to cases where
1207  // there is exactly one write to the location and that write dominates an
1208  // arbitrary number of reads in the loop.
1209  if (CurAST) {
1210  auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
1211 
1212  if (AS.isRef() || !AS.isMustAlias())
1213  // Quick exit test, handled by the full path below as well.
1214  return false;
1215  auto *UniqueI = AS.getUniqueInstruction();
1216  if (!UniqueI)
1217  // other memory op, give up
1218  return false;
1219  assert(UniqueI == SI && "AS must contain SI");
1220  return true;
1221  } else { // MSSAU
1222  if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1223  return true;
1224  // If there are more accesses than the Promotion cap, give up, we're not
1225  // walking a list that long.
1226  if (Flags->NoOfMemAccTooLarge)
1227  return false;
1228  // Check store only if there's still "quota" to check clobber.
1229  if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
1230  return false;
1231  // If there are interfering Uses (i.e. their defining access is in the
1232  // loop), or ordered loads (stored as Defs!), don't move this store.
1233  // Could do better here, but this is conservatively correct.
1234  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1235  // moving accesses. Can also extend to dominating uses.
1236  auto *SIMD = MSSA->getMemoryAccess(SI);
1237  for (auto *BB : CurLoop->getBlocks())
1238  if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1239  for (const auto &MA : *Accesses)
1240  if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1241  auto *MD = MU->getDefiningAccess();
1242  if (!MSSA->isLiveOnEntryDef(MD) &&
1243  CurLoop->contains(MD->getBlock()))
1244  return false;
1245  // Disable hoisting past potentially interfering loads. Optimized
1246  // Uses may point to an access outside the loop, as getClobbering
1247  // checks the previous iteration when walking the backedge.
1248  // FIXME: More precise: no Uses that alias SI.
1249  if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
1250  return false;
1251  } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1252  if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1253  (void)LI; // Silence warning.
1254  assert(!LI->isUnordered() && "Expected unordered load");
1255  return false;
1256  }
1257  // Any call, while it may not be clobbering SI, it may be a use.
1258  if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1259  // Check if the call may read from the memory locattion written
1260  // to by SI. Check CI's attributes and arguments; the number of
1261  // such checks performed is limited above by NoOfMemAccTooLarge.
1263  if (isModOrRefSet(MRI))
1264  return false;
1265  }
1266  }
1267  }
1268 
1270  Flags->LicmMssaOptCounter++;
1271  // If there are no clobbering Defs in the loop, store is safe to hoist.
1272  return MSSA->isLiveOnEntryDef(Source) ||
1273  !CurLoop->contains(Source->getBlock());
1274  }
1275  }
1276 
1277  assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1278 
1279  // We've established mechanical ability and aliasing, it's up to the caller
1280  // to check fault safety
1281  return true;
1282 }
1283 
1284 /// Returns true if a PHINode is a trivially replaceable with an
1285 /// Instruction.
1286 /// This is true when all incoming values are that instruction.
1287 /// This pattern occurs most often with LCSSA PHI nodes.
1288 ///
1289 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1290  for (const Value *IncValue : PN.incoming_values())
1291  if (IncValue != &I)
1292  return false;
1293 
1294  return true;
1295 }
1296 
1297 /// Return true if the instruction is free in the loop.
1298 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1299  const TargetTransformInfo *TTI) {
1300 
1301  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1303  return false;
1304  // For a GEP, we cannot simply use getUserCost because currently it
1305  // optimistically assume that a GEP will fold into addressing mode
1306  // regardless of its users.
1307  const BasicBlock *BB = GEP->getParent();
1308  for (const User *U : GEP->users()) {
1309  const Instruction *UI = cast<Instruction>(U);
1310  if (CurLoop->contains(UI) &&
1311  (BB != UI->getParent() ||
1312  (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1313  return false;
1314  }
1315  return true;
1316  } else
1317  return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
1318 }
1319 
1320 /// Return true if the only users of this instruction are outside of
1321 /// the loop. If this is true, we can sink the instruction to the exit
1322 /// blocks of the loop.
1323 ///
1324 /// We also return true if the instruction could be folded away in lowering.
1325 /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
1326 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1327  const LoopSafetyInfo *SafetyInfo,
1328  TargetTransformInfo *TTI, bool &FreeInLoop) {
1329  const auto &BlockColors = SafetyInfo->getBlockColors();
1330  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1331  for (const User *U : I.users()) {
1332  const Instruction *UI = cast<Instruction>(U);
1333  if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1334  const BasicBlock *BB = PN->getParent();
1335  // We cannot sink uses in catchswitches.
1336  if (isa<CatchSwitchInst>(BB->getTerminator()))
1337  return false;
1338 
1339  // We need to sink a callsite to a unique funclet. Avoid sinking if the
1340  // phi use is too muddled.
1341  if (isa<CallInst>(I))
1342  if (!BlockColors.empty() &&
1343  BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1344  return false;
1345  }
1346 
1347  if (CurLoop->contains(UI)) {
1348  if (IsFree) {
1349  FreeInLoop = true;
1350  continue;
1351  }
1352  return false;
1353  }
1354  }
1355  return true;
1356 }
1357 
1359  Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1360  const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
1361  Instruction *New;
1362  if (auto *CI = dyn_cast<CallInst>(&I)) {
1363  const auto &BlockColors = SafetyInfo->getBlockColors();
1364 
1365  // Sinking call-sites need to be handled differently from other
1366  // instructions. The cloned call-site needs a funclet bundle operand
1367  // appropriate for its location in the CFG.
1369  for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1370  BundleIdx != BundleEnd; ++BundleIdx) {
1371  OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1372  if (Bundle.getTagID() == LLVMContext::OB_funclet)
1373  continue;
1374 
1375  OpBundles.emplace_back(Bundle);
1376  }
1377 
1378  if (!BlockColors.empty()) {
1379  const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1380  assert(CV.size() == 1 && "non-unique color for exit block!");
1381  BasicBlock *BBColor = CV.front();
1382  Instruction *EHPad = BBColor->getFirstNonPHI();
1383  if (EHPad->isEHPad())
1384  OpBundles.emplace_back("funclet", EHPad);
1385  }
1386 
1387  New = CallInst::Create(CI, OpBundles);
1388  } else {
1389  New = I.clone();
1390  }
1391 
1392  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
1393  if (!I.getName().empty())
1394  New->setName(I.getName() + ".le");
1395 
1396  if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
1397  // Create a new MemoryAccess and let MemorySSA set its defining access.
1398  MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1399  New, nullptr, New->getParent(), MemorySSA::Beginning);
1400  if (NewMemAcc) {
1401  if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1402  MSSAU->insertDef(MemDef, /*RenameUses=*/true);
1403  else {
1404  auto *MemUse = cast<MemoryUse>(NewMemAcc);
1405  MSSAU->insertUse(MemUse, /*RenameUses=*/true);
1406  }
1407  }
1408  }
1409 
1410  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
1411  // particularly cheap because we can rip off the PHI node that we're
1412  // replacing for the number and blocks of the predecessors.
1413  // OPT: If this shows up in a profile, we can instead finish sinking all
1414  // invariant instructions, and then walk their operands to re-establish
1415  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1416  // sinking bottom-up.
1417  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
1418  ++OI)
1419  if (Instruction *OInst = dyn_cast<Instruction>(*OI))
1420  if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
1421  if (!OLoop->contains(&PN)) {
1422  PHINode *OpPN =
1423  PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1424  OInst->getName() + ".lcssa", &ExitBlock.front());
1425  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1426  OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1427  *OI = OpPN;
1428  }
1429  return New;
1430 }
1431 
1433  AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
1434  if (AST)
1435  AST->deleteValue(&I);
1436  if (MSSAU)
1437  MSSAU->removeMemoryAccess(&I);
1438  SafetyInfo.removeInstruction(&I);
1439  I.eraseFromParent();
1440 }
1441 
1443  ICFLoopSafetyInfo &SafetyInfo,
1444  MemorySSAUpdater *MSSAU) {
1445  SafetyInfo.removeInstruction(&I);
1446  SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1447  I.moveBefore(&Dest);
1448  if (MSSAU)
1449  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1450  MSSAU->getMemorySSA()->getMemoryAccess(&I)))
1451  MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
1452 }
1453 
1455  PHINode *TPN, Instruction *I, LoopInfo *LI,
1457  const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1458  MemorySSAUpdater *MSSAU) {
1459  assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1460  "Expect only trivially replaceable PHI");
1461  BasicBlock *ExitBlock = TPN->getParent();
1462  Instruction *New;
1463  auto It = SunkCopies.find(ExitBlock);
1464  if (It != SunkCopies.end())
1465  New = It->second;
1466  else
1467  New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(
1468  *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1469  return New;
1470 }
1471 
1472 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1473  BasicBlock *BB = PN->getParent();
1474  if (!BB->canSplitPredecessors())
1475  return false;
1476  // It's not impossible to split EHPad blocks, but if BlockColors already exist
1477  // it require updating BlockColors for all offspring blocks accordingly. By
1478  // skipping such corner case, we can make updating BlockColors after splitting
1479  // predecessor fairly simple.
1480  if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1481  return false;
1482  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
1483  BasicBlock *BBPred = *PI;
1484  if (isa<IndirectBrInst>(BBPred->getTerminator()))
1485  return false;
1486  }
1487  return true;
1488 }
1489 
1491  LoopInfo *LI, const Loop *CurLoop,
1492  LoopSafetyInfo *SafetyInfo,
1493  MemorySSAUpdater *MSSAU) {
1494 #ifndef NDEBUG
1495  SmallVector<BasicBlock *, 32> ExitBlocks;
1496  CurLoop->getUniqueExitBlocks(ExitBlocks);
1497  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1498  ExitBlocks.end());
1499 #endif
1500  BasicBlock *ExitBB = PN->getParent();
1501  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1502 
1503  // Split predecessors of the loop exit to make instructions in the loop are
1504  // exposed to exit blocks through trivially replaceable PHIs while keeping the
1505  // loop in the canonical form where each predecessor of each exit block should
1506  // be contained within the loop. For example, this will convert the loop below
1507  // from
1508  //
1509  // LB1:
1510  // %v1 =
1511  // br %LE, %LB2
1512  // LB2:
1513  // %v2 =
1514  // br %LE, %LB1
1515  // LE:
1516  // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1517  //
1518  // to
1519  //
1520  // LB1:
1521  // %v1 =
1522  // br %LE.split, %LB2
1523  // LB2:
1524  // %v2 =
1525  // br %LE.split2, %LB1
1526  // LE.split:
1527  // %p1 = phi [%v1, %LB1] <-- trivially replaceable
1528  // br %LE
1529  // LE.split2:
1530  // %p2 = phi [%v2, %LB2] <-- trivially replaceable
1531  // br %LE
1532  // LE:
1533  // %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1534  //
1535  const auto &BlockColors = SafetyInfo->getBlockColors();
1536  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1537  while (!PredBBs.empty()) {
1538  BasicBlock *PredBB = *PredBBs.begin();
1539  assert(CurLoop->contains(PredBB) &&
1540  "Expect all predecessors are in the loop");
1541  if (PN->getBasicBlockIndex(PredBB) >= 0) {
1543  ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1544  // Since we do not allow splitting EH-block with BlockColors in
1545  // canSplitPredecessors(), we can simply assign predecessor's color to
1546  // the new block.
1547  if (!BlockColors.empty())
1548  // Grab a reference to the ColorVector to be inserted before getting the
1549  // reference to the vector we are copying because inserting the new
1550  // element in BlockColors might cause the map to be reallocated.
1551  SafetyInfo->copyColors(NewPred, PredBB);
1552  }
1553  PredBBs.remove(PredBB);
1554  }
1555 }
1556 
1557 /// When an instruction is found to only be used outside of the loop, this
1558 /// function moves it to the exit blocks and patches up SSA form as needed.
1559 /// This method is guaranteed to remove the original instruction from its
1560 /// position, and may either delete it or move it to outside of the loop.
1561 ///
1562 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1563  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1565  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1566  ORE->emit([&]() {
1567  return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1568  << "sinking " << ore::NV("Inst", &I);
1569  });
1570  bool Changed = false;
1571  if (isa<LoadInst>(I))
1572  ++NumMovedLoads;
1573  else if (isa<CallInst>(I))
1574  ++NumMovedCalls;
1575  ++NumSunk;
1576 
1577  // Iterate over users to be ready for actual sinking. Replace users via
1578  // unreachable blocks with undef and make all user PHIs trivially replaceable.
1579  SmallPtrSet<Instruction *, 8> VisitedUsers;
1580  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1581  auto *User = cast<Instruction>(*UI);
1582  Use &U = UI.getUse();
1583  ++UI;
1584 
1585  if (VisitedUsers.count(User) || CurLoop->contains(User))
1586  continue;
1587 
1588  if (!DT->isReachableFromEntry(User->getParent())) {
1589  U = UndefValue::get(I.getType());
1590  Changed = true;
1591  continue;
1592  }
1593 
1594  // The user must be a PHI node.
1595  PHINode *PN = cast<PHINode>(User);
1596 
1597  // Surprisingly, instructions can be used outside of loops without any
1598  // exits. This can only happen in PHI nodes if the incoming block is
1599  // unreachable.
1600  BasicBlock *BB = PN->getIncomingBlock(U);
1601  if (!DT->isReachableFromEntry(BB)) {
1602  U = UndefValue::get(I.getType());
1603  Changed = true;
1604  continue;
1605  }
1606 
1607  VisitedUsers.insert(PN);
1608  if (isTriviallyReplaceablePHI(*PN, I))
1609  continue;
1610 
1611  if (!canSplitPredecessors(PN, SafetyInfo))
1612  return Changed;
1613 
1614  // Split predecessors of the PHI so that we can make users trivially
1615  // replaceable.
1616  splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
1617 
1618  // Should rebuild the iterators, as they may be invalidated by
1619  // splitPredecessorsOfLoopExit().
1620  UI = I.user_begin();
1621  UE = I.user_end();
1622  }
1623 
1624  if (VisitedUsers.empty())
1625  return Changed;
1626 
1627 #ifndef NDEBUG
1628  SmallVector<BasicBlock *, 32> ExitBlocks;
1629  CurLoop->getUniqueExitBlocks(ExitBlocks);
1630  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1631  ExitBlocks.end());
1632 #endif
1633 
1634  // Clones of this instruction. Don't create more than one per exit block!
1636 
1637  // If this instruction is only used outside of the loop, then all users are
1638  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1639  // the instruction.
1641  for (auto *UI : Users) {
1642  auto *User = cast<Instruction>(UI);
1643 
1644  if (CurLoop->contains(User))
1645  continue;
1646 
1647  PHINode *PN = cast<PHINode>(User);
1648  assert(ExitBlockSet.count(PN->getParent()) &&
1649  "The LCSSA PHI is not in an exit block!");
1650  // The PHI must be trivially replaceable.
1652  PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1653  PN->replaceAllUsesWith(New);
1654  eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
1655  Changed = true;
1656  }
1657  return Changed;
1658 }
1659 
1660 /// When an instruction is found to only use loop invariant operands that
1661 /// is safe to hoist, this instruction is called to do the dirty work.
1662 /// It moves I into the destination block Dest (normally the loop
1663 /// preheader), emits an optimization remark, and updates the statistics.
///
1663 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1664  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1666  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
1667  << "\n");
 // Tell the user, via a remark, which instruction was hoisted.
1668  ORE->emit([&]() {
1669  return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1670  << ore::NV("Inst", &I);
1671  });
1672 
1673  // Metadata can be dependent on conditions we are hoisting above.
1674  // Conservatively strip all metadata on the instruction unless we were
1675  // guaranteed to execute I if we entered the loop, in which case the metadata
1676  // is valid in the loop preheader.
1677  if (I.hasMetadataOtherThanDebugLoc() &&
1678  // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1679  // time in isGuaranteedToExecute if we don't actually have anything to
1680  // drop. It is a compile time optimization, not required for correctness.
1681  !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1683 
 // PHIs must stay grouped at the top of the block; everything else goes
 // right before the terminator.
1684  if (isa<PHINode>(I))
1685  // Move the new node to the end of the phi list in the destination block.
1686  moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
1687  else
1688  // Move the new node to the destination block, before its terminator.
1689  moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
1690 
1691  // Apply line 0 debug locations when we are moving instructions to different
1692  // basic blocks because we want to avoid jumpy line tables.
1693  if (const DebugLoc &DL = I.getDebugLoc())
1694  I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
1695 
 // Bookkeeping: per-kind statistics counters.
1696  if (isa<LoadInst>(I))
1697  ++NumMovedLoads;
1698  else if (isa<CallInst>(I))
1699  ++NumMovedCalls;
1700  ++NumHoisted;
1701 }
1702 
1703 /// Only sink or hoist an instruction if it is not a trapping instruction,
1704 /// or if the instruction is known not to trap when moved to the preheader,
1705 /// or if it is a trapping instruction and is guaranteed to execute.
1707  const DominatorTree *DT,
1708  const Loop *CurLoop,
1709  const LoopSafetyInfo *SafetyInfo,
1711  const Instruction *CtxI) {
 // Speculatively-executable instructions are always safe to move; CtxI
 // (typically the preheader terminator) is the insertion context.
1712  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
1713  return true;
1714 
1715  bool GuaranteedToExecute =
1716  SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1717 
 // Diagnostics only: report conditionally-executed loads of loop-invariant
 // addresses, a common reason hoisting is blocked.
1718  if (!GuaranteedToExecute) {
1719  auto *LI = dyn_cast<LoadInst>(&Inst);
1720  if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1721  ORE->emit([&]() {
1722  return OptimizationRemarkMissed(
1723  DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1724  << "failed to hoist load with loop-invariant address "
1725  "because load is conditionally executed";
1726  });
1727  }
1728 
1729  return GuaranteedToExecute;
1730 }
1731 
1732 namespace {
// Helper for scalar promotion: drives LoadAndStorePromoter/SSAUpdater to
// rewrite the promoted loads/stores and to insert a store of the live-out
// value into each loop exit block.
1733 class LoopPromoter : public LoadAndStorePromoter {
1734  Value *SomePtr; // Designated pointer to store to.
1735  const SmallSetVector<Value *, 8> &PointerMustAliases;
1736  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1737  SmallVectorImpl<Instruction *> &LoopInsertPts; // Per-exit-block store points.
1738  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts; // Parallel MemorySSA points.
1739  PredIteratorCache &PredCache;
1740  AliasSetTracker &AST;
1741  MemorySSAUpdater *MSSAU; // May be null when MemorySSA is not in use.
1742  LoopInfo &LI;
1743  DebugLoc DL;
1744  int Alignment; // Alignment for the inserted stores.
1745  bool UnorderedAtomic; // Whether inserted stores must be unordered-atomic.
1746  AAMDNodes AATags;
1747  ICFLoopSafetyInfo &SafetyInfo;
1748 
 // If V is defined inside a loop that does not contain BB, wrap it in a
 // fresh LCSSA PHI in BB so LCSSA form is preserved; otherwise return V.
1749  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1750  if (Instruction *I = dyn_cast<Instruction>(V))
1751  if (Loop *L = LI.getLoopFor(I->getParent()))
1752  if (!L->contains(BB)) {
1753  // We need to create an LCSSA PHI node for the incoming value and
1754  // store that.
1755  PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1756  I->getName() + ".lcssa", &BB->front());
1757  for (BasicBlock *Pred : PredCache.get(BB))
1758  PN->addIncoming(I, Pred);
1759  return PN;
1760  }
1761  return V;
1762  }
1763 
1764 public:
1765  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1766  const SmallSetVector<Value *, 8> &PMA,
1770  AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
1771  DebugLoc dl, int alignment, bool UnorderedAtomic,
1772  const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
1773  : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
1774  LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
1775  PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
1776  Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1777  SafetyInfo(SafetyInfo) {}
1778 
 // An instruction belongs to this promotion iff its pointer operand is in
 // the must-alias set being promoted.
1779  bool isInstInList(Instruction *I,
1780  const SmallVectorImpl<Instruction *> &) const override {
1781  Value *Ptr;
1782  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1783  Ptr = LI->getOperand(0);
1784  else
1785  Ptr = cast<StoreInst>(I)->getPointerOperand();
1786  return PointerMustAliases.count(Ptr);
1787  }
1788 
1789  void doExtraRewritesBeforeFinalDeletion() override {
1790  // Insert stores after in the loop exit blocks. Each exit block gets a
1791  // store of the live-out values that feed them. Since we've already told
1792  // the SSA updater about the defs in the loop and the preheader
1793  // definition, it is all set and we can start using it.
1794  for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1795  BasicBlock *ExitBlock = LoopExitBlocks[i];
1796  Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1797  LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1798  Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1799  Instruction *InsertPos = LoopInsertPts[i];
1800  StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1801  if (UnorderedAtomic)
1803  NewSI->setAlignment(MaybeAlign(Alignment));
1804  NewSI->setDebugLoc(DL);
1805  if (AATags)
1806  NewSI->setAAMetadata(AATags);
1807 
 // Mirror the new store into MemorySSA, chaining successive stores
 // for the same exit block after one another.
1808  if (MSSAU) {
1809  MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1810  MemoryAccess *NewMemAcc;
1811  if (!MSSAInsertPoint) {
1812  NewMemAcc = MSSAU->createMemoryAccessInBB(
1813  NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1814  } else {
1815  NewMemAcc =
1816  MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1817  }
1818  MSSAInsertPts[i] = NewMemAcc;
1819  MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1820  // FIXME: true for safety, false may still be correct.
1821  }
1822  }
1823  }
1824 
1825  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
1826  // Update alias analysis.
1827  AST.copyValue(LI, V);
1828  }
 // Keep the safety info, alias sets, and MemorySSA consistent when the
 // promoter deletes a rewritten access.
1829  void instructionDeleted(Instruction *I) const override {
1830  SafetyInfo.removeInstruction(I);
1831  AST.deleteValue(I);
1832  if (MSSAU)
1833  MSSAU->removeMemoryAccess(I);
1834  }
1835 };
1836 
1837 
1838 /// Return true iff we can prove that a caller of this function can not inspect
1839 /// the contents of the provided object in a well defined program.
1840 bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
1841  if (isa<AllocaInst>(Object))
1842  // Since the alloca goes out of scope, we know the caller can't retain a
1843  // reference to it and be well defined. Thus, we don't need to check for
1844  // capture.
1845  return true;
1846 
1847  // For all other objects we need to know that the caller can't possibly
1848  // have gotten a reference to the object. There are two components of
1849  // that:
1850  // 1) Object can't be escaped by this function. This is what
1851  // PointerMayBeCaptured checks.
1852  // 2) Object can't have been captured at definition site. For this, we
1853  // need to know the return value is noalias. At the moment, we use a
1854  // weaker condition and handle only AllocLikeFunctions (which are
1855  // known to be noalias). TODO
1856  return isAllocLikeFn(Object, TLI) &&
1857  !PointerMayBeCaptured(Object, true, true);
1858 }
1859 
1860 } // namespace
1861 
1862 /// Try to promote memory values to scalars by sinking stores out of the
1863 /// loop and moving loads to before the loop. We do this by looping over
1864 /// the stores in the loop, looking for stores to Must pointers which are
1865 /// loop invariant.
1866 ///
/// Returns true on success: the location is then read once in the preheader
/// and a store of the live-out value is inserted in each exit block.
1868  const SmallSetVector<Value *, 8> &PointerMustAliases,
1869  SmallVectorImpl<BasicBlock *> &ExitBlocks,
1870  SmallVectorImpl<Instruction *> &InsertPts,
1872  LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1873  Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
1874  ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
1875  // Verify inputs.
1876  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1877  CurAST != nullptr && SafetyInfo != nullptr &&
1878  "Unexpected Input to promoteLoopAccessesToScalars");
1879 
1880  Value *SomePtr = *PointerMustAliases.begin();
1881  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1882 
1883  // It is not safe to promote a load/store from the loop if the load/store is
1884  // conditional. For example, turning:
1885  //
1886  // for () { if (c) *P += 1; }
1887  //
1888  // into:
1889  //
1890  // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
1891  //
1892  // is not safe, because *P may only be valid to access if 'c' is true.
1893  //
1894  // The safety property divides into two parts:
1895  // p1) The memory may not be dereferenceable on entry to the loop. In this
1896  // case, we can't insert the required load in the preheader.
1897  // p2) The memory model does not allow us to insert a store along any dynamic
1898  // path which did not originally have one.
1899  //
1900  // If at least one store is guaranteed to execute, both properties are
1901  // satisfied, and promotion is legal.
1902  //
1903  // This, however, is not a necessary condition. Even if no store/load is
1904  // guaranteed to execute, we can still establish these properties.
1905  // We can establish (p1) by proving that hoisting the load into the preheader
1906  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1907  // can use any access within the alias set to prove dereferenceability,
1908  // since they're all must alias.
1909  //
1910  // There are two ways establish (p2):
1911  // a) Prove the location is thread-local. In this case the memory model
1912  // requirement does not apply, and stores are safe to insert.
1913  // b) Prove a store dominates every exit block. In this case, if an exit
1914  // blocks is reached, the original dynamic path would have taken us through
1915  // the store, so inserting a store into the exit block is safe. Note that this
1916  // is different from the store being guaranteed to execute. For instance,
1917  // if an exception is thrown on the first iteration of the loop, the original
1918  // store is never executed, but the exit blocks are not executed either.
1919 
1920  bool DereferenceableInPH = false;
1921  bool SafeToInsertStore = false;
1922 
1924 
1925  // We start with an alignment of one and try to find instructions that allow
1926  // us to prove better alignment.
1927  unsigned Alignment = 1;
1928  // Keep track of which types of access we see
1929  bool SawUnorderedAtomic = false;
1930  bool SawNotAtomic = false;
1931  AAMDNodes AATags;
1932 
1933  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1934 
1935  bool IsKnownThreadLocalObject = false;
1936  if (SafetyInfo->anyBlockMayThrow()) {
1937  // If a loop can throw, we have to insert a store along each unwind edge.
1938  // That said, we can't actually make the unwind edge explicit. Therefore,
1939  // we have to prove that the store is dead along the unwind edge. We do
1940  // this by proving that the caller can't have a reference to the object
1941  // after return and thus can't possibly load from the object.
1942  Value *Object = GetUnderlyingObject(SomePtr, MDL);
1943  if (!isKnownNonEscaping(Object, TLI))
1944  return false;
1945  // Subtlety: Alloca's aren't visible to callers, but *are* potentially
1946  // visible to other threads if captured and used during their lifetimes.
1947  IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
1948  }
1949 
1950  // Check that all of the pointers in the alias set have the same type. We
1951  // cannot (yet) promote a memory location that is loaded and stored in
1952  // different sizes. While we are at it, collect alignment and AA info.
 // Walk every access to the must-alias set, trying to establish (p1) and
 // (p2) above and gathering alignment/atomicity information.
1953  for (Value *ASIV : PointerMustAliases) {
1954  // Check that all of the pointers in the alias set have the same type. We
1955  // cannot (yet) promote a memory location that is loaded and stored in
1956  // different sizes.
1957  if (SomePtr->getType() != ASIV->getType())
1958  return false;
1959 
1960  for (User *U : ASIV->users()) {
1961  // Ignore instructions that are outside the loop.
1962  Instruction *UI = dyn_cast<Instruction>(U);
1963  if (!UI || !CurLoop->contains(UI))
1964  continue;
1965 
1966  // If there is an non-load/store instruction in the loop, we can't promote
1967  // it.
1968  if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
1969  if (!Load->isUnordered())
1970  return false;
1971 
1972  SawUnorderedAtomic |= Load->isAtomic();
1973  SawNotAtomic |= !Load->isAtomic();
1974 
1975  unsigned InstAlignment = Load->getAlignment();
1976  if (!InstAlignment)
1977  InstAlignment =
1978  MDL.getABITypeAlignment(Load->getType());
1979 
1980  // Note that proving a load safe to speculate requires proving
1981  // sufficient alignment at the target location. Proving it guaranteed
1982  // to execute does as well. Thus we can increase our guaranteed
1983  // alignment as well.
1984  if (!DereferenceableInPH || (InstAlignment > Alignment))
1985  if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
1986  ORE, Preheader->getTerminator())) {
1987  DereferenceableInPH = true;
1988  Alignment = std::max(Alignment, InstAlignment);
1989  }
1990  } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
1991  // Stores *of* the pointer are not interesting, only stores *to* the
1992  // pointer.
1993  if (UI->getOperand(1) != ASIV)
1994  continue;
1995  if (!Store->isUnordered())
1996  return false;
1997 
1998  SawUnorderedAtomic |= Store->isAtomic();
1999  SawNotAtomic |= !Store->isAtomic();
2000 
2001  // If the store is guaranteed to execute, both properties are satisfied.
2002  // We may want to check if a store is guaranteed to execute even if we
2003  // already know that promotion is safe, since it may have higher
2004  // alignment than any other guaranteed stores, in which case we can
2005  // raise the alignment on the promoted store.
2006  unsigned InstAlignment = Store->getAlignment();
2007  if (!InstAlignment)
2008  InstAlignment =
2009  MDL.getABITypeAlignment(Store->getValueOperand()->getType());
2010 
2011  if (!DereferenceableInPH || !SafeToInsertStore ||
2012  (InstAlignment > Alignment)) {
2013  if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
2014  DereferenceableInPH = true;
2015  SafeToInsertStore = true;
2016  Alignment = std::max(Alignment, InstAlignment);
2017  }
2018  }
2019 
2020  // If a store dominates all exit blocks, it is safe to sink.
2021  // As explained above, if an exit block was executed, a dominating
2022  // store must have been executed at least once, so we are not
2023  // introducing stores on paths that did not have them.
2024  // Note that this only looks at explicit exit blocks. If we ever
2025  // start sinking stores into unwind edges (see above), this will break.
2026  if (!SafeToInsertStore)
2027  SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2028  return DT->dominates(Store->getParent(), Exit);
2029  });
2030 
2031  // If the store is not guaranteed to execute, we may still get
2032  // deref info through it.
2033  if (!DereferenceableInPH) {
2034  DereferenceableInPH = isDereferenceableAndAlignedPointer(
2035  Store->getPointerOperand(), Store->getValueOperand()->getType(),
2036  MaybeAlign(Store->getAlignment()), MDL,
2037  Preheader->getTerminator(), DT);
2038  }
2039  } else
2040  return false; // Not a load or store.
2041 
2042  // Merge the AA tags.
2043  if (LoopUses.empty()) {
2044  // On the first load/store, just take its AA tags.
2045  UI->getAAMetadata(AATags);
2046  } else if (AATags) {
2047  UI->getAAMetadata(AATags, /* Merge = */ true);
2048  }
2049 
2050  LoopUses.push_back(UI);
2051  }
2052  }
2053 
2054  // If we found both an unordered atomic instruction and a non-atomic memory
2055  // access, bail. We can't blindly promote non-atomic to atomic since we
2056  // might not be able to lower the result. We can't downgrade since that
2057  // would violate memory model. Also, align 0 is an error for atomics.
2058  if (SawUnorderedAtomic && SawNotAtomic)
2059  return false;
2060 
2061  // If we're inserting an atomic load in the preheader, we must be able to
2062  // lower it. We're only guaranteed to be able to lower naturally aligned
2063  // atomics.
2064  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
2065  if (SawUnorderedAtomic &&
2066  Alignment < MDL.getTypeStoreSize(SomePtrElemType))
2067  return false;
2068 
2069  // If we couldn't prove we can hoist the load, bail.
2070  if (!DereferenceableInPH)
2071  return false;
2072 
2073  // We know we can hoist the load, but don't have a guaranteed store.
2074  // Check whether the location is thread-local. If it is, then we can insert
2075  // stores along paths which originally didn't have them without violating the
2076  // memory model.
2077  if (!SafeToInsertStore) {
2078  if (IsKnownThreadLocalObject)
2079  SafeToInsertStore = true;
2080  else {
2081  Value *Object = GetUnderlyingObject(SomePtr, MDL);
2082  SafeToInsertStore =
2083  (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
2084  !PointerMayBeCaptured(Object, true, true);
2085  }
2086  }
2087 
2088  // If we've still failed to prove we can sink the store, give up.
2089  if (!SafeToInsertStore)
2090  return false;
2091 
2092  // Otherwise, this is safe to promote, lets do it!
2093  LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
2094  << '\n');
2095  ORE->emit([&]() {
2096  return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2097  LoopUses[0])
2098  << "Moving accesses to memory location out of the loop";
2099  });
2100  ++NumPromoted;
2101 
2102  // Grab a debug location for the inserted loads/stores; given that the
2103  // inserted loads/stores have little relation to the original loads/stores,
2104  // this code just arbitrarily picks a location from one, since any debug
2105  // location is better than none.
2106  DebugLoc DL = LoopUses[0]->getDebugLoc();
2107 
2108  // We use the SSAUpdater interface to insert phi nodes as required.
2110  SSAUpdater SSA(&NewPHIs);
2111  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
2112  InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
2113  Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
2114 
2115  // Set up the preheader to have a definition of the value. It is the live-out
2116  // value from the preheader that uses in the loop will use.
2117  LoadInst *PreheaderLoad = new LoadInst(
2118  SomePtr->getType()->getPointerElementType(), SomePtr,
2119  SomePtr->getName() + ".promoted", Preheader->getTerminator());
2120  if (SawUnorderedAtomic)
2121  PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2122  PreheaderLoad->setAlignment(MaybeAlign(Alignment));
2123  PreheaderLoad->setDebugLoc(DL);
2124  if (AATags)
2125  PreheaderLoad->setAAMetadata(AATags);
2126  SSA.AddAvailableValue(Preheader, PreheaderLoad);
2127 
 // Mirror the preheader load into MemorySSA when it is being maintained.
2128  if (MSSAU) {
2129  MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
2130  PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2131  MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2132  MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
2133  }
2134 
2135  if (MSSAU && VerifyMemorySSA)
2136  MSSAU->getMemorySSA()->verifyMemorySSA();
2137  // Rewrite all the loads in the loop and remember all the definitions from
2138  // stores in the loop.
2139  Promoter.run(LoopUses);
2140 
2141  if (MSSAU && VerifyMemorySSA)
2142  MSSAU->getMemorySSA()->verifyMemorySSA();
2143  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2144  if (PreheaderLoad->use_empty())
2145  eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
2146 
2147  return true;
2148 }
2149 
2150 /// Returns an owning pointer to an alias set which incorporates aliasing info
2151 /// from L and all subloops of L.
2152 /// FIXME: In new pass manager, there is no helper function to handle loop
2153 /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
2154 /// from scratch for every loop. Hook up with the helper functions when
2155 /// available in the new pass manager to avoid redundant computation.
2156 std::unique_ptr<AliasSetTracker>
2157 LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
2158  AliasAnalysis *AA) {
2159  std::unique_ptr<AliasSetTracker> CurAST;
2160  SmallVector<Loop *, 4> RecomputeLoops;
2161  for (Loop *InnerL : L->getSubLoops()) {
2162  auto MapI = LoopToAliasSetMap.find(InnerL);
2163  // If the AST for this inner loop is missing it may have been merged into
2164  // some other loop's AST and then that loop unrolled, and so we need to
2165  // recompute it.
2166  if (MapI == LoopToAliasSetMap.end()) {
2167  RecomputeLoops.push_back(InnerL);
2168  continue;
2169  }
2170  std::unique_ptr<AliasSetTracker> InnerAST = std::move(MapI->second);
2171 
2172  if (CurAST) {
2173  // What if InnerLoop was modified by other passes ?
2174  // Once we've incorporated the inner loop's AST into ours, we don't need
2175  // the subloop's anymore.
2176  CurAST->add(*InnerAST);
2177  } else {
2178  CurAST = std::move(InnerAST);
2179  }
2180  LoopToAliasSetMap.erase(MapI);
2181  }
2182  if (!CurAST)
2183  CurAST = std::make_unique<AliasSetTracker>(*AA);
2184 
2185  // Add everything from the sub loops that are no longer directly available.
2186  for (Loop *InnerL : RecomputeLoops)
2187  for (BasicBlock *BB : InnerL->blocks())
2188  CurAST->add(*BB);
2189 
2190  // And merge in this loop (without anything from inner loops).
2191  for (BasicBlock *BB : L->blocks())
2192  if (LI->getLoopFor(BB) == L)
2193  CurAST->add(*BB);
2194 
2195  return CurAST;
2196 }
2197 
2198 std::unique_ptr<AliasSetTracker>
2199 LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
2200  Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
2201  auto *MSSA = MSSAU->getMemorySSA();
2202  auto CurAST = std::make_unique<AliasSetTracker>(*AA, MSSA, L);
2203  CurAST->addAllInstructionsInLoopUsingMSSA();
2204  return CurAST;
2205 }
2206 
2207 /// Simple analysis hook. Clone alias set info.
2208 ///
2209 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
2210  Loop *L) {
2211  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2212  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2213  return;
2214 
2215  ASTIt->second->copyValue(From, To);
2216 }
2217 
2218 /// Simple Analysis hook. Delete value V from alias set
2219 ///
2220 void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
2221  auto ASTIt = LICM.getLoopToAliasSetMap().find(L);
2222  if (ASTIt == LICM.getLoopToAliasSetMap().end())
2223  return;
2224 
2225  ASTIt->second->deleteValue(V);
2226 }
2227 
2228 /// Simple Analysis hook. Delete value L from alias set map.
2229 ///
2230 void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
2231  if (!LICM.getLoopToAliasSetMap().count(L))
2232  return;
2233 
2234  LICM.getLoopToAliasSetMap().erase(L);
2235 }
2236 
// Returns true when the location may be modified somewhere inside CurLoop,
// according to the alias set tracker (optionally refined by a bounded
// per-instruction mod/ref scan, see below).
2238  AliasSetTracker *CurAST, Loop *CurLoop,
2239  AliasAnalysis *AA) {
2240  // First check to see if any of the basic blocks in CurLoop invalidate *V.
2241  bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
2242 
 // With the diagnostic threshold disabled (the default, 0), the AST answer
 // is final.
2243  if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
2244  return isInvalidatedAccordingToAST;
2245 
2246  // Check with a diagnostic analysis if we can refine the information above.
2247  // This is to identify the limitations of using the AST.
2248  // The alias set mechanism used by LICM has a major weakness in that it
2249  // combines all things which may alias into a single set *before* asking
2250  // modref questions. As a result, a single readonly call within a loop will
2251  // collapse all loads and stores into a single alias set and report
2252  // invalidation if the loop contains any store. For example, readonly calls
2253  // with deopt states have this form and create a general alias set with all
2254  // loads and stores. In order to get any LICM in loops containing possible
2255  // deopt states we need a more precise invalidation of checking the mod ref
2256  // info of each instruction within the loop and LI. This has a complexity of
2257  // O(N^2), so currently, it is used only as a diagnostic tool since the
2258  // default value of LICMN2Threshold is zero.
2259 
2260  // Don't look at nested loops.
2261  if (CurLoop->begin() != CurLoop->end())
2262  return true;
2263 
2264  int N = 0;
2265  for (BasicBlock *BB : CurLoop->getBlocks())
2266  for (Instruction &I : *BB) {
 // Give up (conservatively report invalidated) once the instruction
 // budget is exhausted.
2267  if (N >= LICMN2Theshold) {
 // NOTE(review): "Alasing" below is a typo for "Aliasing" in this
 // debug-output string (debug text only; harmless at runtime).
2268  LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
2269  << *(MemLoc.Ptr) << "\n");
2270  return true;
2271  }
2272  N++;
2273  auto Res = AA->getModRefInfo(&I, MemLoc);
2274  if (isModSet(Res)) {
2275  LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
2276  << *(MemLoc.Ptr) << "\n");
2277  return true;
2278  }
2279  }
 // No instruction in the loop modifies the location: the AST answer was a
 // false positive.
2280  LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
2281  return false;
2282 }
2283 
// MemorySSA-based counterpart of pointerInvalidatedByLoop: returns true when
// the use MU may be clobbered inside CurLoop (for hoisting), or when sinking
// MU below the loop could be unsafe (for sinking).
2285  Loop *CurLoop,
2286  SinkAndHoistLICMFlags &Flags) {
2287  // For hoisting, use the walker to determine safety
2288  if (!Flags.IsSink) {
2290  // See declaration of SetLicmMssaOptCap for usage details.
 // Past the optimization cap, fall back to the (cheaper, more
 // conservative) defining access instead of walking for the clobber.
2291  if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
2292  Source = MU->getDefiningAccess();
2293  else {
2294  Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2295  Flags.LicmMssaOptCounter++;
2296  }
 // Invalidated iff the clobber is a real def that lives inside the loop.
2297  return !MSSA->isLiveOnEntryDef(Source) &&
2298  CurLoop->contains(Source->getBlock());
2299  }
2300 
2301  // For sinking, we'd need to check all Defs below this use. The getClobbering
2302  // call will look on the backedge of the loop, but will check aliasing with
2303  // the instructions on the previous iteration.
2304  // For example:
2305  // for (i ... )
2306  // load a[i] ( Use (LoE)
2307  // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2308  // i++;
2309  // The load sees no clobbering inside the loop, as the backedge alias check
2310  // does phi translation, and will check aliasing against store a[i-1].
2311  // However sinking the load outside the loop, below the store is incorrect.
2312 
2313  // For now, only sink if there are no Defs in the loop, and the existing ones
2314  // precede the use and are in the same block.
2315  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2316  // needs PostDominatorTreeAnalysis.
2317  // FIXME: More precise: no Defs that alias this Use.
 // Too many accesses to scan: conservatively report invalidated.
2318  if (Flags.NoOfMemAccTooLarge)
2319  return true;
2320  for (auto *BB : CurLoop->getBlocks())
2321  if (auto *Accesses = MSSA->getBlockDefs(BB))
2322  for (const auto &MA : *Accesses)
2323  if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2324  if (MU->getBlock() != MD->getBlock() ||
2325  !MSSA->locallyDominates(MD, MU))
2326  return true;
2327  return false;
2328 }
2329 
2330 /// Little predicate that returns true if the specified basic block is in
2331 /// a subloop of the current one, not the current one itself.
2332 ///
2333 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2334  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2335  return LI->getLoopFor(BB) != CurLoop;
2336 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1432
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
bool canSplitPredecessors() const
Definition: BasicBlock.cpp:378
void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB)
Inform the safety info that we are planning to insert a new instruction Inst into the basic block BB...
Definition: MustExecute.cpp:90
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
void ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool promoteLoopAccessesToScalars(const SmallSetVector< Value *, 8 > &, SmallVectorImpl< BasicBlock *> &, SmallVectorImpl< Instruction *> &, SmallVectorImpl< MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *)
Try to promote memory values to scalars by sinking stores out of the loop and moving loads to before ...
Definition: LICM.cpp:1867
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I)
Returns true if a PHINode is a trivially replaceable with an Instruction.
Definition: LICM.cpp:1289
Helper class for SSA formation on a set of values defined in multiple blocks.
Definition: SSAUpdater.h:38
Diagnostic information for missed-optimization remarks.
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
DiagnosticInfoOptimizationBase::Argument NV
bool hasDisableLICMTransformsHint(const Loop *L)
Look for the loop attribute that disables the LICM transformation heuristics.
Definition: LoopUtils.cpp:387
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:83
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:777
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2139
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
This is the interface for a simple mod/ref and alias analysis over globals.
const_iterator end() const
EltTy front() const
bool hasMetadataOtherThanDebugLoc() const
Return true if this instruction has metadata attached to it other than a debug location.
Definition: Instruction.h:228
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1195
iterator end()
Definition: Function.h:687
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
static constexpr LocationSize unknown()
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:494
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:85
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:448
const AccessList * getBlockAccesses(const BasicBlock *BB) const
Return the list of MemoryAccess's for a given basic block.
Definition: MemorySSA.h:758
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value...
Definition: SSAUpdater.cpp:71
The main scalar evolution driver.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1490
void insertUse(MemoryUse *Use, bool RenameUses=false)
This class represents a function call, abstracting a target machine's calling convention.
This file contains the declarations for metadata subclasses.
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:258
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:637
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1605
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested...
Definition: Loads.cpp:138
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:743
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
BasicBlock * getSuccessor(unsigned i) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI)
Return true if the instruction is free in the loop.
Definition: LICM.cpp:1298
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:169
Hexagon Common GEP
Value * getCondition() const
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:624
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:144
iv Induction Variable Users
Definition: IVUsers.cpp:51
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *)
Walk the specified region of the CFG (defined by all blocks dominated by the specified block...
Definition: LICM.cpp:795
void reserve(size_type N)
Definition: SmallVector.h:369
void setAlignment(MaybeAlign Align)
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
Definition: TinyPtrVector.h:30
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:299
op_iterator op_begin()
Definition: User.h:229
bool hasLoopInvariantOperands(const Instruction *I) const
Return true if all the operands of the specified instruction are loop invariant.
Definition: LoopInfo.cpp:67
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:681
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA)
Definition: LICM.cpp:2237
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
Loop Invariant Code Motion
Definition: LICM.cpp:311
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1241
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:385
Represents read-only accesses to memory.
Definition: MemorySSA.h:319
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
ArrayRef< BasicBlock * > get(BasicBlock *BB)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:965
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:76
This is the interface for a SCEV-based alias analysis.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
Type * getPointerElementType() const
Definition: Type.h:381
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
Definition: MustExecute.h:130
bool hasAllowReciprocal() const
Determine whether the allow-reciprocal flag is set.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
virtual bool doFinalization(Module &)
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
Definition: Pass.h:109
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:703
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
Definition: InstrTypes.h:1028
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition: MemorySSA.h:766
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1183
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:140
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:72
BlockT * getHeader() const
Definition: LoopInfo.h:105
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:102
FunctionModRefBehavior getModRefBehavior(const CallBase *Call)
Return the behavior of the given call site.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, SinkAndHoistLICMFlags &Flags)
Definition: LICM.cpp:2284
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_END(LegacyLICMPass
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:720
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:235
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:454
PredIteratorCache - This class is an extremely trivial cache for predecessor iterator queries...
FunctionModRefBehavior
Summary of how a function affects memory in the program.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:275
Instruction * getUniqueInstruction()
If this alias set is known to contain a single instruction and only a single unique instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
This class represents a no-op cast from one type to another.
Memory SSA
Definition: MemorySSA.cpp:65
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:325
static cl::opt< bool > DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass"))
Memory promotion is enabled by default.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void copyColors(BasicBlock *New, BasicBlock *Old)
Copy colors of block Old into the block New.
Definition: MustExecute.cpp:34
iterator begin()
Definition: Function.h:685
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
void forgetLoopDispositions(const Loop *L)
Called when the client has changed the disposition of values in this loop.
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:105
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal=false)
Checks whether the given location points to constant memory, or if OrLocal is true whether it points ...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:881
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
NodeT * getBlock() const
static Instruction * sinkThroughTriviallyReplaceablePHI(PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap< BasicBlock *, Instruction *, 32 > &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1454
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
static cl::opt< int > LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:196
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:223
virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const =0
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:328
unsigned const MachineRegisterInfo * MRI
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1576
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:39
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1261
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size(BasicBlock *BB) const
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
void setAlignment(MaybeAlign Align)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:285
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:582
Expected to fold away in lowering.
Constant * ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
Diagnostic information for applied optimization remarks.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Pass * createLICMPass()
Definition: LICM.cpp:314
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:231
cl::opt< unsigned > SetLicmMssaOptCap
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LICM.cpp:277
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
AliasSet & getAliasSetFor(const MemoryLocation &MemLoc)
Return the alias set which contains the specified memory location.
constexpr double e
Definition: MathExtras.h:57
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:115
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo)
Definition: LICM.cpp:1472
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
bool isMod() const
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1446
MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point)
Create a MemoryAccess in MemorySSA at a specified point in a block, with a specified clobbering defin...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop)
Definition: LICM.cpp:981
size_t size() const
Definition: SmallVector.h:52
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
This function does not perform any non-local loads or stores to memory.
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:61
static cl::opt< uint32_t > MaxNumUsesTraversed("licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)"))
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:92
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:338
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only use loop invariant operands that is safe to hoist, this instruction is called to do the dirty work.
Definition: LICM.cpp:1663
Iterator for intrusive lists based on ilist_node.
void removeInstruction(const Instruction *Inst)
Inform safety info that we are planning to remove the instruction Inst from its block.
Definition: MustExecute.cpp:96
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop) const
Returns true if we could not execute a memory-modifying instruction before we enter BB under assumpti...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
#define DEBUG_TYPE
Definition: LICM.cpp:79
BlockVerifier::State From
static cl::opt< bool > ControlFlowHoisting("licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM"))
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1870
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
iterator end()
Definition: BasicBlock.h:275
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1161
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater...
Definition: SSAUpdater.h:136
void copyValue(Value *From, Value *To)
This method should be used whenever a preexisting value in the program is copied or cloned...
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in the same basic block, determine whether MemoryAccess A dominates MemoryA...
Definition: MemorySSA.cpp:2108
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Provides information about what library functions are available for the current target.
iterator begin() const
Definition: LoopInfo.h:147
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:755
void wireOldPredecessorsToNewImmediatePredecessor(BasicBlock *Old, BasicBlock *New, ArrayRef< BasicBlock *> Preds, bool IdenticalEdgesWereMerged=true)
A new empty BasicBlock (New) now branches directly to Old.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:643
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:926
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop)
Return true if the only users of this instruction are outside of the loop.
Definition: LICM.cpp:1326
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:716
unsigned getNumIncomingValues() const
Return the number of incoming edges.
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental.guard intrinsic.
Definition: GuardUtils.cpp:17
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:247
A lightweight accessor for an operand bundle meant to be passed around by value.
Definition: InstrTypes.h:1000
iterator_range< user_iterator > users()
Definition: Value.h:420
static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, const Instruction *CtxI=nullptr)
Only sink or hoist an instruction if it is not a trapping instruction, or if the instruction is known...
Definition: LICM.cpp:1706
LoopT * getParentLoop() const
Definition: LoopInfo.h:106
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:136
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:384
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB)
Checks if functions with the specified behavior are known to read and write at most from objects poin...
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
iterator begin() const
Definition: SmallPtrSet.h:396
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
cl::opt< bool > EnableMSSALoopDependency
Enables memory ssa as a dependency for loop passes.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
unsigned pred_size(const BasicBlock *BB)
Get the number of predecessors of BB.
Definition: CFG.h:121
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
user_iterator_impl< User > user_iterator
Definition: Value.h:389
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:138
licm
Definition: LICM.cpp:311
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
iterator end() const
Definition: LoopInfo.h:148
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:738
Captures loop safety information.
Definition: MustExecute.h:54
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
const_iterator begin() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:488
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:295
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1025
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI)
Little predicate that returns true if the specified basic block is in a subloop of the current one...
Definition: LICM.cpp:2333
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:396
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:359
LLVM Value Representation.
Definition: Value.h:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
void initializeLegacyLICMPassPass(PassRegistry &)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Returns true if it is legal to hoist or sink this instruction disregarding the possible introduction of ...
Definition: LICM.cpp:1080
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1208
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
Definition: LoopInfoImpl.h:115
This is the interface for LLVM's primary stateless and local alias analysis.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE)
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: LICM.cpp:1562
A container for analyses that lazily runs them and caches their results.
static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1358
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
op_range incoming_values()
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:178
The optimization diagnostic interface.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.cpp:127
bool use_empty() const
Definition: Value.h:343
unsigned size() const
void deleteValue(Value *PtrVal)
This method is used to remove a pointer value from the AliasSetTracker entirely.
loop versioning Loop Versioning For LICM
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1442
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
user_iterator user_end()
Definition: Value.h:404
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:542
const DenseMap< BasicBlock *, ColorVector > & getBlockColors() const
Returns block colors map that is used to update funclet operand bundles.
Definition: MustExecute.cpp:30