//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/PassManager.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Use.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Allocator.h"
51 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include <cassert>
58 #include <deque>
59 #include <memory>
60 #include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash min/max/abs (cmp + select) to allow for commuted operands.
  // Min/max may also have non-canonical compare predicate (eg, the compare for
  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
  // compare.
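  // For example, both of these forms compute smin(a, b) and should hash (and
  // compare equal) to each other:
  //   %cmp1 = icmp slt i32 %a, %b
  //   %min1 = select i1 %cmp1, i32 %a, i32 %b
  //   %cmp2 = icmp sgt i32 %a, %b
  //   %min2 = select i1 %cmp2, i32 %b, i32 %a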
  Value *A, *B;
  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
  // TODO: We should also detect FP min/max.
  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
      SPF == SPF_UMIN || SPF == SPF_UMAX) {
    if (A > B)
      std::swap(A, B);
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
  if (SPF == SPF_ABS || SPF == SPF_NABS) {
    // ABS/NABS always puts the input in A and its negation in B.
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *LHSA, *LHSB;
  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
  // TODO: We should also detect FP min/max.
  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
      LSPF == SPF_ABS || LSPF == SPF_NABS) {
    Value *RHSA, *RHSB;
    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
    if (LSPF == RSPF) {
      // Abs results are placed in a defined order by matchSelectPattern.
      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
        return LHSA == RHSA && LHSB == RHSB;
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
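///
/// A minimal sketch of wiring this pass into a legacy pipeline (hypothetical
/// driver code, not part of this file):
///   legacy::FunctionPassManager FPM(&M);
///   FPM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));
///   FPM.run(F);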
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
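  ///
  /// For example, given:
  ///   %v1 = load i32, i32* %p
  ///   call void @f()            ; may write to memory; generation increments
  ///   %v2 = load i32, i32* %p
  /// the loads end up in different generations, so %v2 is only CSE'd to %v1
  /// if MemorySSA can prove that @f does not clobber %p.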
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;
  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls)
          {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. We handle the
    // phi case eagerly here. The non-optimized MemoryUse case is lazily
    // updated by MemorySSA getClobberingMemoryAccess.
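    // For example, if removing a store leaves a MemoryPhi of the form
    //   2 = MemoryPhi({bb1, 1}, {bb2, 1})
    // then all of its incoming values are identical, so the phi itself is
    // redundant and can be removed as well.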
    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
      // Optimize MemoryPhi nodes that may become redundant by having all the
      // same input values once MA is removed.
      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
      SmallVector<MemoryAccess *, 8> WorkQueue;
      WorkQueue.push_back(MA);
      // Process MemoryPhi nodes in FIFO order using an ever-growing vector
      // since we shouldn't be processing that many phis and this will avoid
      // an allocation in almost all cases.
      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
        MemoryAccess *WI = WorkQueue[I];

        for (auto *U : WI->users())
          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
            PhisToCheck.insert(MP);

        MSSAUpdater->removeMemoryAccess(WI);

        for (MemoryPhi *MP : PhisToCheck) {
          MemoryAccess *FirstIn = MP->getIncomingValue(0);
          if (llvm::all_of(MP->incoming_values(),
                           [=](Use &In) { return In == FirstIn; }))
            WorkQueue.push_back(MP);
        }
        PhisToCheck.clear();
      }
    }
  }
};

} // end anonymous namespace
/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef =
      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is AND operation, we can propagate its operands into the
  // true branch. If it is OR operation, we can propagate them into the false
  // branch.
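  // For example, with:
  //   %and = and i1 %x, %y
  //   br i1 %and, label %taken, label %other
  // %and, %x and %y are all known to be true on entry to %taken.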
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block. If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent value. For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path. We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value. Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
  /// to the same location, we delete the dead store. This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;

  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }
      salvageDebugInfo(*Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with its removal. However, we should mark
    // its condition as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    // We can DSE the store to 30, since the store 40 to invariant location p
    // causes undefined behaviour.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're on guarding here is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }
    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load are invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << " to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // write back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
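    // For example:
    //   %v = load i32, i32* %p
    //   ...                     ; no intervening writes or ordering operations
    //   store i32 %v, i32* %p   ; writes back the value just read; removable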
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
        // removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
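        // For example, in:
        //   store i32 1, i32* %p
        //   store i32 2, i32* %p
        // the first store is dead and is deleted when the second is seen.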
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << " due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;
  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)