//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMapInfo.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/PassManager.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Use.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Allocator.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
57 #include "llvm/Transforms/Scalar.h"
59 #include <cassert>
60 #include <deque>
61 #include <memory>
62 #include <utility>
63 
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace
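
// Illustrative example (not part of the original source): two syntactically
// identical pure instructions produce the same SimpleValue, so the dominated
// one is eliminated in favor of the dominating one:
//
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b   ; CSE'd: all uses of %y are rewritten to %x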
111 
112 namespace llvm {
113 
114 template <> struct DenseMapInfo<SimpleValue> {
115  static inline SimpleValue getEmptyKey() {
117  }
118 
119  static inline SimpleValue getTombstoneKey() {
121  }
122 
123  static unsigned getHashValue(SimpleValue Val);
124  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
125 };
126 
127 } // end namespace llvm
128 
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }
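
  // Illustrative note (not in the original source): because commutative
  // operands are canonicalized by pointer order before hashing,
  //   %x = add i32 %a, %b
  //   %y = add i32 %b, %a
  // hash to the same value; isEqual() below then confirms the commuted match.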

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash min/max/abs (cmp + select) to allow for commuted operands.
  // Min/max may also have a non-canonical compare predicate (e.g., the compare
  // for smin may use 'sgt' rather than 'slt'), and non-canonical operands in
  // the compare.
  Value *A, *B;
  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
  // TODO: We should also detect FP min/max.
  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
      SPF == SPF_UMIN || SPF == SPF_UMAX) {
    if (A > B)
      std::swap(A, B);
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
  if (SPF == SPF_ABS || SPF == SPF_NABS) {
    // ABS/NABS always puts the input in A and its negation in B.
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
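
  // Illustrative example (not in the original source): both of these select
  // idioms are recognized as SPF_SMIN and hash identically:
  //
  //   %c1 = icmp slt i32 %a, %b
  //   %m1 = select i1 %c1, i32 %a, i32 %b
  //   %c2 = icmp sgt i32 %b, %a
  //   %m2 = select i1 %c2, i32 %a, i32 %b   ; CSE'd to %m1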

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable
  // instruction.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality.
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality.
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *LHSA, *LHSB;
  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
  // TODO: We should also detect FP min/max.
  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
      LSPF == SPF_ABS || LSPF == SPF_NABS) {
    Value *RHSA, *RHSB;
    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
    if (LSPF == RSPF) {
      // Abs results are placed in a defined order by matchSelectPattern.
      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
        return LHSA == RHSA && LHSB == RHSB;
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace
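
// Illustrative example (not part of the original source): a call that only
// reads memory can be value-numbered like a load, subject to the same
// generation check used for loads:
//
//   %r1 = call i32 @f(i32 %x)       ; @f is readonly
//   %r2 = call i32 @f(i32 %x)       ; CSE'd to %r1 if no intervening write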

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;
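
  // Illustrative example (not in the original source) of the scoped lookup:
  //
  //   entry:
  //     %x = add i32 %a, %b            ; inserted into AvailableValues
  //     br label %then
  //   then:                            ; dominated by entry
  //     %y = add i32 %a, %b            ; lookup hits: %y is replaced by %x
  //
  // When the DFS pops back above 'then', the scope (and every value inserted
  // within it) is popped as well.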

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;
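
  // Illustrative example (not in the original source) of generation counting:
  //
  //   %v1 = load i32, i32* %p          ; recorded at generation G
  //   call void @clobber()             ; may write memory: generation -> G+1
  //   %v2 = load i32, i32* %p          ; generation mismatch, not CSE'd
  //
  // (When MemorySSA is available, isSameMemGeneration() below can still prove
  // the intervening clobber is irrelevant and allow the CSE.)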

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;

  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls)
    {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer.
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer.
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. We handle the
    // phi case eagerly here. The non-optimized MemoryUse case is lazily
    // updated by MemorySSA getClobberingMemoryAccess.
    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
      // Optimize MemoryPhi nodes that may become redundant by having all the
      // same input values once MA is removed.
      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
      SmallVector<MemoryAccess *, 8> WorkQueue;
      WorkQueue.push_back(MA);
      // Process MemoryPhi nodes in FIFO order using an ever-growing vector
      // since we shouldn't be processing that many phis and this will avoid
      // an allocation in almost all cases.
      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
        MemoryAccess *WI = WorkQueue[I];

        for (auto *U : WI->users())
          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
            PhisToCheck.insert(MP);

        MSSAUpdater->removeMemoryAccess(WI);

        for (MemoryPhi *MP : PhisToCheck) {
          MemoryAccess *FirstIn = MP->getIncomingValue(0);
          if (llvm::all_of(MP->incoming_values(),
                           [=](Use &In) { return In == FirstIn; }))
            WorkQueue.push_back(MP);
        }
        PhisToCheck.clear();
      }
    }
  }
};

} // end anonymous namespace

/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef =
      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}
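
// Illustrative example (not in the original source): once a location is in
// AvailableInvariants, a later generation bump no longer blocks forwarding:
//
//   call {}* @llvm.invariant.start.p0i8(i64 4, i8* %p)  ; invariant at gen G
//   %v1 = load i32, i32* %p                             ; recorded at gen G
//   call void @clobber()                                ; gen G -> G+1
//   %v2 = load i32, i32* %p   ; CSE'd to %v1: %p has been invariant since G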

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into the
  // false branch (see the illustrative example below).
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
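
  // Illustrative example (not in the original source):
  //
  //   %c = and i1 %a, %b
  //   br i1 %c, label %taken, label %other
  //
  // In %taken, %c is known true; because the condition is an 'and', %a and
  // %b are known true there as well. The worklist below walks the operand
  // tree so all three get recorded in AvailableValues.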

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live-out memory values are still current
  // in this block. If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent. For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path. We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value. Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
  /// to the same location, we delete the dead store. This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;

  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }
      salvageDebugInfo(*Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with its removal. However, we should mark
    // its condition as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    //
    // We can DSE the store to 30, since the store of 40 to invariant location
    // p causes undefined behaviour.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (Conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load are invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << " to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - if we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
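    //
    // Illustrative example (not in the original source):
    //
    //   %v = load i32, i32* %p
    //   store i32 %v, i32* %p   ; writes back the value just loaded; the
    //                           ; store is removable and %p stays valid in
    //                           ; AvailableLoads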
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if
        // MemorySSA tells us that the load and store are from the same memory
        // generation. In that case, LastStore should keep its present value
        // since we're removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
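        //
        // Illustrative example (not in the original source):
        //
        //   store i32 1, i32* %p   ; LastStore: dead, removed below
        //   store i32 2, i32* %p   ; same location, no intervening read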
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << " due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note: a deque is used here because there are significant performance
  // gains over a vector when the container becomes very large, due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)