//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
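  // Note: commutative operations below are canonicalized by operand pointer
  // order before hashing, so that, e.g., 'add %a, %b' and 'add %b, %a'
  // receive the same hash and can be CSE'd against each other.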
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash min/max/abs (cmp + select) to allow for commuted operands.
  // Min/max may also have non-canonical compare predicate (e.g., the compare
  // for smin may use 'sgt' rather than 'slt'), and non-canonical operands in
  // the compare.
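  // For example, 'select (icmp slt %a, %b), %a, %b' and
  // 'select (icmp sgt %b, %a), %a, %b' both compute smin(%a, %b) and should
  // hash identically.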
  Value *A, *B;
  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
  // TODO: We should also detect FP min/max.
  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
      SPF == SPF_UMIN || SPF == SPF_UMAX) {
    if (A > B)
      std::swap(A, B);
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
  if (SPF == SPF_ABS || SPF == SPF_NABS) {
    // ABS/NABS always puts the input in A and its negation in B.
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
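  // that computes the same value with its operands swapped; e.g., 'add %x, %y'
  // matches 'add %y, %x', and 'icmp slt %x, %y' matches 'icmp sgt %y, %x'.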
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *LHSA, *LHSB;
  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
  // TODO: We should also detect FP min/max.
  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
      LSPF == SPF_ABS || LSPF == SPF_NABS) {
    Value *RHSA, *RHSB;
    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
    if (LSPF == RSPF) {
      // Abs results are placed in a defined order by matchSelectPattern.
      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
        return LHSA == RHSA && LHSB == RHSB;
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
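  ///
  /// For example: a load of %p at generation 0 is recorded; a subsequent store
  /// to any location bumps the generation to 1; a second load of %p at
  /// generation 1 can then only reuse the recorded value if MemorySSA (when
  /// available) proves the intervening store does not clobber %p.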
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;

  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls) {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. We handle the
    // phi case eagerly here. The non-optimized MemoryUse case is lazily
    // updated by MemorySSA's getClobberingMemoryAccess.
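    // For example, removing the only MemoryDef feeding a MemoryPhi can leave
    // a phi of the form 'MemoryPhi(LiveOnEntry, LiveOnEntry)'; such phis are
    // detected and queued for removal by the loop below.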
    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
      // Optimize MemoryPhi nodes that may become redundant by having all the
      // same input values once MA is removed.
      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
      SmallVector<MemoryAccess *, 8> WorkQueue;
      WorkQueue.push_back(MA);
      // Process MemoryPhi nodes in FIFO order using an ever-growing vector
      // since we shouldn't be processing that many phis and this will avoid
      // an allocation in almost all cases.
      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
        MemoryAccess *WI = WorkQueue[I];

        for (auto *U : WI->users())
          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
            PhisToCheck.insert(MP);

        MSSAUpdater->removeMemoryAccess(WI);

        for (MemoryPhi *MP : PhisToCheck) {
          MemoryAccess *FirstIn = MP->getIncomingValue(0);
          if (llvm::all_of(MP->incoming_values(),
                           [=](Use &In) { return In == FirstIn; }))
            WorkQueue.push_back(MP);
        }
        PhisToCheck.clear();
      }
    }
  }
};

} // end anonymous namespace

/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef =
      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // TODO: "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get.
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into the
  // false branch.
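  // For example, given 'br (and %c1, %c2), %bb_true, %bb_false', both %c1 and
  // %c2 are known true inside %bb_true; with 'or', both are known false
  // inside %bb_false.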
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block. If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent value. For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path. We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value. Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
  /// to the same location, we delete the dead store. This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
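  /// For example, in 'store i8 %a, i8* %p' followed (with no intervening
  /// load) by 'store i8 %b, i8* %p', the first store is dead and is removed
  /// when the second one is seen.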
  Instruction *LastStore = nullptr;

  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }
      if (!salvageDebugInfo(*Inst))
        replaceDbgUsesWithUndef(Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with its removal. However, we should mark
    // its condition as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    //
    // We can DSE the store to 30, since the store of 40 to invariant location
    // p causes undefined behavior.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load are invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << " to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - if we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
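    // For example, in '%x = load i32, i32* %p' followed by 'store i32 %x,
    // i32* %p' with no intervening clobber, the store writes back the value
    // that is already there and can be deleted.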
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
        // removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << " due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)
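
// For reference, the pass names registered above can be exercised directly
// from the command line, e.g. 'opt -early-cse -S input.ll' or
// 'opt -early-cse-memssa -S input.ll'; under the new pass manager the
// equivalent is 'opt -passes=early-cse'.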