LLVM  3.7.0
EarlyCSE.cpp
Go to the documentation of this file.
1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs a simple dominator tree walk that eliminates trivially
11 // redundant instructions.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Statistic.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/Debug.h"
32 #include "llvm/Transforms/Scalar.h"
34 #include <deque>
35 using namespace llvm;
36 using namespace llvm::PatternMatch;
37 
38 #define DEBUG_TYPE "early-cse"
39 
40 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
41 STATISTIC(NumCSE, "Number of instructions CSE'd");
42 STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
43 STATISTIC(NumCSECall, "Number of call instructions CSE'd");
44 STATISTIC(NumDSE, "Number of trivial dead stores removed");
45 
46 //===----------------------------------------------------------------------===//
47 // SimpleValue
48 //===----------------------------------------------------------------------===//
49 
50 namespace {
51 /// \brief Struct representing the available values in the scoped hash table.
// Thin wrapper around a single Instruction pointer. Construction asserts that
// the pointer is either a sentinel (see isSentinel) or an instruction accepted
// by canHandle().
52 struct SimpleValue {
53  Instruction *Inst;
54 
55  SimpleValue(Instruction *I) : Inst(I) {
56  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
57  }
58 
// NOTE(review): the body of isSentinel() (listing lines 60-61) is not present
// in this listing, so its exact definition cannot be confirmed from here.
59  bool isSentinel() const {
62  }
63 
// Returns true if Inst is an instruction kind this struct may wrap: a call
// that does not access memory and has a non-void type, or a cast, binary
// operator, GEP, compare, select, extract/insert element, shufflevector, or
// extract/insert value instruction.
64  static bool canHandle(Instruction *Inst) {
65  // Calls are only handled when they do not access memory and are non-void.
66  if (CallInst *CI = dyn_cast<CallInst>(Inst))
67  return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
68  return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
69  isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
70  isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
71  isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
72  isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
73  }
74 };
75 }
76 
77 namespace llvm {
// DenseMapInfo specialization for SimpleValue: declares the empty/tombstone
// keys plus the hashing and equality functions, which are defined out of line.
// NOTE(review): the bodies of getEmptyKey() and getTombstoneKey() (listing
// lines 80 and 83) are missing from this listing.
78 template <> struct DenseMapInfo<SimpleValue> {
79  static inline SimpleValue getEmptyKey() {
81  }
82  static inline SimpleValue getTombstoneKey() {
84  }
85  static unsigned getHashValue(SimpleValue Val);
86  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
87 };
88 }
89 
// Computes the hash of the wrapped instruction from its opcode and operands.
// Binary operators and compares are hashed in a canonical operand order so
// that forms which isEqual() treats as commuted-equal hash identically.
// NOTE(review): listing lines 104 and 143 are missing below, so the tail of
// the overflow-flag expression and the final hash_combine argument are not
// visible here.
90 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
91  Instruction *Inst = Val.Inst;
92  // Hash in all of the operands as pointers.
93  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
94  Value *LHS = BinOp->getOperand(0);
95  Value *RHS = BinOp->getOperand(1);
// For commutative operators, order the two operands by pointer value.
96  if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
97  std::swap(LHS, RHS);
98 
99  if (isa<OverflowingBinaryOperator>(BinOp)) {
100  // Hash the overflow behavior
101  unsigned Overflow =
102  BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
103  BinOp->hasNoUnsignedWrap() *
105  return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
106  }
107 
108  return hash_combine(BinOp->getOpcode(), LHS, RHS);
109  }
110 
111  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
112  Value *LHS = CI->getOperand(0);
113  Value *RHS = CI->getOperand(1);
114  CmpInst::Predicate Pred = CI->getPredicate();
// Order the operands by pointer value; when they are swapped the predicate
// is swapped as well so the hashed comparison keeps its meaning.
115  if (Inst->getOperand(0) > Inst->getOperand(1)) {
116  std::swap(LHS, RHS);
117  Pred = CI->getSwappedPredicate();
118  }
119  return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
120  }
121 
// Casts additionally hash the result type.
122  if (CastInst *CI = dyn_cast<CastInst>(Inst))
123  return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
124 
// Extract/insert value instructions additionally hash their index lists.
125  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
126  return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
127  hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
128 
129  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
130  return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
131  IVI->getOperand(1),
132  hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
133 
// NOTE(review): every BinaryOperator has already returned above, so the
// isa<BinaryOperator> alternative in this assertion cannot be what makes it
// pass at this point.
134  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
135  isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
136  isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
137  isa<ShuffleVectorInst>(Inst)) &&
138  "Invalid/unknown instruction");
139 
140  // Mix in the opcode.
141  return hash_combine(
142  Inst->getOpcode(),
144 }
145 
// Equality used together with getHashValue() above.
// - If either side is a sentinel key, the two are equal only when the wrapped
//   pointers are the same.
// - Instructions with different opcodes are never equal.
// - Instructions for which isIdenticalTo() holds are equal.
// - Otherwise, a commutative binary operator equals its operand-swapped form
//   (with matching nuw/nsw flags when it is an overflowing operator), and a
//   compare equals the compare with swapped operands and swapped predicate.
146 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
147  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
148 
// Sentinels are never dereferenced; compare them by pointer only.
149  if (LHS.isSentinel() || RHS.isSentinel())
150  return LHSI == RHSI;
151 
152  if (LHSI->getOpcode() != RHSI->getOpcode())
153  return false;
154  if (LHSI->isIdenticalTo(RHSI))
155  return true;
156 
157  // If we're not strictly identical, we still might be a commutable instruction
158  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
159  if (!LHSBinOp->isCommutative())
160  return false;
161 
162  assert(isa<BinaryOperator>(RHSI) &&
163  "same opcode, but different instruction type?");
164  BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
165 
166  // Check overflow attributes
167  if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
168  assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
169  "same opcode, but different operator type?");
170  if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
171  LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
172  return false;
173  }
174 
175  // Commuted equality
176  return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
177  LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
178  }
179  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
180  assert(isa<CmpInst>(RHSI) &&
181  "same opcode, but different instruction type?");
182  CmpInst *RHSCmp = cast<CmpInst>(RHSI);
183  // Commuted equality
184  return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
185  LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
186  LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
187  }
188 
189  return false;
190 }
191 
192 //===----------------------------------------------------------------------===//
193 // CallValue
194 //===----------------------------------------------------------------------===//
195 
196 namespace {
197 /// \brief Struct representing the available call values in the scoped hash
198 /// table.
// Thin wrapper around a single Instruction pointer. Construction asserts that
// the pointer is either a sentinel or an instruction accepted by canHandle().
199 struct CallValue {
200  Instruction *Inst;
201 
202  CallValue(Instruction *I) : Inst(I) {
203  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
204  }
205 
// NOTE(review): the second operand of this || expression (listing line 208)
// is missing from the listing; only the empty-key comparison is visible.
206  bool isSentinel() const {
207  return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
209  }
210 
// Returns true only for call instructions with a non-void type for which
// onlyReadsMemory() holds.
211  static bool canHandle(Instruction *Inst) {
212  // Don't value number anything that returns void.
213  if (Inst->getType()->isVoidTy())
214  return false;
215 
216  CallInst *CI = dyn_cast<CallInst>(Inst);
217  if (!CI || !CI->onlyReadsMemory())
218  return false;
219  return true;
220  }
221 };
222 }
223 
224 namespace llvm {
// DenseMapInfo specialization for CallValue: declares the empty/tombstone keys
// plus the hashing and equality functions, which are defined out of line.
// NOTE(review): the bodies of getEmptyKey() and getTombstoneKey() (listing
// lines 227 and 230) are missing from this listing.
225 template <> struct DenseMapInfo<CallValue> {
226  static inline CallValue getEmptyKey() {
228  }
229  static inline CallValue getTombstoneKey() {
231  }
232  static unsigned getHashValue(CallValue Val);
233  static bool isEqual(CallValue LHS, CallValue RHS);
234 };
235 }
236 
// Computes the hash of the wrapped call from its opcode and operands.
// NOTE(review): listing line 242 (the remaining hash_combine argument) is
// missing from this listing.
237 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
238  Instruction *Inst = Val.Inst;
239  // Hash all of the operands as pointers and mix in the opcode.
240  return hash_combine(
241  Inst->getOpcode(),
243 }
244 
// Equality for CallValue keys: sentinel keys compare by pointer only (they are
// never dereferenced); otherwise two calls are equal when isIdenticalTo()
// holds.
245 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
246  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
247  if (LHS.isSentinel() || RHS.isSentinel())
248  return LHSI == RHSI;
249  return LHSI->isIdenticalTo(RHSI);
250 }
251 
252 //===----------------------------------------------------------------------===//
253 // EarlyCSE implementation
254 //===----------------------------------------------------------------------===//
255 
256 namespace {
257 /// \brief A simple and fast domtree-based CSE pass.
258 ///
259 /// This pass does a simple depth-first walk over the dominator tree,
260 /// eliminating trivially redundant instructions and using instsimplify to
261 /// canonicalize things as it goes. It is intended to be fast and catch obvious
262 /// cases so that instcombine and other passes are more effective. It is
263 /// expected that a later pass of GVN will catch the interesting/hard cases.
264 class EarlyCSE {
265 public:
266  Function &F;
267  const TargetLibraryInfo &TLI;
268  const TargetTransformInfo &TTI;
269  DominatorTree &DT;
270  AssumptionCache &AC;
// NOTE(review): listing lines 272-273 of this typedef are missing from the
// listing, so the full definitions of AllocatorTy and ScopedHTType are not
// visible here.
271  typedef RecyclingAllocator<
274  AllocatorTy> ScopedHTType;
275 
276  /// \brief A scoped hash table of the current values of all of our simple
277  /// scalar expressions.
278  ///
279  /// As we walk down the domtree, we look to see if instructions are in this:
280  /// if so, we replace them with what we find, otherwise we insert them so
281  /// that dominated values can succeed in their lookup.
282  ScopedHTType AvailableValues;
283 
284  /// \brief A scoped hash table of the current values of loads.
285  ///
286  /// This allows us to get efficient access to dominating loads when we have
287  /// a fully redundant load. In addition to the most recent load, we keep
288  /// track of a generation count of the read, which is compared against the
289  /// current generation count. The current generation count is incremented
290  /// after every possibly writing memory operation, which ensures that we only
291  /// CSE loads with other loads that have no intervening store.
// NOTE(review): listing lines 293-294 and 296 of these typedefs are missing
// from the listing.
292  typedef RecyclingAllocator<
295  LoadMapAllocator;
297  DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
298  LoadHTType AvailableLoads;
299 
300  /// \brief A scoped hash table of the current values of read-only call
301  /// values.
302  ///
303  /// It uses the same generation count as loads.
// NOTE(review): the CallHTType typedef (listing line 304) is missing from the
// listing.
305  CallHTType AvailableCalls;
306 
307  /// \brief This is the current generation of the memory value.
308  unsigned CurrentGeneration;
309 
310  /// \brief Set up the EarlyCSE runner for a particular function.
311  EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
312  const TargetTransformInfo &TTI, DominatorTree &DT,
313  AssumptionCache &AC)
314  : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
315 
// Walks the dominator tree and returns true if any node reported a change.
316  bool run();
317 
318 private:
319  // Almost a POD, but needs to call the constructors for the scoped hash
320  // tables so that a new scope gets pushed on. These are RAII so that the
321  // scope gets popped when the NodeScope is destroyed.
322  class NodeScope {
323  public:
324  NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
325  CallHTType &AvailableCalls)
326  : Scope(AvailableValues), LoadScope(AvailableLoads),
327  CallScope(AvailableCalls) {}
328 
329  private:
330  NodeScope(const NodeScope &) = delete;
331  void operator=(const NodeScope &) = delete;
332 
333  ScopedHTType::ScopeTy Scope;
334  LoadHTType::ScopeTy LoadScope;
335  CallHTType::ScopeTy CallScope;
336  };
337 
338  // Contains all the needed information to create a stack for doing a depth
339  // first traversal of the tree. This includes scopes for values, loads, and
340  // calls as well as the generation. There is a child iterator so that the
341  // children do not need to be stored separately.
342  class StackNode {
343  public:
// NOTE(review): listing line 346 (the rest of the parameter list, which must
// declare `child` and `end`) is missing from the listing.
// Both generation fields start out as cg; the node starts out unprocessed.
344  StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
345  CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
347  : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
348  EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
349  Processed(false) {}
350 
351  // Accessors.
352  unsigned currentGeneration() { return CurrentGeneration; }
353  unsigned childGeneration() { return ChildGeneration; }
354  void childGeneration(unsigned generation) { ChildGeneration = generation; }
355  DomTreeNode *node() { return Node; }
356  DomTreeNode::iterator childIter() { return ChildIter; }
// Returns the child the iterator currently points at and advances the
// iterator. It does not check against end() itself.
357  DomTreeNode *nextChild() {
358  DomTreeNode *child = *ChildIter;
359  ++ChildIter;
360  return child;
361  }
362  DomTreeNode::iterator end() { return EndIter; }
363  bool isProcessed() { return Processed; }
364  void process() { Processed = true; }
365 
366  private:
367  StackNode(const StackNode &) = delete;
368  void operator=(const StackNode &) = delete;
369 
370  // Members.
371  unsigned CurrentGeneration;
372  unsigned ChildGeneration;
373  DomTreeNode *Node;
374  DomTreeNode::iterator ChildIter;
375  DomTreeNode::iterator EndIter;
376  NodeScope Scopes;
377  bool Processed;
378  };
379 
380  /// \brief Wrapper class to handle memory instructions, including loads,
381  /// stores and intrinsic loads and stores defined by the target.
382  class ParseMemoryInst {
383  public:
// Classifies Inst:
// - intrinsic: the target is queried through TTI.getTgtMemIntrinsic(); the
//   load/store fields are only filled in when that succeeds and the
//   intrinsic reports exactly one memory reference. Otherwise only the
//   generic may-read/may-write flags are set and isValid() stays false.
// - LoadInst / StoreInst: Load or Store is set, Ptr is the pointer operand,
//   and Vol is set for every access that is not "simple".
// - anything else: only the generic may-read/may-write flags are set.
384  ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
385  : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
386  MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
387  MayReadFromMemory = Inst->mayReadFromMemory();
388  MayWriteToMemory = Inst->mayWriteToMemory();
389  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
390  MemIntrinsicInfo Info;
391  if (!TTI.getTgtMemIntrinsic(II, Info))
392  return;
393  if (Info.NumMemRefs == 1) {
394  Store = Info.WriteMem;
395  Load = Info.ReadMem;
396  MatchingId = Info.MatchingId;
397  MayReadFromMemory = Info.ReadMem;
398  MayWriteToMemory = Info.WriteMem;
399  Vol = Info.Vol;
400  Ptr = Info.PtrVal;
401  }
402  } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
403  Load = true;
404  Vol = !LI->isSimple();
405  Ptr = LI->getPointerOperand();
406  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
407  Store = true;
408  Vol = !SI->isSimple();
409  Ptr = SI->getPointerOperand();
410  }
411  }
412  bool isLoad() { return Load; }
413  bool isStore() { return Store; }
414  bool isVolatile() { return Vol; }
// Two parsed instructions match when they use the same pointer value and
// carry the same MatchingId.
415  bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
416  return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
417  }
// Valid means a pointer operand was identified by the constructor.
418  bool isValid() { return Ptr != nullptr; }
419  int getMatchingId() { return MatchingId; }
420  Value *getPtr() { return Ptr; }
421  bool mayReadFromMemory() { return MayReadFromMemory; }
422  bool mayWriteToMemory() { return MayWriteToMemory; }
423 
424  private:
425  bool Load;
426  bool Store;
427  bool Vol;
428  bool MayReadFromMemory;
429  bool MayWriteToMemory;
430  // For regular (non-intrinsic) loads/stores, this is set to -1. For
431  // intrinsic loads/stores, the id is retrieved from the corresponding
432  // field in the MemIntrinsicInfo structure. That field contains
433  // non-negative values only.
434  int MatchingId;
435  Value *Ptr;
436  };
437 
// Processes the instructions of one dominator tree node's block; returns
// true if the block was changed.
438  bool processNode(DomTreeNode *Node);
439 
// Returns the value made available by a previously seen memory instruction:
// a load yields itself, a store yields its stored value operand, and any
// other instruction must be an intrinsic (asserted) and is delegated to
// TTI.getOrCreateResultFromMemIntrinsic() together with ExpectedType.
440  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
441  if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
442  return LI;
443  else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
444  return SI->getValueOperand();
445  assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
446  return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
447  ExpectedType);
448  }
449 };
450 }
451 
// Processes every instruction of the basic block belonging to Node, in order:
//  1. erase trivially dead instructions,
//  2. skip assume intrinsics,
//  3. replace instructions that SimplifyInstruction can simplify,
//  4. CSE simple expressions through AvailableValues,
//  5. CSE loads through AvailableLoads (same pointer, same generation),
//  6. CSE read-only calls through AvailableCalls (same generation),
//  7. for anything else that may write memory, bump CurrentGeneration and,
//     for stores, delete a directly preceding store to the same location and
//     record the stored value as the available value for the pointer.
// Returns true if one of the steps in the instruction loop changed the block.
452 bool EarlyCSE::processNode(DomTreeNode *Node) {
453  BasicBlock *BB = Node->getBlock();
454 
455  // If this block has a single predecessor, then the predecessor is the parent
456  // of the domtree node and all of the live out memory values are still current
457  // in this block. If this block has multiple predecessors, then they could
458  // have invalidated the live-out memory values of our parent value. For now,
459  // just be conservative and invalidate memory if this block has multiple
460  // predecessors.
461  if (!BB->getSinglePredecessor())
462  ++CurrentGeneration;
463 
464  // If this node has a single predecessor which ends in a conditional branch,
465  // we can infer the value of the branch condition given that we took this
466  // path. We need the single predecessor to ensure there's not another path
467  // which reaches this block where the condition might hold a different
468  // value. Since we're adding this to the scoped hash table (like any other
469  // def), it will have been popped if we encounter a future merge block.
// NOTE(review): listing lines 477-478 (the two arms of the conditional
// expression that initializes ConditionalConstant) are missing from the
// listing.
// NOTE(review): the result of replaceDominatedUsesWith() is discarded below,
// and `Changed` is only declared after this statement, so uses rewritten here
// are not reflected in this function's return value. Confirm whether that is
// intended.
470  if (BasicBlock *Pred = BB->getSinglePredecessor())
471  if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
472  if (BI->isConditional())
473  if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
474  if (SimpleValue::canHandle(CondInst)) {
475  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
476  auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
479  AvailableValues.insert(CondInst, ConditionalConstant);
480  DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
481  << CondInst->getName() << "' as " << *ConditionalConstant
482  << " in " << BB->getName() << "\n");
483  // Replace all dominated uses with the known value
484  replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
485  BasicBlockEdge(Pred, BB));
486  }
487 
488  /// LastStore - Keep track of the last non-volatile store that we saw... for
489  /// as long as there is no instruction that reads memory. If we see a store
490  /// to the same location, we delete the dead store. This zaps trivial dead
491  /// stores which can occur in bitfield code among other things.
492  Instruction *LastStore = nullptr;
493 
494  bool Changed = false;
495  const DataLayout &DL = BB->getModule()->getDataLayout();
496 
497  // See if any instructions in the block can be eliminated. If so, do it. If
498  // not, add them to AvailableValues.
// The iterator is advanced before Inst is examined, so erasing Inst below
// does not disturb the iteration.
499  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
500  Instruction *Inst = I++;
501 
502  // Dead instructions should just be removed.
503  if (isInstructionTriviallyDead(Inst, &TLI)) {
504  DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
505  Inst->eraseFromParent();
506  Changed = true;
507  ++NumSimplify;
508  continue;
509  }
510 
511  // Skip assume intrinsics, they don't really have side effects (although
512  // they're marked as such to ensure preservation of control dependencies),
513  // and this pass will not disturb any of the assumption's control
514  // dependencies.
515  if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
516  DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
517  continue;
518  }
519 
520  // If the instruction can be simplified (e.g. X+0 = X) then replace it with
521  // its simpler value.
522  if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
523  DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
524  Inst->replaceAllUsesWith(V);
525  Inst->eraseFromParent();
526  Changed = true;
527  ++NumSimplify;
528  continue;
529  }
530 
531  // If this is a simple instruction that we can value number, process it.
532  if (SimpleValue::canHandle(Inst)) {
533  // See if the instruction has an available value. If so, use it.
534  if (Value *V = AvailableValues.lookup(Inst)) {
535  DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
536  Inst->replaceAllUsesWith(V);
537  Inst->eraseFromParent();
538  Changed = true;
539  ++NumCSE;
540  continue;
541  }
542 
543  // Otherwise, just remember that this value is available.
544  AvailableValues.insert(Inst, Inst);
545  continue;
546  }
547 
548  ParseMemoryInst MemInst(Inst, TTI);
549  // If this is a load (regular or target intrinsic), process it.
550  if (MemInst.isValid() && MemInst.isLoad()) {
551  // Volatile loads are never CSE'd, but they do end DSE tracking.
552  if (MemInst.isVolatile()) {
553  LastStore = nullptr;
554  // Don't CSE across synchronization boundaries.
555  if (Inst->mayWriteToMemory())
556  ++CurrentGeneration;
557  continue;
558  }
559 
560  // If we have an available version of this load, and if it is the right
561  // generation, replace this instruction.
562  std::pair<Value *, unsigned> InVal =
563  AvailableLoads.lookup(MemInst.getPtr());
564  if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
565  Value *Op = getOrCreateResult(InVal.first, Inst->getType());
566  if (Op != nullptr) {
567  DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
568  << " to: " << *InVal.first << '\n');
569  if (!Inst->use_empty())
570  Inst->replaceAllUsesWith(Op);
571  Inst->eraseFromParent();
572  Changed = true;
573  ++NumCSELoad;
574  continue;
575  }
576  }
577 
578  // Otherwise, remember that we have this instruction.
579  AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
580  Inst, CurrentGeneration));
581  LastStore = nullptr;
582  continue;
583  }
584 
585  // If this instruction may read from memory, forget LastStore.
586  // Load/store intrinsics will indicate both a read and a write to
587  // memory. The target may override this (e.g. so that a store intrinsic
588  // does not read from memory, and thus will be treated the same as a
589  // regular store for commoning purposes).
590  if (Inst->mayReadFromMemory() &&
591  !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
592  LastStore = nullptr;
593 
594  // If this is a read-only call, process it.
595  if (CallValue::canHandle(Inst)) {
596  // If we have an available version of this call, and if it is the right
597  // generation, replace this instruction.
598  std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
599  if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
600  DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
601  << " to: " << *InVal.first << '\n');
602  if (!Inst->use_empty())
603  Inst->replaceAllUsesWith(InVal.first);
604  Inst->eraseFromParent();
605  Changed = true;
606  ++NumCSECall;
607  continue;
608  }
609 
610  // Otherwise, remember that we have this instruction.
611  AvailableCalls.insert(
612  Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
613  continue;
614  }
615 
616  // Okay, this isn't something we can CSE at all. Check to see if it is
617  // something that could modify memory. If so, our available memory values
618  // cannot be used so bump the generation count.
619  if (Inst->mayWriteToMemory()) {
620  ++CurrentGeneration;
621 
622  if (MemInst.isValid() && MemInst.isStore()) {
623  // We do a trivial form of DSE if there are two stores to the same
624  // location with no intervening loads. Delete the earlier store.
625  if (LastStore) {
626  ParseMemoryInst LastStoreMemInst(LastStore, TTI);
627  if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
628  DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
629  << " due to: " << *Inst << '\n');
630  LastStore->eraseFromParent();
631  Changed = true;
632  ++NumDSE;
633  LastStore = nullptr;
634  }
635  // fallthrough - we can exploit information about this store
636  }
637 
638  // Okay, we just invalidated anything we knew about loaded values. Try
639  // to salvage *something* by remembering that the stored value is a live
640  // version of the pointer. It is safe to forward from volatile stores
641  // to non-volatile loads, so we don't have to check for volatility of
642  // the store.
643  AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
644  Inst, CurrentGeneration));
645 
646  // Remember that this was the last store we saw for DSE.
// Note that LastStore keeps its previous value when this store is volatile.
647  if (!MemInst.isVolatile())
648  LastStore = Inst;
649  }
650  }
651  }
652 
653  return Changed;
654 }
655 
// Runs the pass over the whole function with an explicit stack of StackNode
// objects instead of recursion, starting at the dominator tree root. Each
// node is visited in three phases: process its block, push its children one
// at a time, and finally delete it and pop it. Returns true if processNode()
// reported a change for any node. CurrentGeneration is restored to its value
// on entry before returning.
656 bool EarlyCSE::run() {
657  // Note, deque is being used here because there are significant performance
658  // gains over vector when the container becomes very large due to the
659  // specific access patterns. For more information see the mailing list
660  // discussion on this:
661  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
// The deque holds raw pointers; every StackNode allocated with new below is
// released by the matching delete in the loop's final branch.
662  std::deque<StackNode *> nodesToProcess;
663 
664  bool Changed = false;
665 
666  // Process the root node.
667  nodesToProcess.push_back(new StackNode(
668  AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
669  DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
670 
671  // Save the current generation.
672  unsigned LiveOutGeneration = CurrentGeneration;
673 
674  // Process the stack.
675  while (!nodesToProcess.empty()) {
676  // Look at the node on top of the stack. It stays on the stack until it
677  // and all of its children have been handled.
678  StackNode *NodeToProcess = nodesToProcess.back();
679 
680  // Initialize class members.
681  CurrentGeneration = NodeToProcess->currentGeneration();
682 
683  // Check if the node needs to be processed.
684  if (!NodeToProcess->isProcessed()) {
685  // Process the node.
686  Changed |= processNode(NodeToProcess->node());
// Children start from the generation reached at the end of this block.
687  NodeToProcess->childGeneration(CurrentGeneration);
688  NodeToProcess->process();
689  } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
690  // Push the next child onto the stack.
691  DomTreeNode *child = NodeToProcess->nextChild();
692  nodesToProcess.push_back(
693  new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
694  NodeToProcess->childGeneration(), child, child->begin(),
695  child->end()));
696  } else {
697  // It has been processed, and there are no more children to process,
698  // so delete it and pop it off the stack.
699  delete NodeToProcess;
700  nodesToProcess.pop_back();
701  }
702  } // while (!nodes...)
703 
704  // Reset the current generation.
705  CurrentGeneration = LiveOutGeneration;
706 
707  return Changed;
708 }
709 
712  auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
713  auto &TTI = AM->getResult<TargetIRAnalysis>(F);
714  auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
715  auto &AC = AM->getResult<AssumptionAnalysis>(F);
716 
717  EarlyCSE CSE(F, TLI, TTI, DT, AC);
718 
719  if (!CSE.run())
720  return PreservedAnalyses::all();
721 
722  // CSE preserves the dominator tree because it doesn't mutate the CFG.
723  // FIXME: Bundle this with other CFG-preservation.
726  return PA;
727 }
728 
729 namespace {
730 /// \brief A simple and fast domtree-based CSE pass.
731 ///
732 /// This pass does a simple depth-first walk over the dominator tree,
733 /// eliminating trivially redundant instructions and using instsimplify to
734 /// canonicalize things as it goes. It is intended to be fast and catch obvious
735 /// cases so that instcombine and other passes are more effective. It is
736 /// expected that a later pass of GVN will catch the interesting/hard cases.
// FunctionPass wrapper: it collects the required analyses and delegates the
// actual work to the EarlyCSE class.
737 class EarlyCSELegacyPass : public FunctionPass {
738 public:
739  static char ID;
740 
// NOTE(review): the constructor body (listing line 742) is missing from the
// listing.
741  EarlyCSELegacyPass() : FunctionPass(ID) {
743  }
744 
// Returns false without touching F when skipOptnoneFunction(F) is true;
// otherwise returns whatever EarlyCSE::run() reports.
745  bool runOnFunction(Function &F) override {
746  if (skipOptnoneFunction(F))
747  return false;
748 
749  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
750  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
751  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
752  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
753 
754  EarlyCSE CSE(F, TLI, TTI, DT, AC);
755 
756  return CSE.run();
757  }
758 
// NOTE(review): listing lines 760-763 are missing from the listing; only the
// setPreservesCFG() call is visible.
759  void getAnalysisUsage(AnalysisUsage &AU) const override {
764  AU.setPreservesCFG();
765  }
766 };
767 }
768 
769 char EarlyCSELegacyPass::ID = 0;
770 
// Factory function: allocates a new EarlyCSELegacyPass with `new` and returns
// it as a FunctionPass pointer.
771 FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
772 
773 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
774  false)
779 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
void initializeEarlyCSELegacyPassPass(PassRegistry &)
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
A parsed version of the target data layout string and methods for querying it. ...
Definition: DataLayout.h:104
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:240
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:537
static SimpleValue getTombstoneKey()
Definition: EarlyCSE.cpp:82
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:679
ExtractValueInst - This instruction extracts a struct member or array element value from an aggregate...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
value_op_iterator value_op_begin()
Definition: User.h:209
CallInst - This class represents a function call, abstracting a target machine's calling convention...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of .assume calls within a function.
Analysis pass providing the TargetTransformInfo.
static CallValue getTombstoneKey()
Definition: EarlyCSE.cpp:229
value_op_iterator value_op_end()
Definition: User.h:212
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:166
F(f)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge)
Replace each use of 'From' with 'To' if that use is dominated by the given edge.
Definition: Local.cpp:1328
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:231
bool onlyReadsMemory() const
Determine if the call does not access or only reads memory.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:41
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
bool isIdenticalTo(const Instruction *I) const
isIdenticalTo - Return true if the specified instruction is exactly identical to the current one...
std::vector< DomTreeNodeBase< NodeT > * >::iterator iterator
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:389
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:116
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
static CallValue getEmptyKey()
Definition: EarlyCSE.cpp:226
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
#define false
Definition: ConvertUTF.c:65
bool mayReadFromMemory() const
mayReadFromMemory - Return true if this instruction may read memory.
This file provides the interface for a simple, fast CSE pass.
machine cse
Definition: MachineCSE.cpp:115
Base class for the actual dominator tree node.
StoreInst - an instruction for storing to memory.
Definition: Instructions.h:316
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:67
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:342
Wrapper pass for TargetTransformInfo.
An abstract set of preserved analyses following a transformation pass run.
Definition: PassManager.h:69
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
static SimpleValue getEmptyKey()
Definition: EarlyCSE.cpp:79
Represent the analysis usage information of a pass.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:697
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
Value * getOperand(unsigned i) const
Definition: User.h:118
FunctionPass * createEarlyCSEPass()
Definition: EarlyCSE.cpp:771
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:760
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:91
PreservedAnalyses run(Function &F, AnalysisManager< Function > *AM)
Run the pass over the function.
Definition: EarlyCSE.cpp:710
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
bool mayWriteToMemory() const
mayWriteToMemory - Return true if this instruction may modify memory.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
A function analysis which provides an AssumptionCache.
static const char * getPtr(const MachOObjectFile *O, size_t Offset)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:233
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Provides information about what library functions are available for the current target.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:67
PassT::Result & getResult(IRUnitT &IR)
Get the result of an analysis pass for this module.
Definition: PassManager.h:311
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:530
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:603
BasicBlock * getSinglePredecessor()
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:211
NodeT * getBlock() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:481
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
#define I(x, y, z)
Definition: MD5.cpp:54
void preserve()
Mark a particular pass as preserved, adding it to the set.
Definition: PassManager.h:98
Analysis pass providing the TargetLibraryInfo.
bool use_empty() const
Definition: Value.h:275
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
isInstructionTriviallyDead - Return true if the result produced by the instruction is not used...
Definition: Local.cpp:282
LLVM Value Representation.
Definition: Value.h:69
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:112
#define DEBUG(X)
Definition: Debug.h:92
A generic analysis pass manager with lazy running and caching of results.
Value * SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr)
SimplifyInstruction - See if we can compute a simplified version of this instruction.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:203
This pass exposes codegen information to IR-level passes.
static bool isVolatile(Instruction *Inst)
Information about a load/store intrinsic defined by the target.
IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:37
bool isVoidTy() const
isVoidTy - Return true if this is 'void'.
Definition: Type.h:137
InsertValueInst - This instruction inserts a struct field or array element value into an aggregate va...