1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
55 #include <cassert>
56 #include <deque>
57 #include <memory>
58 #include <utility>
59 
60 using namespace llvm;
61 using namespace llvm::PatternMatch;
62 
63 #define DEBUG_TYPE "early-cse"
64 
65 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE, "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70 STATISTIC(NumDSE, "Number of trivial dead stores removed");
71 
72 DEBUG_COUNTER(CSECounter, "early-cse",
73  "Controls which instructions are removed");
74 
75 static cl::opt<unsigned> EarlyCSEMssaOptCap(
76  "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
77  cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
78  "for faster compile. Caps the MemorySSA clobbering calls."));
79 
80 static cl::opt<bool> EarlyCSEDebugHash(
81  "earlycse-debug-hash", cl::init(false), cl::Hidden,
82  cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
83  "function is well-behaved w.r.t. its isEqual predicate"));
84 
85 //===----------------------------------------------------------------------===//
86 // SimpleValue
87 //===----------------------------------------------------------------------===//
88 
89 namespace {
90 
91 /// Struct representing the available values in the scoped hash table.
92 struct SimpleValue {
93  Instruction *Inst;
94 
95  SimpleValue(Instruction *I) : Inst(I) {
96  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
97  }
98 
99  bool isSentinel() const {
100  return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
101  Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
102  }
103 
104  static bool canHandle(Instruction *Inst) {
105  // This can only handle non-void readnone functions.
106  // Also handled are constrained intrinsics that look like the types
107  // of instructions handled below (UnaryOperator, etc.).
108  if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
109  if (Function *F = CI->getCalledFunction()) {
110  switch ((Intrinsic::ID)F->getIntrinsicID()) {
111  case Intrinsic::experimental_constrained_fadd:
112  case Intrinsic::experimental_constrained_fsub:
113  case Intrinsic::experimental_constrained_fmul:
114  case Intrinsic::experimental_constrained_fdiv:
115  case Intrinsic::experimental_constrained_frem:
116  case Intrinsic::experimental_constrained_fptosi:
117  case Intrinsic::experimental_constrained_sitofp:
118  case Intrinsic::experimental_constrained_fptoui:
119  case Intrinsic::experimental_constrained_uitofp:
120  case Intrinsic::experimental_constrained_fcmp:
121  case Intrinsic::experimental_constrained_fcmps: {
122  auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
123  if (CFP->getExceptionBehavior() &&
124  CFP->getExceptionBehavior() == fp::ebStrict)
125  return false;
126  // Since we CSE across function calls we must not allow
127  // the rounding mode to change.
128  if (CFP->getRoundingMode() &&
129  CFP->getRoundingMode() == RoundingMode::Dynamic)
130  return false;
131  return true;
132  }
133  }
134  }
135  return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
136  // FIXME: Currently the calls which may access the thread id may
137  // be considered as not accessing the memory. But this is
138  // problematic for coroutines, since coroutines may resume in a
139  // different thread. So we disable the optimization here for the
140  // correctness. However, it may block many other correct
141  // optimizations. Revert this one when we detect the memory
142  // accessing kind more precisely.
143  !CI->getFunction()->isPresplitCoroutine();
144  }
145  return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146  isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
147  isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
148  isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
149  isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
150  isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
151  }
152 };
153 
154 } // end anonymous namespace
155 
156 namespace llvm {
157 
158 template <> struct DenseMapInfo<SimpleValue> {
159  static inline SimpleValue getEmptyKey() {
160  return DenseMapInfo<Instruction *>::getEmptyKey();
161  }
162 
163  static inline SimpleValue getTombstoneKey() {
164  return DenseMapInfo<Instruction *>::getTombstoneKey();
165  }
166 
167  static unsigned getHashValue(SimpleValue Val);
168  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
169 };
170 
171 } // end namespace llvm
172 
173 /// Match a 'select' including an optional 'not' of the condition.
174 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
175  Value *&B,
176  SelectPatternFlavor &Flavor) {
177  // Return false if V is not even a select.
178  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
179  return false;
180 
181  // Look through a 'not' of the condition operand by swapping A/B.
182  Value *CondNot;
183  if (match(Cond, m_Not(m_Value(CondNot)))) {
184  Cond = CondNot;
185  std::swap(A, B);
186  }
187 
188  // Match canonical forms of min/max. We are not using ValueTracking's
189  // more powerful matchSelectPattern() because it may rely on instruction flags
190  // such as "nsw". That would be incompatible with the current hashing
191  // mechanism that may remove flags to increase the likelihood of CSE.
192 
193  Flavor = SPF_UNKNOWN;
194  CmpInst::Predicate Pred;
195 
196  if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
197  // Check for commuted variants of min/max by swapping predicate.
198  // If we do not match the standard or commuted patterns, this is not a
199  // recognized form of min/max, but it is still a select, so return true.
200  if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
201  return true;
202  Pred = ICmpInst::getSwappedPredicate(Pred);
203  }
204 
205  switch (Pred) {
206  case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
207  case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
208  case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
209  case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
210  // Non-strict inequalities.
211  case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
212  case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
213  case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
214  case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
215  default: break;
216  }
217 
218  return true;
219 }
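// For example, "select (not %c), %a, %b" is treated as "select %c, %b, %a",
// and a select whose condition compares its own true/false operands, such as
//   %c = icmp sgt i32 %a, %b
//   %m = select i1 %c, i32 %a, i32 %b
// is recognized as SPF_SMAX, so equivalent min/max forms can hash alike below.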
220 
221 static unsigned getHashValueImpl(SimpleValue Val) {
222  Instruction *Inst = Val.Inst;
223  // Hash in all of the operands as pointers.
224  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
225  Value *LHS = BinOp->getOperand(0);
226  Value *RHS = BinOp->getOperand(1);
227  if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
228  std::swap(LHS, RHS);
229 
230  return hash_combine(BinOp->getOpcode(), LHS, RHS);
231  }
232 
233  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
234  // Compares can be commuted by swapping the comparands and
235  // updating the predicate. Choose the form that has the
236  // comparands in sorted order, or in the case of a tie, the
237  // one with the lower predicate.
238  Value *LHS = CI->getOperand(0);
239  Value *RHS = CI->getOperand(1);
240  CmpInst::Predicate Pred = CI->getPredicate();
241  CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
242  if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
243  std::swap(LHS, RHS);
244  Pred = SwappedPred;
245  }
246  return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
247  }
248 
249  // Hash general selects to allow matching commuted true/false operands.
250  SelectPatternFlavor SPF;
251  Value *Cond, *A, *B;
252  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
253  // Hash min/max (cmp + select) to allow for commuted operands.
254  // Min/max may also have non-canonical compare predicate (eg, the compare for
255  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
256  // compare.
257  // TODO: We should also detect FP min/max.
258  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
259  SPF == SPF_UMIN || SPF == SPF_UMAX) {
260  if (A > B)
261  std::swap(A, B);
262  return hash_combine(Inst->getOpcode(), SPF, A, B);
263  }
264 
265  // Hash general selects to allow matching commuted true/false operands.
266 
267  // If we do not have a compare as the condition, just hash in the condition.
268  CmpInst::Predicate Pred;
269  Value *X, *Y;
270  if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
271  return hash_combine(Inst->getOpcode(), Cond, A, B);
272 
273  // Similar to cmp normalization (above) - canonicalize the predicate value:
274  // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
275  if (CmpInst::getInversePredicate(Pred) < Pred) {
276  Pred = CmpInst::getInversePredicate(Pred);
277  std::swap(A, B);
278  }
279  return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
280  }
281 
282  if (CastInst *CI = dyn_cast<CastInst>(Inst))
283  return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
284 
285  if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
286  return hash_combine(FI->getOpcode(), FI->getOperand(0));
287 
288  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
289  return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
290  hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
291 
292  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
293  return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
294  IVI->getOperand(1),
295  hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
296 
297  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
298  isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
299  isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
300  isa<FreezeInst>(Inst)) &&
301  "Invalid/unknown instruction");
302 
303  // Handle intrinsics with commutative operands.
304  // TODO: Extend this to handle intrinsics with >2 operands where the 1st
305  // 2 operands are commutative.
306  auto *II = dyn_cast<IntrinsicInst>(Inst);
307  if (II && II->isCommutative() && II->arg_size() == 2) {
308  Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
309  if (LHS > RHS)
310  std::swap(LHS, RHS);
311  return hash_combine(II->getOpcode(), LHS, RHS);
312  }
313 
314  // gc.relocate is 'special' call: its second and third operands are
315  // not real values, but indices into statepoint's argument list.
316  // Get values they point to.
317  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
318  return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
319  GCR->getBasePtr(), GCR->getDerivedPtr());
320 
321  // Mix in the opcode.
322  return hash_combine(
323  Inst->getOpcode(),
324  hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
325 }
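// Note that the commutative canonicalizations above are what allow, e.g.,
// "add i32 %x, %y" and "add i32 %y, %x" (or "icmp ult %x, %y" and
// "icmp ugt %y, %x") to produce the same hash, so that isEqual can then
// confirm they are CSE candidates.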
326 
327 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
328 #ifndef NDEBUG
329  // If -earlycse-debug-hash was specified, return a constant -- this
330  // will force all hashing to collide, so we'll exhaustively search
331  // the table for a match, and the assertion in isEqual will fire if
332  // there's a bug causing equal keys to hash differently.
333  if (EarlyCSEDebugHash)
334  return 0;
335 #endif
336  return getHashValueImpl(Val);
337 }
338 
339 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
340  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
341 
342  if (LHS.isSentinel() || RHS.isSentinel())
343  return LHSI == RHSI;
344 
345  if (LHSI->getOpcode() != RHSI->getOpcode())
346  return false;
347  if (LHSI->isIdenticalToWhenDefined(RHSI))
348  return true;
349 
350  // If we're not strictly identical, we still might be a commutable instruction
351  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
352  if (!LHSBinOp->isCommutative())
353  return false;
354 
355  assert(isa<BinaryOperator>(RHSI) &&
356  "same opcode, but different instruction type?");
357  BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
358 
359  // Commuted equality
360  return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
361  LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
362  }
363  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
364  assert(isa<CmpInst>(RHSI) &&
365  "same opcode, but different instruction type?");
366  CmpInst *RHSCmp = cast<CmpInst>(RHSI);
367  // Commuted equality
368  return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
369  LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
370  LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
371  }
372 
373  // TODO: Extend this for >2 args by matching the trailing N-2 args.
374  auto *LII = dyn_cast<IntrinsicInst>(LHSI);
375  auto *RII = dyn_cast<IntrinsicInst>(RHSI);
376  if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
377  LII->isCommutative() && LII->arg_size() == 2) {
378  return LII->getArgOperand(0) == RII->getArgOperand(1) &&
379  LII->getArgOperand(1) == RII->getArgOperand(0);
380  }
381 
382  // See comment above in `getHashValue()`.
383  if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
384  if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
385  return GCR1->getOperand(0) == GCR2->getOperand(0) &&
386  GCR1->getBasePtr() == GCR2->getBasePtr() &&
387  GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
388 
389  // Min/max can occur with commuted operands, non-canonical predicates,
390  // and/or non-canonical operands.
391  // Selects can be non-trivially equivalent via inverted conditions and swaps.
392  SelectPatternFlavor LSPF, RSPF;
393  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
394  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
395  matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
396  if (LSPF == RSPF) {
397  // TODO: We should also detect FP min/max.
398  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
399  LSPF == SPF_UMIN || LSPF == SPF_UMAX)
400  return ((LHSA == RHSA && LHSB == RHSB) ||
401  (LHSA == RHSB && LHSB == RHSA));
402 
403  // select Cond, A, B <--> select not(Cond), B, A
404  if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
405  return true;
406  }
407 
408  // If the true/false operands are swapped and the conditions are compares
409  // with inverted predicates, the selects are equal:
410  // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
411  //
412  // This also handles patterns with a double-negation in the sense of not +
413  // inverse, because we looked through a 'not' in the matching function and
414  // swapped A/B:
415  // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
416  //
417  // This intentionally does NOT handle patterns with a double-negation in
418  // the sense of not + not, because doing so could result in values
419  // comparing
420  // as equal that hash differently in the min/max cases like:
421  // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
422  // ^ hashes as min ^ would not hash as min
423  // In the context of the EarlyCSE pass, however, such cases never reach
424  // this code, as we simplify the double-negation before hashing the second
425  // select (and so still succeed at CSEing them).
426  if (LHSA == RHSB && LHSB == RHSA) {
427  CmpInst::Predicate PredL, PredR;
428  Value *X, *Y;
429  if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
430  match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
431  CmpInst::getInversePredicate(PredL) == PredR)
432  return true;
433  }
434  }
435 
436  return false;
437 }
438 
439 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
440  // These comparisons are nontrivial, so assert that equality implies
441  // hash equality (DenseMap demands this as an invariant).
442  bool Result = isEqualImpl(LHS, RHS);
443  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
444  getHashValueImpl(LHS) == getHashValueImpl(RHS));
445  return Result;
446 }
447 
448 //===----------------------------------------------------------------------===//
449 // CallValue
450 //===----------------------------------------------------------------------===//
451 
452 namespace {
453 
454 /// Struct representing the available call values in the scoped hash
455 /// table.
456 struct CallValue {
457  Instruction *Inst;
458 
459  CallValue(Instruction *I) : Inst(I) {
460  assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
461  }
462 
463  bool isSentinel() const {
464  return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
465  Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
466  }
467 
468  static bool canHandle(Instruction *Inst) {
469  // Don't value number anything that returns void.
470  if (Inst->getType()->isVoidTy())
471  return false;
472 
473  CallInst *CI = dyn_cast<CallInst>(Inst);
474  if (!CI || !CI->onlyReadsMemory() ||
475  // FIXME: Currently the calls which may access the thread id may
476  // be considered as not accessing the memory. But this is
477  // problematic for coroutines, since coroutines may resume in a
478  // different thread. So we disable the optimization here for the
479  // correctness. However, it may block many other correct
480  // optimizations. Revert this one when we detect the memory
481  // accessing kind more precisely.
482  CI->getFunction()->isPresplitCoroutine())
483  return false;
484  return true;
485  }
486 };
487 
488 } // end anonymous namespace
489 
490 namespace llvm {
491 
492 template <> struct DenseMapInfo<CallValue> {
493  static inline CallValue getEmptyKey() {
494  return DenseMapInfo<Instruction *>::getEmptyKey();
495  }
496 
497  static inline CallValue getTombstoneKey() {
498  return DenseMapInfo<Instruction *>::getTombstoneKey();
499  }
500 
501  static unsigned getHashValue(CallValue Val);
502  static bool isEqual(CallValue LHS, CallValue RHS);
503 };
504 
505 } // end namespace llvm
506 
507 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
508  Instruction *Inst = Val.Inst;
509 
510  // Hash all of the operands as pointers and mix in the opcode.
511  return hash_combine(
512  Inst->getOpcode(),
513  hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
514 }
515 
516 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
517  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
518  if (LHS.isSentinel() || RHS.isSentinel())
519  return LHSI == RHSI;
520 
521  return LHSI->isIdenticalTo(RHSI);
522 }
523 
524 //===----------------------------------------------------------------------===//
525 // EarlyCSE implementation
526 //===----------------------------------------------------------------------===//
527 
528 namespace {
529 
530 /// A simple and fast domtree-based CSE pass.
531 ///
532 /// This pass does a simple depth-first walk over the dominator tree,
533 /// eliminating trivially redundant instructions and using instsimplify to
534 /// canonicalize things as it goes. It is intended to be fast and catch obvious
535 /// cases so that instcombine and other passes are more effective. It is
536 /// expected that a later pass of GVN will catch the interesting/hard cases.
537 class EarlyCSE {
538 public:
539  const TargetLibraryInfo &TLI;
540  const TargetTransformInfo &TTI;
541  DominatorTree &DT;
542  AssumptionCache &AC;
543  const SimplifyQuery SQ;
544  MemorySSA *MSSA;
545  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
546 
547  using AllocatorTy =
548  RecyclingAllocator<BumpPtrAllocator,
549  ScopedHashTableVal<SimpleValue, Value *>>;
550  using ScopedHTType =
551  ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
552  AllocatorTy>;
553 
554  /// A scoped hash table of the current values of all of our simple
555  /// scalar expressions.
556  ///
557  /// As we walk down the domtree, we look to see if instructions are in this:
558  /// if so, we replace them with what we find, otherwise we insert them so
559  /// that dominated values can succeed in their lookup.
560  ScopedHTType AvailableValues;
561 
562  /// A scoped hash table of the current values of previously encountered
563  /// memory locations.
564  ///
565  /// This allows us to get efficient access to dominating loads or stores when
566  /// we have a fully redundant load. In addition to the most recent load, we
567  /// keep track of a generation count of the read, which is compared against
568  /// the current generation count. The current generation count is incremented
569  /// after every possibly writing memory operation, which ensures that we only
570  /// CSE loads with other loads that have no intervening store. Ordering
571  /// events (such as fences or atomic instructions) increment the generation
572  /// count as well; essentially, we model these as writes to all possible
573  /// locations. Note that atomic and/or volatile loads and stores can be
574  /// present in the table; it is the responsibility of the consumer to inspect
575  /// the atomicity/volatility if needed.
576  struct LoadValue {
577  Instruction *DefInst = nullptr;
578  unsigned Generation = 0;
579  int MatchingId = -1;
580  bool IsAtomic = false;
581 
582  LoadValue() = default;
583  LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
584  bool IsAtomic)
585  : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
586  IsAtomic(IsAtomic) {}
587  };
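// For example, in:
//   %v1 = load i32, ptr %p
//   call void @clobber()        ; may write memory -> generation is bumped
//   %v2 = load i32, ptr %p
// the second load is recorded under a newer generation than the first, so it
// is only CSE'd if MemorySSA (when available) proves that the call does not
// actually clobber %p.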
588 
589  using LoadMapAllocator =
590  RecyclingAllocator<BumpPtrAllocator,
591  ScopedHashTableVal<Value *, LoadValue>>;
592  using LoadHTType =
593  ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
594  LoadMapAllocator>;
595 
596  LoadHTType AvailableLoads;
597 
598  // A scoped hash table mapping memory locations (represented as typed
599  // addresses) to generation numbers at which that memory location became
600  // (henceforth indefinitely) invariant.
601  using InvariantMapAllocator =
602  RecyclingAllocator<BumpPtrAllocator,
603  ScopedHashTableVal<MemoryLocation, unsigned>>;
604  using InvariantHTType =
605  ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
606  InvariantMapAllocator>;
607  InvariantHTType AvailableInvariants;
608 
609  /// A scoped hash table of the current values of read-only call
610  /// values.
611  ///
612  /// It uses the same generation count as loads.
613  using CallHTType =
614  ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
615  CallHTType AvailableCalls;
616 
617  /// This is the current generation of the memory value.
618  unsigned CurrentGeneration = 0;
619 
620  /// Set up the EarlyCSE runner for a particular function.
621  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
622  const TargetTransformInfo &TTI, DominatorTree &DT,
623  AssumptionCache &AC, MemorySSA *MSSA)
624  : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
625  MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
626 
627  bool run();
628 
629 private:
630  unsigned ClobberCounter = 0;
631  // Almost a POD, but needs to call the constructors for the scoped hash
632  // tables so that a new scope gets pushed on. These are RAII so that the
633  // scope gets popped when the NodeScope is destroyed.
634  class NodeScope {
635  public:
636  NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
637  InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
638  : Scope(AvailableValues), LoadScope(AvailableLoads),
639  InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
640  NodeScope(const NodeScope &) = delete;
641  NodeScope &operator=(const NodeScope &) = delete;
642 
643  private:
644  ScopedHTType::ScopeTy Scope;
645  LoadHTType::ScopeTy LoadScope;
646  InvariantHTType::ScopeTy InvariantScope;
647  CallHTType::ScopeTy CallScope;
648  };
649 
650  // Contains all the needed information to create a stack for doing a depth
651  // first traversal of the tree. This includes scopes for values, loads, and
652  // calls as well as the generation. There is a child iterator so that the
653  // children do not need to be store separately.
654  class StackNode {
655  public:
656  StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
657  InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
658  unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
659  DomTreeNode::const_iterator end)
660  : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
661  EndIter(end),
662  Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
663  AvailableCalls)
664  {}
665  StackNode(const StackNode &) = delete;
666  StackNode &operator=(const StackNode &) = delete;
667 
668  // Accessors.
669  unsigned currentGeneration() const { return CurrentGeneration; }
670  unsigned childGeneration() const { return ChildGeneration; }
671  void childGeneration(unsigned generation) { ChildGeneration = generation; }
672  DomTreeNode *node() { return Node; }
673  DomTreeNode::const_iterator childIter() const { return ChildIter; }
674 
675  DomTreeNode *nextChild() {
676  DomTreeNode *child = *ChildIter;
677  ++ChildIter;
678  return child;
679  }
680 
681  DomTreeNode::const_iterator end() const { return EndIter; }
682  bool isProcessed() const { return Processed; }
683  void process() { Processed = true; }
684 
685  private:
686  unsigned CurrentGeneration;
687  unsigned ChildGeneration;
688  DomTreeNode *Node;
689  DomTreeNode::const_iterator ChildIter;
690  DomTreeNode::const_iterator EndIter;
691  NodeScope Scopes;
692  bool Processed = false;
693  };
694 
695  /// Wrapper class to handle memory instructions, including loads,
696  /// stores and intrinsic loads and stores defined by the target.
697  class ParseMemoryInst {
698  public:
699  ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
700  : Inst(Inst) {
701  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
702  IntrID = II->getIntrinsicID();
703  if (TTI.getTgtMemIntrinsic(II, Info))
704  return;
705  if (isHandledNonTargetIntrinsic(IntrID)) {
706  switch (IntrID) {
707  case Intrinsic::masked_load:
708  Info.PtrVal = Inst->getOperand(0);
709  Info.MatchingId = Intrinsic::masked_load;
710  Info.ReadMem = true;
711  Info.WriteMem = false;
712  Info.IsVolatile = false;
713  break;
714  case Intrinsic::masked_store:
715  Info.PtrVal = Inst->getOperand(1);
716  // Use the ID of masked load as the "matching id". This will
717  // prevent matching non-masked loads/stores with masked ones
718  // (which could be done), but at the moment, the code here
719  // does not support matching intrinsics with non-intrinsics,
720  // so keep the MatchingIds specific to masked instructions
721  // for now (TODO).
722  Info.MatchingId = Intrinsic::masked_load;
723  Info.ReadMem = false;
724  Info.WriteMem = true;
725  Info.IsVolatile = false;
726  break;
727  }
728  }
729  }
730  }
731 
732  Instruction *get() { return Inst; }
733  const Instruction *get() const { return Inst; }
734 
735  bool isLoad() const {
736  if (IntrID != 0)
737  return Info.ReadMem;
738  return isa<LoadInst>(Inst);
739  }
740 
741  bool isStore() const {
742  if (IntrID != 0)
743  return Info.WriteMem;
744  return isa<StoreInst>(Inst);
745  }
746 
747  bool isAtomic() const {
748  if (IntrID != 0)
749  return Info.Ordering != AtomicOrdering::NotAtomic;
750  return Inst->isAtomic();
751  }
752 
753  bool isUnordered() const {
754  if (IntrID != 0)
755  return Info.isUnordered();
756 
757  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
758  return LI->isUnordered();
759  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
760  return SI->isUnordered();
761  }
762  // Conservative answer
763  return !Inst->isAtomic();
764  }
765 
766  bool isVolatile() const {
767  if (IntrID != 0)
768  return Info.IsVolatile;
769 
770  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
771  return LI->isVolatile();
772  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
773  return SI->isVolatile();
774  }
775  // Conservative answer
776  return true;
777  }
778 
779  bool isInvariantLoad() const {
780  if (auto *LI = dyn_cast<LoadInst>(Inst))
781  return LI->hasMetadata(LLVMContext::MD_invariant_load);
782  return false;
783  }
784 
785  bool isValid() const { return getPointerOperand() != nullptr; }
786 
787  // For regular (non-intrinsic) loads/stores, this is set to -1. For
788  // intrinsic loads/stores, the id is retrieved from the corresponding
789  // field in the MemIntrinsicInfo structure. That field contains
790  // non-negative values only.
791  int getMatchingId() const {
792  if (IntrID != 0)
793  return Info.MatchingId;
794  return -1;
795  }
796 
797  Value *getPointerOperand() const {
798  if (IntrID != 0)
799  return Info.PtrVal;
800  return getLoadStorePointerOperand(Inst);
801  }
802 
803  Type *getValueType() const {
804  // TODO: handle target-specific intrinsics.
805  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
806  switch (II->getIntrinsicID()) {
807  case Intrinsic::masked_load:
808  return II->getType();
809  case Intrinsic::masked_store:
810  return II->getArgOperand(0)->getType();
811  default:
812  return nullptr;
813  }
814  }
815  return getLoadStoreType(Inst);
816  }
817 
818  bool mayReadFromMemory() const {
819  if (IntrID != 0)
820  return Info.ReadMem;
821  return Inst->mayReadFromMemory();
822  }
823 
824  bool mayWriteToMemory() const {
825  if (IntrID != 0)
826  return Info.WriteMem;
827  return Inst->mayWriteToMemory();
828  }
829 
830  private:
831  Intrinsic::ID IntrID = 0;
832  MemIntrinsicInfo Info;
833  Instruction *Inst;
834  };
835 
836  // This function is to prevent accidentally passing a non-target
837  // intrinsic ID to TargetTransformInfo.
838  static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
839  switch (ID) {
840  case Intrinsic::masked_load:
841  case Intrinsic::masked_store:
842  return true;
843  }
844  return false;
845  }
846  static bool isHandledNonTargetIntrinsic(const Value *V) {
847  if (auto *II = dyn_cast<IntrinsicInst>(V))
848  return isHandledNonTargetIntrinsic(II->getIntrinsicID());
849  return false;
850  }
851 
852  bool processNode(DomTreeNode *Node);
853 
854  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
855  const BasicBlock *BB, const BasicBlock *Pred);
856 
857  Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
858  unsigned CurrentGeneration);
859 
860  bool overridingStores(const ParseMemoryInst &Earlier,
861  const ParseMemoryInst &Later);
862 
863  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
864  // TODO: We could insert relevant casts on type mismatch here.
865  if (auto *LI = dyn_cast<LoadInst>(Inst))
866  return LI->getType() == ExpectedType ? LI : nullptr;
867  else if (auto *SI = dyn_cast<StoreInst>(Inst)) {
868  Value *V = SI->getValueOperand();
869  return V->getType() == ExpectedType ? V : nullptr;
870  }
871  assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
872  auto *II = cast<IntrinsicInst>(Inst);
873  if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
874  return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
875  return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
876  }
877 
878  Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
879  Type *ExpectedType) const {
880  switch (II->getIntrinsicID()) {
881  case Intrinsic::masked_load:
882  return II;
883  case Intrinsic::masked_store:
884  return II->getOperand(0);
885  }
886  return nullptr;
887  }
888 
889  /// Return true if the instruction is known to only operate on memory
890  /// provably invariant in the given "generation".
891  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
892 
893  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
894  Instruction *EarlierInst, Instruction *LaterInst);
895 
896  bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
897  const IntrinsicInst *Later) {
898  auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
899  // Is Mask0 a submask of Mask1?
900  if (Mask0 == Mask1)
901  return true;
902  if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
903  return false;
904  auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
905  auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
906  if (!Vec0 || !Vec1)
907  return false;
908  if (Vec0->getType() != Vec1->getType())
909  return false;
910  for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
911  Constant *Elem0 = Vec0->getOperand(i);
912  Constant *Elem1 = Vec1->getOperand(i);
913  auto *Int0 = dyn_cast<ConstantInt>(Elem0);
914  if (Int0 && Int0->isZero())
915  continue;
916  auto *Int1 = dyn_cast<ConstantInt>(Elem1);
917  if (Int1 && !Int1->isZero())
918  continue;
919  if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
920  return false;
921  if (Elem0 == Elem1)
922  continue;
923  return false;
924  }
925  return true;
926  };
927  auto PtrOp = [](const IntrinsicInst *II) {
928  if (II->getIntrinsicID() == Intrinsic::masked_load)
929  return II->getOperand(0);
930  if (II->getIntrinsicID() == Intrinsic::masked_store)
931  return II->getOperand(1);
932  llvm_unreachable("Unexpected IntrinsicInst");
933  };
934  auto MaskOp = [](const IntrinsicInst *II) {
935  if (II->getIntrinsicID() == Intrinsic::masked_load)
936  return II->getOperand(2);
937  if (II->getIntrinsicID() == Intrinsic::masked_store)
938  return II->getOperand(3);
939  llvm_unreachable("Unexpected IntrinsicInst");
940  };
941  auto ThruOp = [](const IntrinsicInst *II) {
942  if (II->getIntrinsicID() == Intrinsic::masked_load)
943  return II->getOperand(3);
944  llvm_unreachable("Unexpected IntrinsicInst");
945  };
946 
947  if (PtrOp(Earlier) != PtrOp(Later))
948  return false;
949 
950  Intrinsic::ID IDE = Earlier->getIntrinsicID();
951  Intrinsic::ID IDL = Later->getIntrinsicID();
952  // We could really use specific intrinsic classes for masked loads
953  // and stores in IntrinsicInst.h.
954  if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
955  // Trying to replace later masked load with the earlier one.
956  // Check that the pointers are the same, and
957  // - masks and pass-throughs are the same, or
958  // - replacee's pass-through is "undef" and replacer's mask is a
959  // super-set of the replacee's mask.
960  if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
961  return true;
962  if (!isa<UndefValue>(ThruOp(Later)))
963  return false;
964  return IsSubmask(MaskOp(Later), MaskOp(Earlier));
965  }
966  if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
967  // Trying to replace a load of a stored value with the store's value.
968  // Check that the pointers are the same, and
969  // - load's mask is a subset of store's mask, and
970  // - load's pass-through is "undef".
971  if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
972  return false;
973  return isa<UndefValue>(ThruOp(Later));
974  }
975  if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
976  // Trying to remove a store of the loaded value.
977  // Check that the pointers are the same, and
978  // - store's mask is a subset of the load's mask.
979  return IsSubmask(MaskOp(Later), MaskOp(Earlier));
980  }
981  if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
982  // Trying to remove a dead store (earlier).
983  // Check that the pointers are the same,
984  // - the to-be-removed store's mask is a subset of the other store's
985  // mask.
986  return IsSubmask(MaskOp(Earlier), MaskOp(Later));
987  }
988  return false;
989  }
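// As an illustration of the submask rule: a mask of <i1 1, i1 0> is a submask
// of <i1 1, i1 1>, so a masked load with the narrower mask (and an undef
// pass-through) can reuse the value produced under the wider mask.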
990 
991  void removeMSSA(Instruction &Inst) {
992  if (!MSSA)
993  return;
994  if (VerifyMemorySSA)
995  MSSA->verifyMemorySSA();
996  // Removing a store here can leave MemorySSA in an unoptimized state by
997  // creating MemoryPhis that have identical arguments and by creating
998  // MemoryUses whose defining access is not an actual clobber. The phi case
999  // is handled by MemorySSA when passing OptimizePhis = true to
1000  // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1001  // by MemorySSA's getClobberingMemoryAccess.
1002  MSSAUpdater->removeMemoryAccess(&Inst, true);
1003  }
1004 };
1005 
1006 } // end anonymous namespace
1007 
1008 /// Determine if the memory referenced by LaterInst is from the same heap
1009 /// version as EarlierInst.
1010 /// This is currently called in two scenarios:
1011 ///
1012 /// load p
1013 /// ...
1014 /// load p
1015 ///
1016 /// and
1017 ///
1018 /// x = load p
1019 /// ...
1020 /// store x, p
1021 ///
1022 /// in both cases we want to verify that there are no possible writes to the
1023 /// memory referenced by p between the earlier and later instruction.
1024 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1025  unsigned LaterGeneration,
1026  Instruction *EarlierInst,
1027  Instruction *LaterInst) {
1028  // Check the simple memory generation tracking first.
1029  if (EarlierGeneration == LaterGeneration)
1030  return true;
1031 
1032  if (!MSSA)
1033  return false;
1034 
1035  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1036  // read/write memory, then we can safely return true here.
1037  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1038  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1039  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1040  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1041  // with the default optimization pipeline.
1042  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1043  if (!EarlierMA)
1044  return true;
1045  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1046  if (!LaterMA)
1047  return true;
1048 
1049  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1050  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1051  // EarlierInst and LaterInst and neither can any other write that potentially
1052  // clobbers LaterInst.
1053  MemoryAccess *LaterDef;
1054  if (ClobberCounter < EarlyCSEMssaOptCap) {
1055  LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1056  ClobberCounter++;
1057  } else
1058  LaterDef = LaterMA->getDefiningAccess();
1059 
1060  return MSSA->dominates(LaterDef, EarlierMA);
1061 }
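// Note that once EarlyCSEMssaOptCap clobbering queries have been issued, the
// code above falls back to the (possibly conservative) defining access,
// trading precision for compile time as described by the flag.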
1062 
1063 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1064  // A location loaded from with an invariant_load is assumed to *never* change
1065  // within the visible scope of the compilation.
1066  if (auto *LI = dyn_cast<LoadInst>(I))
1067  if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1068  return true;
1069 
1070  auto MemLocOpt = MemoryLocation::getOrNone(I);
1071  if (!MemLocOpt)
1072  // "target" intrinsic forms of loads aren't currently known to
1073  // MemoryLocation::get. TODO
1074  return false;
1075  MemoryLocation MemLoc = *MemLocOpt;
1076  if (!AvailableInvariants.count(MemLoc))
1077  return false;
1078 
1079  // Is the generation at which this became invariant older than the
1080  // current one?
1081  return AvailableInvariants.lookup(MemLoc) <= GenAt;
1082 }
1083 
1084 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1085  const BranchInst *BI, const BasicBlock *BB,
1086  const BasicBlock *Pred) {
1087  assert(BI->isConditional() && "Should be a conditional branch!");
1088  assert(BI->getCondition() == CondInst && "Wrong condition?");
1089  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1090  auto *TorF = (BI->getSuccessor(0) == BB)
1091  ? ConstantInt::getTrue(BB->getContext())
1092  : ConstantInt::getFalse(BB->getContext());
1093  auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1094  Value *&RHS) {
1095  if (Opcode == Instruction::And &&
1096  match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1097  return true;
1098  else if (Opcode == Instruction::Or &&
1099  match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1100  return true;
1101  return false;
1102  };
1103  // If the condition is AND operation, we can propagate its operands into the
1104  // true branch. If it is OR operation, we can propagate them into the false
1105  // branch.
1106  unsigned PropagateOpcode =
1107  (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1108 
1109  bool MadeChanges = false;
1110  SmallVector<Instruction *, 4> WorkList;
1111  SmallPtrSet<Instruction *, 4> Visited;
1112  WorkList.push_back(CondInst);
1113  while (!WorkList.empty()) {
1114  Instruction *Curr = WorkList.pop_back_val();
1115 
1116  AvailableValues.insert(Curr, TorF);
1117  LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1118  << Curr->getName() << "' as " << *TorF << " in "
1119  << BB->getName() << "\n");
1120  if (!DebugCounter::shouldExecute(CSECounter)) {
1121  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1122  } else {
1123  // Replace all dominated uses with the known value.
1124  if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1125  BasicBlockEdge(Pred, BB))) {
1126  NumCSECVP += Count;
1127  MadeChanges = true;
1128  }
1129  }
1130 
1131  Value *LHS, *RHS;
1132  if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1133  for (auto *Op : { LHS, RHS })
1134  if (Instruction *OPI = dyn_cast<Instruction>(Op))
1135  if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1136  WorkList.push_back(OPI);
1137  }
1138 
1139  return MadeChanges;
1140 }
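// For example, given:
//   %c = and i1 %a, %b
//   br i1 %c, label %taken, label %other
// every dominated use of %c (and, via the worklist, of %a and %b) in %taken
// is replaced with "true", which is what the NumCSECVP statistic counts.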
1141 
1142 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1143  unsigned CurrentGeneration) {
1144  if (InVal.DefInst == nullptr)
1145  return nullptr;
1146  if (InVal.MatchingId != MemInst.getMatchingId())
1147  return nullptr;
1148  // We don't yet handle removing loads with ordering of any kind.
1149  if (MemInst.isVolatile() || !MemInst.isUnordered())
1150  return nullptr;
1151  // We can't replace an atomic load with one which isn't also atomic.
1152  if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1153  return nullptr;
1154  // The value V returned from this function is used differently depending
1155  // on whether MemInst is a load or a store. If it's a load, we will replace
1156  // MemInst with V, if it's a store, we will check if V is the same as the
1157  // available value.
1158  bool MemInstMatching = !MemInst.isLoad();
1159  Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1160  Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1161 
1162  // For stores check the result values before checking memory generation
1163  // (otherwise isSameMemGeneration may crash).
1164  Value *Result = MemInst.isStore()
1165  ? getOrCreateResult(Matching, Other->getType())
1166  : nullptr;
1167  if (MemInst.isStore() && InVal.DefInst != Result)
1168  return nullptr;
1169 
1170  // Deal with non-target memory intrinsics.
1171  bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1172  bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1173  if (OtherNTI != MatchingNTI)
1174  return nullptr;
1175  if (OtherNTI && MatchingNTI) {
1176  if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1177  cast<IntrinsicInst>(MemInst.get())))
1178  return nullptr;
1179  }
1180 
1181  if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1182  !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1183  MemInst.get()))
1184  return nullptr;
1185 
1186  if (!Result)
1187  Result = getOrCreateResult(Matching, Other->getType());
1188  return Result;
1189 }
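// In other words, for a load this returns the dominating value that can
// directly replace it; for a store it returns the previously available value
// so the caller can recognize a "store of what was just loaded" and drop it.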
1190 
1191 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1192  const ParseMemoryInst &Later) {
1193  // Can we remove Earlier store because of Later store?
1194 
1195  assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1196  "Violated invariant");
1197  if (Earlier.getPointerOperand() != Later.getPointerOperand())
1198  return false;
1199  if (!Earlier.getValueType() || !Later.getValueType() ||
1200  Earlier.getValueType() != Later.getValueType())
1201  return false;
1202  if (Earlier.getMatchingId() != Later.getMatchingId())
1203  return false;
1204  // At the moment, we don't remove ordered stores, but do remove
1205  // unordered atomic stores. There's no special requirement (for
1206  // unordered atomics) about removing atomic stores only in favor of
1207  // other atomic stores since we were going to execute the non-atomic
1208  // one anyway and the atomic one might never have become visible.
1209  if (!Earlier.isUnordered() || !Later.isUnordered())
1210  return false;
1211 
1212  // Deal with non-target memory intrinsics.
1213  bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1214  bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1215  if (ENTI && LNTI)
1216  return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1217  cast<IntrinsicInst>(Later.get()));
1218 
1219  // Because of the check above, at least one of them is false.
1220  // For now disallow matching intrinsics with non-intrinsics,
1221  // so assume that the stores match if neither is an intrinsic.
1222  return ENTI == LNTI;
1223 }
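// For example, two unordered stores of the same type through the same pointer,
//   store i32 1, ptr %p
//   store i32 2, ptr %p     ; no intervening read or ordering operation
// satisfy this predicate; processNode uses it to delete the earlier store as
// trivially dead (counted by NumDSE).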
1224 
1225 bool EarlyCSE::processNode(DomTreeNode *Node) {
1226  bool Changed = false;
1227  BasicBlock *BB = Node->getBlock();
1228 
1229  // If this block has a single predecessor, then the predecessor is the parent
1230  // of the domtree node and all of the live out memory values are still current
1231  // in this block. If this block has multiple predecessors, then they could
1232  // have invalidated the live-out memory values of our parent value. For now,
1233  // just be conservative and invalidate memory if this block has multiple
1234  // predecessors.
1235  if (!BB->getSinglePredecessor())
1236  ++CurrentGeneration;
1237 
1238  // If this node has a single predecessor which ends in a conditional branch,
1239  // we can infer the value of the branch condition given that we took this
1240  // path. We need the single predecessor to ensure there's not another path
1241  // which reaches this block where the condition might hold a different
1242  // value. Since we're adding this to the scoped hash table (like any other
1243  // def), it will have been popped if we encounter a future merge block.
1244  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1245  auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1246  if (BI && BI->isConditional()) {
1247  auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1248  if (CondInst && SimpleValue::canHandle(CondInst))
1249  Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1250  }
1251  }
1252 
1253  /// LastStore - Keep track of the last non-volatile store that we saw... for
1254  /// as long as there is no instruction that reads memory. If we see a store
1255  /// to the same location, we delete the dead store. This zaps trivial dead
1256  /// stores which can occur in bitfield code among other things.
1257  Instruction *LastStore = nullptr;
1258 
1259  // See if any instructions in the block can be eliminated. If so, do it. If
1260  // not, add them to AvailableValues.
1261  for (Instruction &Inst : make_early_inc_range(BB->getInstList())) {
1262  // Dead instructions should just be removed.
1263  if (isInstructionTriviallyDead(&Inst, &TLI)) {
1264  LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1265  if (!DebugCounter::shouldExecute(CSECounter)) {
1266  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1267  continue;
1268  }
1269 
1270  salvageKnowledge(&Inst, &AC);
1271  salvageDebugInfo(Inst);
1272  removeMSSA(Inst);
1273  Inst.eraseFromParent();
1274  Changed = true;
1275  ++NumSimplify;
1276  continue;
1277  }
1278 
1279  // Skip assume intrinsics, they don't really have side effects (although
1280  // they're marked as such to ensure preservation of control dependencies),
1281  // and this pass will not bother with their removal. However, we should mark
1282  // their conditions as true for all dominated blocks.
1283  if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1284  auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1285  if (CondI && SimpleValue::canHandle(CondI)) {
1286  LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1287  << '\n');
1288  AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1289  } else
1290  LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1291  continue;
1292  }
1293 
1294  // Likewise, noalias intrinsics don't actually write.
1295  if (match(&Inst,
1296  m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1297  LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1298  << '\n');
1299  continue;
1300  }
1301 
1302  // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1303  if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1304  LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1305  continue;
1306  }
1307 
1308  // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1309  if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1310  LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1311  continue;
1312  }
1313 
1314  // We can skip all invariant.start intrinsics since they only read memory,
1315  // and we can forward values across it. For invariant starts without
1316  // invariant ends, we can use the fact that the invariantness never ends to
1317  // start a scope in the current generation which is true for all future
1318  // generations. Also, we don't need to consume the last store since the
1319  // semantics of invariant.start allow us to perform DSE of the last
1320  // store, if there was a store following invariant.start. Consider:
1321  //
1322  // store 30, i8* p
1323  // invariant.start(p)
1324  // store 40, i8* p
1325  // We can DSE the store to 30, since the store 40 to invariant location p
1326  // causes undefined behaviour.
1327  if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1328  // If there are any uses, the scope might end.
1329  if (!Inst.use_empty())
1330  continue;
1331  MemoryLocation MemLoc =
1332  MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1333  // Don't start a scope if we already have a better one pushed
1334  if (!AvailableInvariants.count(MemLoc))
1335  AvailableInvariants.insert(MemLoc, CurrentGeneration);
1336  continue;
1337  }
1338 
1339  if (isGuard(&Inst)) {
1340  if (auto *CondI =
1341  dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1342  if (SimpleValue::canHandle(CondI)) {
1343  // Do we already know the actual value of this condition?
1344  if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1345  // Is the condition known to be true?
1346  if (isa<ConstantInt>(KnownCond) &&
1347  cast<ConstantInt>(KnownCond)->isOne()) {
1348  LLVM_DEBUG(dbgs()
1349  << "EarlyCSE removing guard: " << Inst << '\n');
1350  salvageKnowledge(&Inst, &AC);
1351  removeMSSA(Inst);
1352  Inst.eraseFromParent();
1353  Changed = true;
1354  continue;
1355  } else
1356  // Use the known value if it wasn't true.
1357  cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1358  }
1359  // The condition we're guarding on here is true for all dominated
1360  // locations.
1361  AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1362  }
1363  }
1364 
1365  // Guard intrinsics read all memory, but don't write any memory.
1366  // Accordingly, don't update the generation but consume the last store (to
1367  // avoid an incorrect DSE).
1368  LastStore = nullptr;
1369  continue;
1370  }
1371 
1372  // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1373  // its simpler value.
1374  if (Value *V = simplifyInstruction(&Inst, SQ)) {
1375  LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1376  << '\n');
1377  if (!DebugCounter::shouldExecute(CSECounter)) {
1378  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1379  } else {
1380  bool Killed = false;
1381  if (!Inst.use_empty()) {
1382  Inst.replaceAllUsesWith(V);
1383  Changed = true;
1384  }
1385  if (isInstructionTriviallyDead(&Inst, &TLI)) {
1386  salvageKnowledge(&Inst, &AC);
1387  removeMSSA(Inst);
1388  Inst.eraseFromParent();
1389  Changed = true;
1390  Killed = true;
1391  }
1392  if (Changed)
1393  ++NumSimplify;
1394  if (Killed)
1395  continue;
1396  }
1397  }
1398 
1399  // If this is a simple instruction that we can value number, process it.
1400  if (SimpleValue::canHandle(&Inst)) {
1401  if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1402  assert(CI->getExceptionBehavior() != fp::ebStrict &&
1403  "Unexpected ebStrict from SimpleValue::canHandle()");
1404  assert((!CI->getRoundingMode() ||
1405  CI->getRoundingMode() != RoundingMode::Dynamic) &&
1406  "Unexpected dynamic rounding from SimpleValue::canHandle()");
1407  }
1408  // See if the instruction has an available value. If so, use it.
1409  if (Value *V = AvailableValues.lookup(&Inst)) {
1410  LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1411  << '\n');
1412  if (!DebugCounter::shouldExecute(CSECounter)) {
1413  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1414  continue;
1415  }
1416  if (auto *I = dyn_cast<Instruction>(V)) {
1417  // If I being poison triggers UB, there is no need to drop those
1418  // flags. Otherwise, only retain flags present on both I and Inst.
1419  // TODO: Currently some fast-math flags are not treated as
1420  // poison-generating even though they should. Until this is fixed,
1421  // always retain flags present on both I and Inst for floating point
1422  // instructions.
1423  if (isa<FPMathOperator>(I) || (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1424  I->andIRFlags(&Inst);
1425  }
1426  Inst.replaceAllUsesWith(V);
1427  salvageKnowledge(&Inst, &AC);
1428  removeMSSA(Inst);
1429  Inst.eraseFromParent();
1430  Changed = true;
1431  ++NumCSE;
1432  continue;
1433  }
1434 
1435  // Otherwise, just remember that this value is available.
1436  AvailableValues.insert(&Inst, &Inst);
1437  continue;
1438  }
1439 
1440  ParseMemoryInst MemInst(&Inst, TTI);
1441  // If this is a non-volatile load, process it.
1442  if (MemInst.isValid() && MemInst.isLoad()) {
1443  // (conservatively) we can't peek past the ordering implied by this
1444  // operation, but we can add this load to our set of available values
1445  if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1446  LastStore = nullptr;
1447  ++CurrentGeneration;
1448  }
1449 
1450  if (MemInst.isInvariantLoad()) {
1451  // If we pass an invariant load, we know that memory location is
1452  // indefinitely constant from the moment of first dereferenceability.
1453  // We conservatively treat the invariant_load as that moment. If we
1454  // pass an invariant load after already establishing a scope, don't
1455  // restart it since we want to preserve the earliest point seen.
1456  auto MemLoc = MemoryLocation::get(&Inst);
1457  if (!AvailableInvariants.count(MemLoc))
1458  AvailableInvariants.insert(MemLoc, CurrentGeneration);
1459  }
1460 
1461  // If we have an available version of this load, and if it is the right
1462  // generation or the load is known to be from an invariant location,
1463  // replace this instruction.
1464  //
1465  // If either the dominating load or the current load are invariant, then
1466  // we can assume the current load loads the same value as the dominating
1467  // load.
1468  LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1469  if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1470  LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1471  << " to: " << *InVal.DefInst << '\n');
1472  if (!DebugCounter::shouldExecute(CSECounter)) {
1473  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1474  continue;
1475  }
1476  if (!Inst.use_empty())
1477  Inst.replaceAllUsesWith(Op);
1478  salvageKnowledge(&Inst, &AC);
1479  removeMSSA(Inst);
1480  Inst.eraseFromParent();
1481  Changed = true;
1482  ++NumCSELoad;
1483  continue;
1484  }
1485 
1486  // Otherwise, remember that we have this instruction.
1487  AvailableLoads.insert(MemInst.getPointerOperand(),
1488  LoadValue(&Inst, CurrentGeneration,
1489  MemInst.getMatchingId(),
1490  MemInst.isAtomic()));
1491  LastStore = nullptr;
1492  continue;
1493  }
1494 
1495  // If this instruction may read from memory or throw (and potentially read
1496  // from memory in the exception handler), forget LastStore. Load/store
1497  // intrinsics will indicate both a read and a write to memory. The target
1498  // may override this (e.g. so that a store intrinsic does not read from
1499  // memory, and thus will be treated the same as a regular store for
1500  // commoning purposes).
1501  if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1502  !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1503  LastStore = nullptr;
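  // Hypothetical illustration: in
  //   store i32 1, ptr %p
  //   call void @use(ptr %p)   ; may read %p
  //   store i32 2, ptr %p
  // the call clears LastStore, so the first store is not removed by the
  // overriding-store DSE further below.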
1504 
1505  // If this is a read-only call, process it.
1506  if (CallValue::canHandle(&Inst)) {
1507  // If we have an available version of this call, and if it is the right
1508  // generation, replace this instruction.
1509  std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1510  if (InVal.first != nullptr &&
1511  isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1512  &Inst)) {
1513  LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1514  << " to: " << *InVal.first << '\n');
1515  if (!DebugCounter::shouldExecute(CSECounter)) {
1516  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1517  continue;
1518  }
1519  if (!Inst.use_empty())
1520  Inst.replaceAllUsesWith(InVal.first);
1521  salvageKnowledge(&Inst, &AC);
1522  removeMSSA(Inst);
1523  Inst.eraseFromParent();
1524  Changed = true;
1525  ++NumCSECall;
1526  continue;
1527  }
1528 
1529  // Otherwise, remember that we have this instruction.
1530  AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1531  continue;
1532  }
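  // Sketch of the effect (hypothetical IR): two calls to a readonly
  // function with identical operands in the same memory generation,
  //   %r1 = call i32 @f(i32 %x)
  //   %r2 = call i32 @f(i32 %x)
  // are commoned so that the uses of %r2 refer to %r1.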
1533 
1534  // A release fence requires that all stores complete before it, but does
1535  // not prevent the reordering of following loads 'before' the fence. As a
1536  // result, we don't need to consider it as writing to memory and don't need
1537  // to advance the generation. We do need to prevent DSE across the fence,
1538  // but that's handled above.
1539  if (auto *FI = dyn_cast<FenceInst>(&Inst))
1540  if (FI->getOrdering() == AtomicOrdering::Release) {
1541  assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1542  continue;
1543  }
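  // Rough illustration: a 'fence release' here does not bump
  // CurrentGeneration, so a value of %p made available before the fence
  // can still be forwarded to a load of %p after it; only DSE of earlier
  // stores across the fence is blocked (by the LastStore reset above).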
1544 
1545  // Write-back DSE - If we write back the same value we just loaded from
1546  // the same location and haven't passed any intervening writes or ordering
1547  // operations, we can remove the write. The primary benefit is in allowing
1548  // the available load table to remain valid and value-forward past where
1549  // the store originally was.
1550  if (MemInst.isValid() && MemInst.isStore()) {
1551  LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1552  if (InVal.DefInst &&
1553  InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1554  // It is okay to have a LastStore to a different pointer here if MemorySSA
1555  // tells us that the load and store are from the same memory generation.
1556  // In that case, LastStore should keep its present value since we're
1557  // removing the current store.
1558  assert((!LastStore ||
1559  ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1560  MemInst.getPointerOperand() ||
1561  MSSA) &&
1562  "can't have an intervening store if not using MemorySSA!");
1563  LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1564  if (!DebugCounter::shouldExecute(CSECounter)) {
1565  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1566  continue;
1567  }
1568  salvageKnowledge(&Inst, &AC);
1569  removeMSSA(Inst);
1570  Inst.eraseFromParent();
1571  Changed = true;
1572  ++NumDSE;
1573  // We can avoid incrementing the generation count since we were able
1574  // to eliminate this store.
1575  continue;
1576  }
1577  }
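  // Sketch of the effect (hypothetical IR):
  //   %v = load i32, ptr %p
  //   store i32 %v, ptr %p
  // stores back the value just loaded, so the store is deleted while %p
  // stays in AvailableLoads for forwarding to later loads.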
1578 
1579  // Okay, this isn't something we can CSE at all. Check to see if it is
1580  // something that could modify memory. If so, our available memory values
1581  // cannot be used so bump the generation count.
1582  if (Inst.mayWriteToMemory()) {
1583  ++CurrentGeneration;
1584 
1585  if (MemInst.isValid() && MemInst.isStore()) {
1586  // We do a trivial form of DSE if there are two stores to the same
1587  // location with no intervening loads. Delete the earlier store.
1588  if (LastStore) {
1589  if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1590  LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1591  << " due to: " << Inst << '\n');
1592  if (!DebugCounter::shouldExecute(CSECounter)) {
1593  LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1594  } else {
1595  salvageKnowledge(&Inst, &AC);
1596  removeMSSA(*LastStore);
1597  LastStore->eraseFromParent();
1598  Changed = true;
1599  ++NumDSE;
1600  LastStore = nullptr;
1601  }
1602  }
1603  // fallthrough - we can exploit information about this store
1604  }
1605 
1606  // Okay, we just invalidated anything we knew about loaded values. Try
1607  // to salvage *something* by remembering that the stored value is a live
1608  // version of the pointer. It is safe to forward from volatile stores
1609  // to non-volatile loads, so we don't have to check for volatility of
1610  // the store.
1611  AvailableLoads.insert(MemInst.getPointerOperand(),
1612  LoadValue(&Inst, CurrentGeneration,
1613  MemInst.getMatchingId(),
1614  MemInst.isAtomic()));
1615 
1616  // Remember that this was the last unordered store we saw for DSE. We
1617  // don't yet handle DSE on ordered or volatile stores since we don't
1618  // have a good way to model the ordering requirement for following
1619  // passes once the store is removed. We could insert a fence, but
1620  // since fences are slightly stronger than stores in their ordering,
1621  // it's not clear this is a profitable transform. Another option would
1622  // be to merge the ordering with that of the post dominating store.
1623  if (MemInst.isUnordered() && !MemInst.isVolatile())
1624  LastStore = &Inst;
1625  else
1626  LastStore = nullptr;
1627  }
1628  }
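  // Sketch of the effect (hypothetical IR): for
  //   store i32 1, ptr %p
  //   store i32 2, ptr %p     ; no intervening read of %p
  // the first store is deleted, and the stored value 2 is recorded so a
  // following load of %p can be forwarded without touching memory.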
1629  }
1630 
1631  return Changed;
1632 }
1633 
1634 bool EarlyCSE::run() {
1635  // Note, deque is being used here because there are significant performance
1636  // gains over vector when the container becomes very large due to the
1637  // specific access patterns. For more information see the mailing list
1638  // discussion on this:
1639  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1640  std::deque<StackNode *> nodesToProcess;
1641 
1642  bool Changed = false;
1643 
1644  // Process the root node.
1645  nodesToProcess.push_back(new StackNode(
1646  AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1647  CurrentGeneration, DT.getRootNode(),
1648  DT.getRootNode()->begin(), DT.getRootNode()->end()));
1649 
1650  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1651 
1652  // Process the stack.
1653  while (!nodesToProcess.empty()) {
1654  // Grab the first item off the stack. Set the current generation, remove
1655  // the node from the stack, and process it.
1656  StackNode *NodeToProcess = nodesToProcess.back();
1657 
1658  // Initialize class members.
1659  CurrentGeneration = NodeToProcess->currentGeneration();
1660 
1661  // Check if the node needs to be processed.
1662  if (!NodeToProcess->isProcessed()) {
1663  // Process the node.
1664  Changed |= processNode(NodeToProcess->node());
1665  NodeToProcess->childGeneration(CurrentGeneration);
1666  NodeToProcess->process();
1667  } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1668  // Push the next child onto the stack.
1669  DomTreeNode *child = NodeToProcess->nextChild();
1670  nodesToProcess.push_back(
1671  new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1672  AvailableCalls, NodeToProcess->childGeneration(),
1673  child, child->begin(), child->end()));
1674  } else {
1675  // It has been processed, and there are no more children to process,
1676  // so delete it and pop it off the stack.
1677  delete NodeToProcess;
1678  nodesToProcess.pop_back();
1679  }
1680  } // while (!nodes...)
1681 
1682  return Changed;
1683 }
1684 
1685 PreservedAnalyses EarlyCSEPass::run(Function &F,
1686  FunctionAnalysisManager &AM) {
1687  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1688  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1689  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1690  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1691  auto *MSSA =
1692  UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1693 
1694  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1695 
1696  if (!CSE.run())
1697  return PreservedAnalyses::all();
1698 
1699  PreservedAnalyses PA;
1700  PA.preserveSet<CFGAnalyses>();
1701  if (UseMemorySSA)
1702  PA.preserve<MemorySSAAnalysis>();
1703  return PA;
1704 }
1705 
1706 void EarlyCSEPass::printPipeline(
1707  raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1708  static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1709  OS, MapClassName2PassName);
1710  OS << "<";
1711  if (UseMemorySSA)
1712  OS << "memssa";
1713  OS << ">";
1714 }
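// For reference, this option string corresponds to the new pass manager
// pipeline syntax, e.g. (illustrative invocation):
//   opt -passes='early-cse<memssa>' -S input.ll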
1715 
1716 namespace {
1717 
1718 /// A simple and fast domtree-based CSE pass.
1719 ///
1720 /// This pass does a simple depth-first walk over the dominator tree,
1721 /// eliminating trivially redundant instructions and using instsimplify to
1722 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1723 /// cases so that instcombine and other passes are more effective. It is
1724 /// expected that a later pass of GVN will catch the interesting/hard cases.
1725 template<bool UseMemorySSA>
1726 class EarlyCSELegacyCommonPass : public FunctionPass {
1727 public:
1728  static char ID;
1729 
1730  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1731  if (UseMemorySSA)
1732  initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1733  else
1734  initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1735  }
1736 
1737  bool runOnFunction(Function &F) override {
1738  if (skipFunction(F))
1739  return false;
1740 
1741  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1742  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1743  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1744  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1745  auto *MSSA =
1746  UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1747 
1748  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1749 
1750  return CSE.run();
1751  }
1752 
1753  void getAnalysisUsage(AnalysisUsage &AU) const override {
1754  AU.addRequired<AssumptionCacheTracker>();
1755  AU.addRequired<DominatorTreeWrapperPass>();
1756  AU.addRequired<TargetLibraryInfoWrapperPass>();
1757  AU.addRequired<TargetTransformInfoWrapperPass>();
1758  if (UseMemorySSA) {
1759  AU.addRequired<AAResultsWrapperPass>();
1760  AU.addRequired<MemorySSAWrapperPass>();
1761  AU.addPreserved<MemorySSAWrapperPass>();
1762  }
1763  AU.addPreserved<GlobalsAAWrapperPass>();
1764  AU.addPreserved<AAResultsWrapperPass>();
1765  AU.setPreservesCFG();
1766  }
1767 };
1768 
1769 } // end anonymous namespace
1770 
1771 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1772 
1773 template<>
1774 char EarlyCSELegacyPass::ID = 0;
1775 
1776 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1777  false)
1778 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1779 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1780 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1781 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1782 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1783 
1784 using EarlyCSEMemSSALegacyPass =
1785  EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1786 
1787 template<>
1788 char EarlyCSEMemSSALegacyPass::ID = 0;
1789 
1790 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1791  if (UseMemorySSA)
1792  return new EarlyCSEMemSSALegacyPass();
1793  else
1794  return new EarlyCSELegacyPass();
1795 }
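// Illustrative use with the legacy pass manager (hypothetical driver code):
//   legacy::PassManager PM;
//   PM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));
//   PM.run(M);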
1796 
1797 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1798  "Early CSE w/ MemorySSA", false, false)
1799 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1800 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1801 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1802 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1803 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
1804 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1805 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1806  "Early CSE w/ MemorySSA", false, false)
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2586
llvm::SPF_SMAX
@ SPF_SMAX
Signed maximum.
Definition: ValueTracking.h:691
llvm::CmpInst::getSwappedPredicate
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:850
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:36
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::EarlyCSEPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: EarlyCSE.cpp:1706
llvm::MemorySSA::verifyMemorySSA
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1869
llvm::ScopedHashTableVal
Definition: ScopedHashTable.h:46
matchSelectWithOptionalNotCond
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, Value *&B, SelectPatternFlavor &Flavor)
Match a 'select' including an optional 'not's of the condition.
Definition: EarlyCSE.cpp:174
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
IntrinsicInst.h
llvm::SimplifyQuery
Definition: InstructionSimplify.h:93
getHashValueImpl
static unsigned getHashValueImpl(SimpleValue Val)
Definition: EarlyCSE.cpp:221
AtomicOrdering.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Scalar.h
llvm::PassInfoMixin< EarlyCSEPass >
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::Instruction::isIdenticalToWhenDefined
bool isIdenticalToWhenDefined(const Instruction *I) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
Definition: Instruction.cpp:537
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
Allocator.h
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:834
llvm::MemorySSA::dominates
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2122
ValueTracking.h
Local.h
DEBUG_COUNTER
DEBUG_COUNTER(CSECounter, "early-cse", "Controls which instructions are removed")
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1790
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::DenseMapInfo< SimpleValue >::getEmptyKey
static SimpleValue getEmptyKey()
Definition: EarlyCSE.cpp:159
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5406
llvm::programUndefinedIfPoison
bool programUndefinedIfPoison(const Instruction *Inst)
Definition: ValueTracking.cpp:5761
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::BasicBlockEdge
Definition: Dominators.h:98
llvm::User::value_op_begin
value_op_iterator value_op_begin()
Definition: User.h:260
llvm::SPF_UMAX
@ SPF_UMAX
Unsigned maximum.
Definition: ValueTracking.h:692
llvm::BumpPtrAllocator
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition: Allocator.h:375
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
Definition: Instruction.cpp:626
llvm::SmallPtrSet< Instruction *, 4 >
Hashing.h
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:750
STLExtras.h
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::PatternMatch::m_Not
BinaryOp_match< ValTy, cst_pred_ty< is_all_ones >, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
Definition: PatternMatch.h:2289
llvm::SelectPatternFlavor
SelectPatternFlavor
Specific patterns of select instructions we can match.
Definition: ValueTracking.h:687
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
Definition: Instruction.cpp:606
llvm::isGuard
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
Definition: GuardUtils.cpp:18
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::DenseMapInfo
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: APInt.h:34
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:164
llvm::SPF_UNKNOWN
@ SPF_UNKNOWN
Definition: ValueTracking.h:688
llvm::TargetTransformInfo::getOrCreateResultFromMemIntrinsic
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
Definition: TargetTransformInfo.cpp:1047
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::DominatorTreeBase::getRootNode
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Definition: GenericDomTree.h:370
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1462
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::DenseMapInfo< CallValue >::getEmptyKey
static CallValue getEmptyKey()
Definition: EarlyCSE.cpp:493
llvm::Instruction::isAtomic
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:646
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:746
InstrTypes.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::DenseMapInfo< CallValue >::getTombstoneKey
static CallValue getTombstoneKey()
Definition: EarlyCSE.cpp:497
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::Instruction
Definition: Instruction.h:42
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::salvageDebugInfo
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1361
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:532
PatternMatch.h
ScopedHashTable.h
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::CallBase::onlyReadsMemory
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1721
llvm::SPF_SMIN
@ SPF_SMIN
Signed minimum.
Definition: ValueTracking.h:689
Type.h
llvm::ScopedHashTable
Definition: ScopedHashTable.h:43
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3215
llvm::EarlyCSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: EarlyCSE.cpp:1685
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:710
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1044
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
EarlyCSE.h
llvm::GCRelocateInst
Represents calls to the gc.relocate intrinsic.
Definition: IntrinsicInst.h:1469
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:77
llvm::getPointerOperand
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Definition: Instructions.h:5375
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:475
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2642
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2561
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6582
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:159
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:744
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:168
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:293
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
llvm::replaceDominatedUsesWith
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge)
Replace each use of 'From' with 'To' if that use is dominated by the given edge.
Definition: Local.cpp:2831
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1560
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:396
llvm::BinaryOperator
Definition: InstrTypes.h:189
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm::initializeEarlyCSEMemSSALegacyPassPass
void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &)
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:73
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::RoundingMode::Dynamic
@ Dynamic
Denotes mode unknown at compile time.
EarlyCSEMssaOptCap
static cl::opt< unsigned > EarlyCSEMssaOptCap("earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden, cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls."))
llvm::DomTreeNodeBase::begin
iterator begin()
Definition: GenericDomTree.h:75
isEqualImpl
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS)
Definition: EarlyCSE.cpp:339
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::DomTreeNodeBase< BasicBlock >
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
llvm::AtomicOrdering::Release
@ Release
llvm::User::value_op_end
value_op_iterator value_op_end()
Definition: User.h:263
llvm::RecyclingAllocator
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Definition: RecyclingAllocator.h:26
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::DomTreeNodeBase::end
iterator end()
Definition: GenericDomTree.h:76
llvm::initializeEarlyCSELegacyPassPass
void initializeEarlyCSELegacyPassPass(PassRegistry &)
llvm::ExtractValueInst
This instruction extracts a struct member or array element value from an aggregate value.
Definition: Instructions.h:2446
Casting.h
Function.h
DebugCounter.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
RecyclingAllocator.h
llvm::CmpInst::ICMP_SGE
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:748
EarlyCSELegacyPass
EarlyCSELegacyCommonPass< false > EarlyCSELegacyPass
Definition: EarlyCSE.cpp:1771
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
GuardUtils.h
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
MemorySSA.h
llvm::DomTreeNodeBase< BasicBlock >::const_iterator
typename SmallVector< DomTreeNodeBase *, 4 >::const_iterator const_iterator
Definition: GenericDomTree.h:73
Instructions.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
llvm::hash_combine
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:605
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:743
Dominators.h
llvm::FreezeInst
This class represents a freeze function that returns random concrete value if an operand is either a ...
Definition: Instructions.h:5438
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:924
llvm::hash_combine_range
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:483
llvm::SPF_UMIN
@ SPF_UMIN
Unsigned minimum.
Definition: ValueTracking.h:690
InstructionSimplify.h
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:810
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:142
TargetTransformInfo.h
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::PatternMatch
Definition: PatternMatch.h:47
DenseMapInfo.h
llvm::MemIntrinsicInfo
Information about a load/store intrinsic defined by the target.
Definition: TargetTransformInfo.h:72
llvm::Instruction::mayThrow
bool mayThrow() const LLVM_READONLY
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:715
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5361
EarlyCSEDebugHash
static cl::opt< bool > EarlyCSEDebugHash("earlycse-debug-hash", cl::init(false), cl::Hidden, cl::desc("Perform extra assertion checking to verify that SimpleValue's hash " "function is well-behaved w.r.t. its isEqual predicate"))
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::VerifyMemorySSA
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:91
llvm::fp::ebStrict
@ ebStrict
This corresponds to "fpexcept.strict".
Definition: FPEnv.h:41
LLVMContext.h
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3134
raw_ostream.h
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2557
llvm::TargetTransformInfo::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Definition: TargetTransformInfo.cpp:1038
Value.h
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
InitializePasses.h
llvm::DenseMapInfo< SimpleValue >::getTombstoneKey
static SimpleValue getTombstoneKey()
Definition: EarlyCSE.cpp:163
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:450
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3213
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::PatternMatch::m_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
Definition: PatternMatch.h:2543
llvm::Function::isPresplitCoroutine
bool isPresplitCoroutine() const
Determine if the function is presplit coroutine.
Definition: Function.h:488
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3227
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) using EarlyCSEMemSSALegacyPass
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39