LLVM  7.0.0svn
GVNSink.cpp
Go to the documentation of this file.
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file GVNSink.cpp
11 /// This pass attempts to sink instructions into successors, reducing static
12 /// instruction count and enabling if-conversion.
13 ///
14 /// We use a variant of global value numbering to decide what can be sunk.
15 /// Consider:
16 ///
17 /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
18 /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
19 /// \ /
20 /// [ %e = phi i32 %a2, %c2 ]
21 /// [ add i32 %e, 4 ]
22 ///
23 ///
24 /// GVN would number %a1 and %c1 differently because they compute different
25 /// results - the VN of an instruction is a function of its opcode and the
26 /// transitive closure of its operands. This is the key property for hoisting
27 /// and CSE.
28 ///
29 /// What we want when sinking however is for a numbering that is a function of
30 /// the *uses* of an instruction, which allows us to answer the question "if I
31 /// replace %a1 with %c1, will it contribute in an equivalent way to all
32 /// successive instructions?". The PostValueTable class in GVN provides this
33 /// mapping.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/DenseMap.h"
39 #include "llvm/ADT/DenseMapInfo.h"
40 #include "llvm/ADT/DenseSet.h"
41 #include "llvm/ADT/Hashing.h"
42 #include "llvm/ADT/None.h"
43 #include "llvm/ADT/Optional.h"
45 #include "llvm/ADT/STLExtras.h"
46 #include "llvm/ADT/SmallPtrSet.h"
47 #include "llvm/ADT/SmallVector.h"
48 #include "llvm/ADT/Statistic.h"
49 #include "llvm/ADT/StringExtras.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/CFG.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/PassManager.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/Use.h"
62 #include "llvm/IR/Value.h"
63 #include "llvm/Pass.h"
64 #include "llvm/Support/Allocator.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/Compiler.h"
69 #include "llvm/Support/Debug.h"
71 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <utility>
81 
82 using namespace llvm;
83 
84 #define DEBUG_TYPE "gvn-sink"
85 
86 STATISTIC(NumRemoved, "Number of instructions removed");
87 
88 namespace llvm {
89 namespace GVNExpression {
90 
92  print(dbgs());
93  dbgs() << "\n";
94 }
95 
96 } // end namespace GVNExpression
97 } // end namespace llvm
98 
99 namespace {
100 
101 static bool isMemoryInst(const Instruction *I) {
102  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
103  (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
104  (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
105 }
106 
107 /// Iterates through instructions in a set of blocks in reverse order from the
108 /// first non-terminator. For example (assume all blocks have size n):
109 /// LockstepReverseIterator I([B1, B2, B3]);
110 /// *I-- = [B1[n], B2[n], B3[n]];
111 /// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
112 /// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
113 /// ...
114 ///
115 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
116 /// to
117 /// determine which blocks are still going and the order they appear in the
118 /// list returned by operator*.
119 class LockstepReverseIterator {
120  ArrayRef<BasicBlock *> Blocks;
121  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
123  bool Fail;
124 
125 public:
126  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
127  reset();
128  }
129 
130  void reset() {
131  Fail = false;
132  ActiveBlocks.clear();
133  for (BasicBlock *BB : Blocks)
134  ActiveBlocks.insert(BB);
135  Insts.clear();
136  for (BasicBlock *BB : Blocks) {
137  if (BB->size() <= 1) {
138  // Block wasn't big enough - only contained a terminator.
139  ActiveBlocks.remove(BB);
140  continue;
141  }
142  Insts.push_back(BB->getTerminator()->getPrevNode());
143  }
144  if (Insts.empty())
145  Fail = true;
146  }
147 
148  bool isValid() const { return !Fail; }
149  ArrayRef<Instruction *> operator*() const { return Insts; }
150 
151  // Note: This needs to return a SmallSetVector as the elements of
152  // ActiveBlocks will be later copied to Blocks using std::copy. The
153  // resultant order of elements in Blocks needs to be deterministic.
154  // Using SmallPtrSet instead causes non-deterministic order while
155  // copying. And we cannot simply sort Blocks as they need to match the
156  // corresponding Values.
157  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
158 
159  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
160  for (auto II = Insts.begin(); II != Insts.end();) {
161  if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
162  Blocks.end()) {
163  ActiveBlocks.remove((*II)->getParent());
164  II = Insts.erase(II);
165  } else {
166  ++II;
167  }
168  }
169  }
170 
171  void operator--() {
172  if (Fail)
173  return;
175  for (auto *Inst : Insts) {
176  if (Inst == &Inst->getParent()->front())
177  ActiveBlocks.remove(Inst->getParent());
178  else
179  NewInsts.push_back(Inst->getPrevNode());
180  }
181  if (NewInsts.empty()) {
182  Fail = true;
183  return;
184  }
185  Insts = NewInsts;
186  }
187 };
188 
189 //===----------------------------------------------------------------------===//
190 
191 /// Candidate solution for sinking. There may be different ways to
192 /// sink instructions, differing in the number of instructions sunk,
193 /// the number of predecessors sunk from and the number of PHIs
194 /// required.
195 struct SinkingInstructionCandidate {
196  unsigned NumBlocks;
197  unsigned NumInstructions;
198  unsigned NumPHIs;
199  unsigned NumMemoryInsts;
200  int Cost = -1;
202 
203  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
204  unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
205  unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
206  Cost = (NumInstructions * (NumBlocks - 1)) -
207  (NumExtraPHIs *
208  NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
209  - SplitEdgeCost;
210  }
211 
212  bool operator>(const SinkingInstructionCandidate &Other) const {
213  return Cost > Other.Cost;
214  }
215 };
216 
217 #ifndef NDEBUG
218 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
219  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
220  << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
221  return OS;
222 }
223 #endif
224 
225 //===----------------------------------------------------------------------===//
226 
227 /// Describes a PHI node that may or may not exist. These track the PHIs
228 /// that must be created if we sunk a sequence of instructions. It provides
229 /// a hash function for efficient equality comparisons.
230 class ModelledPHI {
233 
234 public:
235  ModelledPHI() = default;
236 
237  ModelledPHI(const PHINode *PN) {
238  // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
240  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
241  Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
242  llvm::sort(Ops.begin(), Ops.end());
243  for (auto &P : Ops) {
244  Blocks.push_back(P.first);
245  Values.push_back(P.second);
246  }
247  }
248 
249  /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
250  /// without the same ID.
251  /// \note This is specifically for DenseMapInfo - do not use this!
252  static ModelledPHI createDummy(size_t ID) {
253  ModelledPHI M;
254  M.Values.push_back(reinterpret_cast<Value*>(ID));
255  return M;
256  }
257 
258  /// Create a PHI from an array of incoming values and incoming blocks.
259  template <typename VArray, typename BArray>
260  ModelledPHI(const VArray &V, const BArray &B) {
261  std::copy(V.begin(), V.end(), std::back_inserter(Values));
262  std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
263  }
264 
265  /// Create a PHI from [I[OpNum] for I in Insts].
266  template <typename BArray>
267  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
268  std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
269  for (auto *I : Insts)
270  Values.push_back(I->getOperand(OpNum));
271  }
272 
273  /// Restrict the PHI's contents down to only \c NewBlocks.
274  /// \c NewBlocks must be a subset of \c this->Blocks.
275  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
276  auto BI = Blocks.begin();
277  auto VI = Values.begin();
278  while (BI != Blocks.end()) {
279  assert(VI != Values.end());
280  if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
281  NewBlocks.end()) {
282  BI = Blocks.erase(BI);
283  VI = Values.erase(VI);
284  } else {
285  ++BI;
286  ++VI;
287  }
288  }
289  assert(Blocks.size() == NewBlocks.size());
290  }
291 
292  ArrayRef<Value *> getValues() const { return Values; }
293 
294  bool areAllIncomingValuesSame() const {
295  return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
296  }
297 
298  bool areAllIncomingValuesSameType() const {
299  return llvm::all_of(
300  Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
301  }
302 
303  bool areAnyIncomingValuesConstant() const {
304  return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
305  }
306 
307  // Hash functor
308  unsigned hash() const {
309  return (unsigned)hash_combine_range(Values.begin(), Values.end());
310  }
311 
312  bool operator==(const ModelledPHI &Other) const {
313  return Values == Other.Values && Blocks == Other.Blocks;
314  }
315 };
316 
317 template <typename ModelledPHI> struct DenseMapInfo {
318  static inline ModelledPHI &getEmptyKey() {
319  static ModelledPHI Dummy = ModelledPHI::createDummy(0);
320  return Dummy;
321  }
322 
323  static inline ModelledPHI &getTombstoneKey() {
324  static ModelledPHI Dummy = ModelledPHI::createDummy(1);
325  return Dummy;
326  }
327 
328  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
329 
330  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
331  return LHS == RHS;
332  }
333 };
334 
336 
337 //===----------------------------------------------------------------------===//
338 // ValueTable
339 //===----------------------------------------------------------------------===//
340 // This is a value number table where the value number is a function of the
341 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
342 // that the program would be equivalent if we replaced A with PHI(A, B).
343 //===----------------------------------------------------------------------===//
344 
345 /// A GVN expression describing how an instruction is used. The operands
346 /// field of BasicExpression is used to store uses, not operands.
347 ///
348 /// This class also contains fields for discriminators used when determining
349 /// equivalence of instructions with sideeffects.
350 class InstructionUseExpr : public GVNExpression::BasicExpression {
351  unsigned MemoryUseOrder = -1;
352  bool Volatile = false;
353 
354 public:
355  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
356  BumpPtrAllocator &A)
358  allocateOperands(R, A);
359  setOpcode(I->getOpcode());
360  setType(I->getType());
361 
362  for (auto &U : I->uses())
363  op_push_back(U.getUser());
364  llvm::sort(op_begin(), op_end());
365  }
366 
367  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
368  void setVolatile(bool V) { Volatile = V; }
369 
370  hash_code getHashValue() const override {
372  MemoryUseOrder, Volatile);
373  }
374 
375  template <typename Function> hash_code getHashValue(Function MapFn) {
376  hash_code H =
377  hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
378  for (auto *V : operands())
379  H = hash_combine(H, MapFn(V));
380  return H;
381  }
382 };
383 
384 class ValueTable {
385  DenseMap<Value *, uint32_t> ValueNumbering;
387  DenseMap<size_t, uint32_t> HashNumbering;
390  uint32_t nextValueNumber = 1;
391 
392  /// Create an expression for I based on its opcode and its uses. If I
393  /// touches or reads memory, the expression is also based upon its memory
394  /// order - see \c getMemoryUseOrder().
395  InstructionUseExpr *createExpr(Instruction *I) {
396  InstructionUseExpr *E =
397  new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
398  if (isMemoryInst(I))
399  E->setMemoryUseOrder(getMemoryUseOrder(I));
400 
401  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
402  CmpInst::Predicate Predicate = C->getPredicate();
403  E->setOpcode((C->getOpcode() << 8) | Predicate);
404  }
405  return E;
406  }
407 
408  /// Helper to compute the value number for a memory instruction
409  /// (LoadInst/StoreInst), including checking the memory ordering and
410  /// volatility.
411  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
412  if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
413  return nullptr;
414  InstructionUseExpr *E = createExpr(I);
415  E->setVolatile(I->isVolatile());
416  return E;
417  }
418 
419 public:
420  ValueTable() = default;
421 
422  /// Returns the value number for the specified value, assigning
423  /// it a new number if it did not have one before.
424  uint32_t lookupOrAdd(Value *V) {
425  auto VI = ValueNumbering.find(V);
426  if (VI != ValueNumbering.end())
427  return VI->second;
428 
429  if (!isa<Instruction>(V)) {
430  ValueNumbering[V] = nextValueNumber;
431  return nextValueNumber++;
432  }
433 
434  Instruction *I = cast<Instruction>(V);
435  InstructionUseExpr *exp = nullptr;
436  switch (I->getOpcode()) {
437  case Instruction::Load:
438  exp = createMemoryExpr(cast<LoadInst>(I));
439  break;
440  case Instruction::Store:
441  exp = createMemoryExpr(cast<StoreInst>(I));
442  break;
443  case Instruction::Call:
444  case Instruction::Invoke:
445  case Instruction::Add:
446  case Instruction::FAdd:
447  case Instruction::Sub:
448  case Instruction::FSub:
449  case Instruction::Mul:
450  case Instruction::FMul:
451  case Instruction::UDiv:
452  case Instruction::SDiv:
453  case Instruction::FDiv:
454  case Instruction::URem:
455  case Instruction::SRem:
456  case Instruction::FRem:
457  case Instruction::Shl:
458  case Instruction::LShr:
459  case Instruction::AShr:
460  case Instruction::And:
461  case Instruction::Or:
462  case Instruction::Xor:
463  case Instruction::ICmp:
464  case Instruction::FCmp:
465  case Instruction::Trunc:
466  case Instruction::ZExt:
467  case Instruction::SExt:
468  case Instruction::FPToUI:
469  case Instruction::FPToSI:
470  case Instruction::UIToFP:
471  case Instruction::SIToFP:
472  case Instruction::FPTrunc:
473  case Instruction::FPExt:
474  case Instruction::PtrToInt:
475  case Instruction::IntToPtr:
476  case Instruction::BitCast:
477  case Instruction::Select:
478  case Instruction::ExtractElement:
479  case Instruction::InsertElement:
480  case Instruction::ShuffleVector:
481  case Instruction::InsertValue:
482  case Instruction::GetElementPtr:
483  exp = createExpr(I);
484  break;
485  default:
486  break;
487  }
488 
489  if (!exp) {
490  ValueNumbering[V] = nextValueNumber;
491  return nextValueNumber++;
492  }
493 
494  uint32_t e = ExpressionNumbering[exp];
495  if (!e) {
496  hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
497  auto I = HashNumbering.find(H);
498  if (I != HashNumbering.end()) {
499  e = I->second;
500  } else {
501  e = nextValueNumber++;
502  HashNumbering[H] = e;
503  ExpressionNumbering[exp] = e;
504  }
505  }
506  ValueNumbering[V] = e;
507  return e;
508  }
509 
510  /// Returns the value number of the specified value. Fails if the value has
511  /// not yet been numbered.
512  uint32_t lookup(Value *V) const {
513  auto VI = ValueNumbering.find(V);
514  assert(VI != ValueNumbering.end() && "Value not numbered?");
515  return VI->second;
516  }
517 
518  /// Removes all value numberings and resets the value table.
519  void clear() {
520  ValueNumbering.clear();
521  ExpressionNumbering.clear();
522  HashNumbering.clear();
523  Recycler.clear(Allocator);
524  nextValueNumber = 1;
525  }
526 
527  /// \c Inst uses or touches memory. Return an ID describing the memory state
528  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
529  /// the exact same memory operations happen after I1 and I2.
530  ///
531  /// This is a very hard problem in general, so we use domain-specific
532  /// knowledge that we only ever check for equivalence between blocks sharing a
533  /// single immediate successor that is common, and when determining if I1 ==
534  /// I2 we will have already determined that next(I1) == next(I2). This
535  /// inductive property allows us to simply return the value number of the next
536  /// instruction that defines memory.
537  uint32_t getMemoryUseOrder(Instruction *Inst) {
538  auto *BB = Inst->getParent();
539  for (auto I = std::next(Inst->getIterator()), E = BB->end();
540  I != E && !I->isTerminator(); ++I) {
541  if (!isMemoryInst(&*I))
542  continue;
543  if (isa<LoadInst>(&*I))
544  continue;
545  CallInst *CI = dyn_cast<CallInst>(&*I);
546  if (CI && CI->onlyReadsMemory())
547  continue;
548  InvokeInst *II = dyn_cast<InvokeInst>(&*I);
549  if (II && II->onlyReadsMemory())
550  continue;
551  return lookupOrAdd(&*I);
552  }
553  return 0;
554  }
555 };
556 
557 //===----------------------------------------------------------------------===//
558 
559 class GVNSink {
560 public:
561  GVNSink() = default;
562 
563  bool run(Function &F) {
564  LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
565  << "\n");
566 
567  unsigned NumSunk = 0;
569  for (auto *N : RPOT)
570  NumSunk += sinkBB(N);
571 
572  return NumSunk > 0;
573  }
574 
575 private:
576  ValueTable VN;
577 
578  bool isInstructionBlacklisted(Instruction *I) {
579  // These instructions may change or break semantics if moved.
580  if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
581  I->getType()->isTokenTy())
582  return true;
583  return false;
584  }
585 
586  /// The main heuristic function. Analyze the set of instructions pointed to by
587  /// LRI and return a candidate solution if these instructions can be sunk, or
588  /// None otherwise.
589  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
590  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
591  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
592 
593  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
594  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
595  SmallPtrSetImpl<Value *> &PHIContents) {
596  for (PHINode &PN : BB->phis()) {
597  auto MPHI = ModelledPHI(&PN);
598  PHIs.insert(MPHI);
599  for (auto *V : MPHI.getValues())
600  PHIContents.insert(V);
601  }
602  }
603 
604  /// The main instruction sinking driver. Set up state and try and sink
605  /// instructions into BBEnd from its predecessors.
606  unsigned sinkBB(BasicBlock *BBEnd);
607 
608  /// Perform the actual mechanics of sinking an instruction from Blocks into
609  /// BBEnd, which is their only successor.
611 
612  /// Remove PHIs that all have the same incoming value.
613  void foldPointlessPHINodes(BasicBlock *BB) {
614  auto I = BB->begin();
615  while (PHINode *PN = dyn_cast<PHINode>(I++)) {
616  if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
617  return V == PN->getIncomingValue(0);
618  }))
619  continue;
620  if (PN->getIncomingValue(0) != PN)
621  PN->replaceAllUsesWith(PN->getIncomingValue(0));
622  else
623  PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
624  PN->eraseFromParent();
625  }
626  }
627 };
628 
629 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
630  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
631  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
632  auto Insts = *LRI;
633  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
634  : Insts) {
635  I->dump();
636  } dbgs() << " ]\n";);
637 
639  for (auto *I : Insts) {
640  uint32_t N = VN.lookupOrAdd(I);
641  LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
642  if (N == ~0U)
643  return None;
644  VNums[N]++;
645  }
646  unsigned VNumToSink =
647  std::max_element(VNums.begin(), VNums.end(),
648  [](const std::pair<uint32_t, unsigned> &I,
649  const std::pair<uint32_t, unsigned> &J) {
650  return I.second < J.second;
651  })
652  ->first;
653 
654  if (VNums[VNumToSink] == 1)
655  // Can't sink anything!
656  return None;
657 
658  // Now restrict the number of incoming blocks down to only those with
659  // VNumToSink.
660  auto &ActivePreds = LRI.getActiveBlocks();
661  unsigned InitialActivePredSize = ActivePreds.size();
663  for (auto *I : Insts) {
664  if (VN.lookup(I) != VNumToSink)
665  ActivePreds.remove(I->getParent());
666  else
667  NewInsts.push_back(I);
668  }
669  for (auto *I : NewInsts)
670  if (isInstructionBlacklisted(I))
671  return None;
672 
673  // If we've restricted the incoming blocks, restrict all needed PHIs also
674  // to that set.
675  bool RecomputePHIContents = false;
676  if (ActivePreds.size() != InitialActivePredSize) {
677  ModelledPHISet NewNeededPHIs;
678  for (auto P : NeededPHIs) {
679  P.restrictToBlocks(ActivePreds);
680  NewNeededPHIs.insert(P);
681  }
682  NeededPHIs = NewNeededPHIs;
683  LRI.restrictToBlocks(ActivePreds);
684  RecomputePHIContents = true;
685  }
686 
687  // The sunk instruction's results.
688  ModelledPHI NewPHI(NewInsts, ActivePreds);
689 
690  // Does sinking this instruction render previous PHIs redundant?
691  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
692  NeededPHIs.erase(NewPHI);
693  RecomputePHIContents = true;
694  }
695 
696  if (RecomputePHIContents) {
697  // The needed PHIs have changed, so recompute the set of all needed
698  // values.
699  PHIContents.clear();
700  for (auto &PHI : NeededPHIs)
701  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
702  }
703 
704  // Is this instruction required by a later PHI that doesn't match this PHI?
705  // if so, we can't sink this instruction.
706  for (auto *V : NewPHI.getValues())
707  if (PHIContents.count(V))
708  // V exists in this PHI, but the whole PHI is different to NewPHI
709  // (else it would have been removed earlier). We cannot continue
710  // because this isn't representable.
711  return None;
712 
713  // Which operands need PHIs?
714  // FIXME: If any of these fail, we should partition up the candidates to
715  // try and continue making progress.
716  Instruction *I0 = NewInsts[0];
717  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
718  ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
719  if (PHI.areAllIncomingValuesSame())
720  continue;
721  if (!canReplaceOperandWithVariable(I0, OpNum))
722  // We can 't create a PHI from this instruction!
723  return None;
724  if (NeededPHIs.count(PHI))
725  continue;
726  if (!PHI.areAllIncomingValuesSameType())
727  return None;
728  // Don't create indirect calls! The called value is the final operand.
729  if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
730  PHI.areAnyIncomingValuesConstant())
731  return None;
732 
733  NeededPHIs.reserve(NeededPHIs.size());
734  NeededPHIs.insert(PHI);
735  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
736  }
737 
738  if (isMemoryInst(NewInsts[0]))
739  ++MemoryInstNum;
740 
741  SinkingInstructionCandidate Cand;
742  Cand.NumInstructions = ++InstNum;
743  Cand.NumMemoryInsts = MemoryInstNum;
744  Cand.NumBlocks = ActivePreds.size();
745  Cand.NumPHIs = NeededPHIs.size();
746  for (auto *C : ActivePreds)
747  Cand.Blocks.push_back(C);
748 
749  return Cand;
750 }
751 
752 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
753  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
754  BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
756  for (auto *B : predecessors(BBEnd)) {
757  auto *T = B->getTerminator();
758  if (isa<BranchInst>(T) || isa<SwitchInst>(T))
759  Preds.push_back(B);
760  else
761  return 0;
762  }
763  if (Preds.size() < 2)
764  return 0;
765  llvm::sort(Preds.begin(), Preds.end());
766 
767  unsigned NumOrigPreds = Preds.size();
768  // We can only sink instructions through unconditional branches.
769  for (auto I = Preds.begin(); I != Preds.end();) {
770  if ((*I)->getTerminator()->getNumSuccessors() != 1)
771  I = Preds.erase(I);
772  else
773  ++I;
774  }
775 
776  LockstepReverseIterator LRI(Preds);
778  unsigned InstNum = 0, MemoryInstNum = 0;
779  ModelledPHISet NeededPHIs;
780  SmallPtrSet<Value *, 4> PHIContents;
781  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
782  unsigned NumOrigPHIs = NeededPHIs.size();
783 
784  while (LRI.isValid()) {
785  auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
786  NeededPHIs, PHIContents);
787  if (!Cand)
788  break;
789  Cand->calculateCost(NumOrigPHIs, Preds.size());
790  Candidates.emplace_back(*Cand);
791  --LRI;
792  }
793 
794  std::stable_sort(
795  Candidates.begin(), Candidates.end(),
796  [](const SinkingInstructionCandidate &A,
797  const SinkingInstructionCandidate &B) { return A > B; });
798  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
799  : Candidates) dbgs()
800  << " " << C << "\n";);
801 
802  // Pick the top candidate, as long it is positive!
803  if (Candidates.empty() || Candidates.front().Cost <= 0)
804  return 0;
805  auto C = Candidates.front();
806 
807  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
808  BasicBlock *InsertBB = BBEnd;
809  if (C.Blocks.size() < NumOrigPreds) {
810  LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
811  BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
812  InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
813  if (!InsertBB) {
814  LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
815  // Edge couldn't be split.
816  return 0;
817  }
818  }
819 
820  for (unsigned I = 0; I < C.NumInstructions; ++I)
821  sinkLastInstruction(C.Blocks, InsertBB);
822 
823  return C.NumInstructions;
824 }
825 
827  BasicBlock *BBEnd) {
829  for (BasicBlock *BB : Blocks)
830  Insts.push_back(BB->getTerminator()->getPrevNode());
831  Instruction *I0 = Insts.front();
832 
833  SmallVector<Value *, 4> NewOperands;
834  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
835  bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
836  return I->getOperand(O) != I0->getOperand(O);
837  });
838  if (!NeedPHI) {
839  NewOperands.push_back(I0->getOperand(O));
840  continue;
841  }
842 
843  // Create a new PHI in the successor block and populate it.
844  auto *Op = I0->getOperand(O);
845  assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
846  auto *PN = PHINode::Create(Op->getType(), Insts.size(),
847  Op->getName() + ".sink", &BBEnd->front());
848  for (auto *I : Insts)
849  PN->addIncoming(I->getOperand(O), I->getParent());
850  NewOperands.push_back(PN);
851  }
852 
853  // Arbitrarily use I0 as the new "common" instruction; remap its operands
854  // and move it to the start of the successor block.
855  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
856  I0->getOperandUse(O).set(NewOperands[O]);
857  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
858 
859  // Update metadata and IR flags.
860  for (auto *I : Insts)
861  if (I != I0) {
862  combineMetadataForCSE(I0, I);
863  I0->andIRFlags(I);
864  }
865 
866  for (auto *I : Insts)
867  if (I != I0)
868  I->replaceAllUsesWith(I0);
869  foldPointlessPHINodes(BBEnd);
870 
871  // Finally nuke all instructions apart from the common instruction.
872  for (auto *I : Insts)
873  if (I != I0)
874  I->eraseFromParent();
875 
876  NumRemoved += Insts.size() - 1;
877 }
878 
879 ////////////////////////////////////////////////////////////////////////////////
880 // Pass machinery / boilerplate
881 
882 class GVNSinkLegacyPass : public FunctionPass {
883 public:
884  static char ID;
885 
886  GVNSinkLegacyPass() : FunctionPass(ID) {
888  }
889 
890  bool runOnFunction(Function &F) override {
891  if (skipFunction(F))
892  return false;
893  GVNSink G;
894  return G.run(F);
895  }
896 
897  void getAnalysisUsage(AnalysisUsage &AU) const override {
899  }
900 };
901 
902 } // end anonymous namespace
903 
905  GVNSink G;
906  if (!G.run(F))
907  return PreservedAnalyses::all();
908 
910  PA.preserve<GlobalsAA>();
911  return PA;
912 }
913 
914 char GVNSinkLegacyPass::ID = 0;
915 
916 INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
917  "Early GVN sinking of Expressions", false, false)
920 INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
921  "Early GVN sinking of Expressions", false, false)
922 
923 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
const NoneType None
Definition: None.h:24
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:875
iterator_range< use_iterator > uses()
Definition: Value.h:354
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool isStrongerThanUnordered(AtomicOrdering ao)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
gvn sink
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: GVNSink.cpp:920
Atomic ordering constants.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This is the interface for a simple mod/ref and alias analysis over globals.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:137
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
This class represents a function call, abstracting a target machine&#39;s calling convention.
Recycle small arrays allocated from a BumpPtrAllocator.
Definition: ArrayRecycler.h:29
const Use & getOperandUse(unsigned i) const
Definition: User.h:183
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:908
STATISTIC(NumFunctions, "Total number of functions")
F(f)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:452
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVNSink.cpp:904
bool operator==(const Expression &Other) const
Definition: GVNExpression.h:78
This defines the Use class.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
bool onlyReadsMemory() const
Determine if the call does not access or only reads memory.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4216
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
#define Fail
raw_ostream & operator<<(raw_ostream &OS, const Expression &E)
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", "Early GVN sinking of Expressions", false, false) INITIALIZE_PASS_END(GVNSinkLegacyPass
static const uint16_t * lookup(unsigned opcode, unsigned domain, ArrayRef< uint16_t[3]> Table)
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:158
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:772
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2084
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static bool isEqual(const Function &Caller, const Function &Callee)
The core GVN pass object.
Definition: GVN.h:68
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
LLVM_DUMP_METHOD void dump() const
Definition: GVNSink.cpp:91
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
virtual hash_code getHashValue() const
Value * getOperand(unsigned i) const
Definition: User.h:170
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
void print(raw_ostream &OS) const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:218
static bool sinkLastInstruction(ArrayRef< BasicBlock *> Blocks)
void set(Value *Val)
Definition: Value.h:670
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:2720
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:276
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:915
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:885
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
static unsigned getTombstoneKey()
Definition: GVNExpression.h:75
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
iterator erase(const_iterator CI)
Definition: SmallVector.h:446
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static wasm::ValType getType(const TargetRegisterClass *RC)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:929
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4143
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned first
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:859
hash_code getHashValue() const override
Basic Register Allocator
Machine code sinking
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void setOpcode(unsigned opcode)
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
Definition: Recycler.h:35
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
gvn Early GVN sinking of Expressions
Definition: GVNSink.cpp:920
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
static Twine utohexstr(const uint64_t &Val)
Definition: Twine.h:385
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:113
void clear(AllocatorType &Allocator)
Release all the tracked allocations to the allocator.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:602
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:480
An opaque object representing a hash code.
Definition: Hashing.h:72
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:211
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:170
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:653
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
FunctionPass * createGVNSinkPass()
Definition: GVNSink.cpp:923
bool isTokenTy() const
Return true if this is &#39;token&#39;.
Definition: Type.h:194
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
void initializeGVNSinkLegacyPassPass(PassRegistry &)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:320
void combineMetadataForCSE(Instruction *K, const Instruction *J)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:2321
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
The header file for the GVN pass that contains expression handling classes.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Invoke instruction.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:552
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:254
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:119
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:960
const BasicBlock * getParent() const
Definition: Instruction.h:67