LLVM  6.0.0svn
GVNSink.cpp
Go to the documentation of this file.
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file GVNSink.cpp
11 /// This pass attempts to sink instructions into successors, reducing static
12 /// instruction count and enabling if-conversion.
13 ///
14 /// We use a variant of global value numbering to decide what can be sunk.
15 /// Consider:
16 ///
17 /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
18 /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
19 /// \ /
20 /// [ %e = phi i32 %a2, %c2 ]
21 /// [ add i32 %e, 4 ]
22 ///
23 ///
24 /// GVN would number %a1 and %c1 differently because they compute different
25 /// results - the VN of an instruction is a function of its opcode and the
26 /// transitive closure of its operands. This is the key property for hoisting
27 /// and CSE.
28 ///
29 /// What we want when sinking however is for a numbering that is a function of
30 /// the *uses* of an instruction, which allows us to answer the question "if I
31 /// replace %a1 with %c1, will it contribute in an equivalent way to all
32 /// successive instructions?". The PostValueTable class in GVN provides this
33 /// mapping.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/DenseMap.h"
39 #include "llvm/ADT/DenseMapInfo.h"
40 #include "llvm/ADT/DenseSet.h"
41 #include "llvm/ADT/Hashing.h"
42 #include "llvm/ADT/None.h"
43 #include "llvm/ADT/Optional.h"
45 #include "llvm/ADT/STLExtras.h"
46 #include "llvm/ADT/SmallPtrSet.h"
47 #include "llvm/ADT/SmallVector.h"
48 #include "llvm/ADT/Statistic.h"
49 #include "llvm/ADT/StringExtras.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CFG.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/InstrTypes.h"
56 #include "llvm/IR/Instruction.h"
57 #include "llvm/IR/Instructions.h"
58 #include "llvm/IR/PassManager.h"
59 #include "llvm/IR/Type.h"
60 #include "llvm/IR/Use.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/Allocator.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Compiler.h"
68 #include "llvm/Support/Debug.h"
70 #include "llvm/Transforms/Scalar.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <utility>
81 
82 using namespace llvm;
83 
84 #define DEBUG_TYPE "gvn-sink"
85 
86 STATISTIC(NumRemoved, "Number of instructions removed");
87 
88 namespace llvm {
89 namespace GVNExpression {
90 
92  print(dbgs());
93  dbgs() << "\n";
94 }
95 
96 } // end namespace GVNExpression
97 } // end namespace llvm
98 
99 namespace {
100 
101 static bool isMemoryInst(const Instruction *I) {
102  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
103  (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
104  (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
105 }
106 
107 /// Iterates through instructions in a set of blocks in reverse order from the
108 /// first non-terminator. For example (assume all blocks have size n):
109 /// LockstepReverseIterator I([B1, B2, B3]);
110 /// *I-- = [B1[n], B2[n], B3[n]];
111 /// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
112 /// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
113 /// ...
114 ///
115 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
116 /// to
117 /// determine which blocks are still going and the order they appear in the
118 /// list returned by operator*.
119 class LockstepReverseIterator {
120  ArrayRef<BasicBlock *> Blocks;
121  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
123  bool Fail;
124 
125 public:
126  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
127  reset();
128  }
129 
130  void reset() {
131  Fail = false;
132  ActiveBlocks.clear();
133  for (BasicBlock *BB : Blocks)
134  ActiveBlocks.insert(BB);
135  Insts.clear();
136  for (BasicBlock *BB : Blocks) {
137  if (BB->size() <= 1) {
138  // Block wasn't big enough - only contained a terminator.
139  ActiveBlocks.remove(BB);
140  continue;
141  }
142  Insts.push_back(BB->getTerminator()->getPrevNode());
143  }
144  if (Insts.empty())
145  Fail = true;
146  }
147 
148  bool isValid() const { return !Fail; }
149  ArrayRef<Instruction *> operator*() const { return Insts; }
150 
151  // Note: This needs to return a SmallSetVector as the elements of
152  // ActiveBlocks will be later copied to Blocks using std::copy. The
153  // resultant order of elements in Blocks needs to be deterministic.
154  // Using SmallPtrSet instead causes non-deterministic order while
155  // copying. And we cannot simply sort Blocks as they need to match the
156  // corresponding Values.
157  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
158 
159  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
160  for (auto II = Insts.begin(); II != Insts.end();) {
161  if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
162  Blocks.end()) {
163  ActiveBlocks.remove((*II)->getParent());
164  II = Insts.erase(II);
165  } else {
166  ++II;
167  }
168  }
169  }
170 
171  void operator--() {
172  if (Fail)
173  return;
175  for (auto *Inst : Insts) {
176  if (Inst == &Inst->getParent()->front())
177  ActiveBlocks.remove(Inst->getParent());
178  else
179  NewInsts.push_back(Inst->getPrevNode());
180  }
181  if (NewInsts.empty()) {
182  Fail = true;
183  return;
184  }
185  Insts = NewInsts;
186  }
187 };
188 
189 //===----------------------------------------------------------------------===//
190 
191 /// Candidate solution for sinking. There may be different ways to
192 /// sink instructions, differing in the number of instructions sunk,
193 /// the number of predecessors sunk from and the number of PHIs
194 /// required.
195 struct SinkingInstructionCandidate {
196  unsigned NumBlocks;
197  unsigned NumInstructions;
198  unsigned NumPHIs;
199  unsigned NumMemoryInsts;
200  int Cost = -1;
202 
203  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
204  unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
205  unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
206  Cost = (NumInstructions * (NumBlocks - 1)) -
207  (NumExtraPHIs *
208  NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
209  - SplitEdgeCost;
210  }
211 
212  bool operator>(const SinkingInstructionCandidate &Other) const {
213  return Cost > Other.Cost;
214  }
215 };
216 
217 #ifndef NDEBUG
218 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
219  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
220  << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
221  return OS;
222 }
223 #endif
224 
225 //===----------------------------------------------------------------------===//
226 
227 /// Describes a PHI node that may or may not exist. These track the PHIs
228 /// that must be created if we sunk a sequence of instructions. It provides
229 /// a hash function for efficient equality comparisons.
230 class ModelledPHI {
233 
234 public:
235  ModelledPHI() = default;
236 
237  ModelledPHI(const PHINode *PN) {
238  // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
240  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
241  Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
242  std::sort(Ops.begin(), Ops.end());
243  for (auto &P : Ops) {
244  Blocks.push_back(P.first);
245  Values.push_back(P.second);
246  }
247  }
248 
249  /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
250  /// without the same ID.
251  /// \note This is specifically for DenseMapInfo - do not use this!
252  static ModelledPHI createDummy(size_t ID) {
253  ModelledPHI M;
254  M.Values.push_back(reinterpret_cast<Value*>(ID));
255  return M;
256  }
257 
258  /// Create a PHI from an array of incoming values and incoming blocks.
259  template <typename VArray, typename BArray>
260  ModelledPHI(const VArray &V, const BArray &B) {
261  std::copy(V.begin(), V.end(), std::back_inserter(Values));
262  std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
263  }
264 
265  /// Create a PHI from [I[OpNum] for I in Insts].
266  template <typename BArray>
267  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
268  std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
269  for (auto *I : Insts)
270  Values.push_back(I->getOperand(OpNum));
271  }
272 
273  /// Restrict the PHI's contents down to only \c NewBlocks.
274  /// \c NewBlocks must be a subset of \c this->Blocks.
275  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
276  auto BI = Blocks.begin();
277  auto VI = Values.begin();
278  while (BI != Blocks.end()) {
279  assert(VI != Values.end());
280  if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
281  NewBlocks.end()) {
282  BI = Blocks.erase(BI);
283  VI = Values.erase(VI);
284  } else {
285  ++BI;
286  ++VI;
287  }
288  }
289  assert(Blocks.size() == NewBlocks.size());
290  }
291 
292  ArrayRef<Value *> getValues() const { return Values; }
293 
294  bool areAllIncomingValuesSame() const {
295  return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
296  }
297 
298  bool areAllIncomingValuesSameType() const {
299  return llvm::all_of(
300  Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
301  }
302 
303  bool areAnyIncomingValuesConstant() const {
304  return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
305  }
306 
307  // Hash functor
308  unsigned hash() const {
309  return (unsigned)hash_combine_range(Values.begin(), Values.end());
310  }
311 
312  bool operator==(const ModelledPHI &Other) const {
313  return Values == Other.Values && Blocks == Other.Blocks;
314  }
315 };
316 
317 template <typename ModelledPHI> struct DenseMapInfo {
318  static inline ModelledPHI &getEmptyKey() {
319  static ModelledPHI Dummy = ModelledPHI::createDummy(0);
320  return Dummy;
321  }
322 
323  static inline ModelledPHI &getTombstoneKey() {
324  static ModelledPHI Dummy = ModelledPHI::createDummy(1);
325  return Dummy;
326  }
327 
328  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
329 
330  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
331  return LHS == RHS;
332  }
333 };
334 
336 
337 //===----------------------------------------------------------------------===//
338 // ValueTable
339 //===----------------------------------------------------------------------===//
340 // This is a value number table where the value number is a function of the
341 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
342 // that the program would be equivalent if we replaced A with PHI(A, B).
343 //===----------------------------------------------------------------------===//
344 
345 /// A GVN expression describing how an instruction is used. The operands
346 /// field of BasicExpression is used to store uses, not operands.
347 ///
348 /// This class also contains fields for discriminators used when determining
349 /// equivalence of instructions with sideeffects.
350 class InstructionUseExpr : public GVNExpression::BasicExpression {
351  unsigned MemoryUseOrder = -1;
352  bool Volatile = false;
353 
354 public:
355  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
356  BumpPtrAllocator &A)
358  allocateOperands(R, A);
359  setOpcode(I->getOpcode());
360  setType(I->getType());
361 
362  for (auto &U : I->uses())
363  op_push_back(U.getUser());
364  std::sort(op_begin(), op_end());
365  }
366 
367  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
368  void setVolatile(bool V) { Volatile = V; }
369 
370  hash_code getHashValue() const override {
372  MemoryUseOrder, Volatile);
373  }
374 
375  template <typename Function> hash_code getHashValue(Function MapFn) {
376  hash_code H =
377  hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
378  for (auto *V : operands())
379  H = hash_combine(H, MapFn(V));
380  return H;
381  }
382 };
383 
384 class ValueTable {
385  DenseMap<Value *, uint32_t> ValueNumbering;
387  DenseMap<size_t, uint32_t> HashNumbering;
390  uint32_t nextValueNumber = 1;
391 
392  /// Create an expression for I based on its opcode and its uses. If I
393  /// touches or reads memory, the expression is also based upon its memory
394  /// order - see \c getMemoryUseOrder().
395  InstructionUseExpr *createExpr(Instruction *I) {
396  InstructionUseExpr *E =
397  new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
398  if (isMemoryInst(I))
399  E->setMemoryUseOrder(getMemoryUseOrder(I));
400 
401  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
402  CmpInst::Predicate Predicate = C->getPredicate();
403  E->setOpcode((C->getOpcode() << 8) | Predicate);
404  }
405  return E;
406  }
407 
408  /// Helper to compute the value number for a memory instruction
409  /// (LoadInst/StoreInst), including checking the memory ordering and
410  /// volatility.
411  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
412  if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
413  return nullptr;
414  InstructionUseExpr *E = createExpr(I);
415  E->setVolatile(I->isVolatile());
416  return E;
417  }
418 
419 public:
420  ValueTable() = default;
421 
422  /// Returns the value number for the specified value, assigning
423  /// it a new number if it did not have one before.
424  uint32_t lookupOrAdd(Value *V) {
425  auto VI = ValueNumbering.find(V);
426  if (VI != ValueNumbering.end())
427  return VI->second;
428 
429  if (!isa<Instruction>(V)) {
430  ValueNumbering[V] = nextValueNumber;
431  return nextValueNumber++;
432  }
433 
434  Instruction *I = cast<Instruction>(V);
435  InstructionUseExpr *exp = nullptr;
436  switch (I->getOpcode()) {
437  case Instruction::Load:
438  exp = createMemoryExpr(cast<LoadInst>(I));
439  break;
440  case Instruction::Store:
441  exp = createMemoryExpr(cast<StoreInst>(I));
442  break;
443  case Instruction::Call:
444  case Instruction::Invoke:
445  case Instruction::Add:
446  case Instruction::FAdd:
447  case Instruction::Sub:
448  case Instruction::FSub:
449  case Instruction::Mul:
450  case Instruction::FMul:
451  case Instruction::UDiv:
452  case Instruction::SDiv:
453  case Instruction::FDiv:
454  case Instruction::URem:
455  case Instruction::SRem:
456  case Instruction::FRem:
457  case Instruction::Shl:
458  case Instruction::LShr:
459  case Instruction::AShr:
460  case Instruction::And:
461  case Instruction::Or:
462  case Instruction::Xor:
463  case Instruction::ICmp:
464  case Instruction::FCmp:
465  case Instruction::Trunc:
466  case Instruction::ZExt:
467  case Instruction::SExt:
468  case Instruction::FPToUI:
469  case Instruction::FPToSI:
470  case Instruction::UIToFP:
471  case Instruction::SIToFP:
472  case Instruction::FPTrunc:
473  case Instruction::FPExt:
474  case Instruction::PtrToInt:
475  case Instruction::IntToPtr:
476  case Instruction::BitCast:
477  case Instruction::Select:
478  case Instruction::ExtractElement:
479  case Instruction::InsertElement:
480  case Instruction::ShuffleVector:
481  case Instruction::InsertValue:
482  case Instruction::GetElementPtr:
483  exp = createExpr(I);
484  break;
485  default:
486  break;
487  }
488 
489  if (!exp) {
490  ValueNumbering[V] = nextValueNumber;
491  return nextValueNumber++;
492  }
493 
494  uint32_t e = ExpressionNumbering[exp];
495  if (!e) {
496  hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
497  auto I = HashNumbering.find(H);
498  if (I != HashNumbering.end()) {
499  e = I->second;
500  } else {
501  e = nextValueNumber++;
502  HashNumbering[H] = e;
503  ExpressionNumbering[exp] = e;
504  }
505  }
506  ValueNumbering[V] = e;
507  return e;
508  }
509 
510  /// Returns the value number of the specified value. Fails if the value has
511  /// not yet been numbered.
512  uint32_t lookup(Value *V) const {
513  auto VI = ValueNumbering.find(V);
514  assert(VI != ValueNumbering.end() && "Value not numbered?");
515  return VI->second;
516  }
517 
518  /// Removes all value numberings and resets the value table.
519  void clear() {
520  ValueNumbering.clear();
521  ExpressionNumbering.clear();
522  HashNumbering.clear();
523  Recycler.clear(Allocator);
524  nextValueNumber = 1;
525  }
526 
527  /// \c Inst uses or touches memory. Return an ID describing the memory state
528  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
529  /// the exact same memory operations happen after I1 and I2.
530  ///
531  /// This is a very hard problem in general, so we use domain-specific
532  /// knowledge that we only ever check for equivalence between blocks sharing a
533  /// single immediate successor that is common, and when determining if I1 ==
534  /// I2 we will have already determined that next(I1) == next(I2). This
535  /// inductive property allows us to simply return the value number of the next
536  /// instruction that defines memory.
537  uint32_t getMemoryUseOrder(Instruction *Inst) {
538  auto *BB = Inst->getParent();
539  for (auto I = std::next(Inst->getIterator()), E = BB->end();
540  I != E && !I->isTerminator(); ++I) {
541  if (!isMemoryInst(&*I))
542  continue;
543  if (isa<LoadInst>(&*I))
544  continue;
545  CallInst *CI = dyn_cast<CallInst>(&*I);
546  if (CI && CI->onlyReadsMemory())
547  continue;
548  InvokeInst *II = dyn_cast<InvokeInst>(&*I);
549  if (II && II->onlyReadsMemory())
550  continue;
551  return lookupOrAdd(&*I);
552  }
553  return 0;
554  }
555 };
556 
557 //===----------------------------------------------------------------------===//
558 
559 class GVNSink {
560 public:
561  GVNSink() = default;
562 
563  bool run(Function &F) {
564  DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
565 
566  unsigned NumSunk = 0;
568  for (auto *N : RPOT)
569  NumSunk += sinkBB(N);
570 
571  return NumSunk > 0;
572  }
573 
574 private:
575  ValueTable VN;
576 
577  bool isInstructionBlacklisted(Instruction *I) {
578  // These instructions may change or break semantics if moved.
579  if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
580  I->getType()->isTokenTy())
581  return true;
582  return false;
583  }
584 
585  /// The main heuristic function. Analyze the set of instructions pointed to by
586  /// LRI and return a candidate solution if these instructions can be sunk, or
587  /// None otherwise.
588  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
589  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
590  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
591 
592  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
593  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
594  SmallPtrSetImpl<Value *> &PHIContents) {
595  for (auto &I : *BB) {
596  auto *PN = dyn_cast<PHINode>(&I);
597  if (!PN)
598  return;
599 
600  auto MPHI = ModelledPHI(PN);
601  PHIs.insert(MPHI);
602  for (auto *V : MPHI.getValues())
603  PHIContents.insert(V);
604  }
605  }
606 
607  /// The main instruction sinking driver. Set up state and try and sink
608  /// instructions into BBEnd from its predecessors.
609  unsigned sinkBB(BasicBlock *BBEnd);
610 
611  /// Perform the actual mechanics of sinking an instruction from Blocks into
612  /// BBEnd, which is their only successor.
614 
615  /// Remove PHIs that all have the same incoming value.
616  void foldPointlessPHINodes(BasicBlock *BB) {
617  auto I = BB->begin();
618  while (PHINode *PN = dyn_cast<PHINode>(I++)) {
619  if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
620  return V == PN->getIncomingValue(0);
621  }))
622  continue;
623  if (PN->getIncomingValue(0) != PN)
624  PN->replaceAllUsesWith(PN->getIncomingValue(0));
625  else
626  PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
627  PN->eraseFromParent();
628  }
629  }
630 };
631 
632 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
633  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
634  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
635  auto Insts = *LRI;
636  DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
637  : Insts) {
638  I->dump();
639  } dbgs() << " ]\n";);
640 
642  for (auto *I : Insts) {
643  uint32_t N = VN.lookupOrAdd(I);
644  DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
645  if (N == ~0U)
646  return None;
647  VNums[N]++;
648  }
649  unsigned VNumToSink =
650  std::max_element(VNums.begin(), VNums.end(),
651  [](const std::pair<uint32_t, unsigned> &I,
652  const std::pair<uint32_t, unsigned> &J) {
653  return I.second < J.second;
654  })
655  ->first;
656 
657  if (VNums[VNumToSink] == 1)
658  // Can't sink anything!
659  return None;
660 
661  // Now restrict the number of incoming blocks down to only those with
662  // VNumToSink.
663  auto &ActivePreds = LRI.getActiveBlocks();
664  unsigned InitialActivePredSize = ActivePreds.size();
666  for (auto *I : Insts) {
667  if (VN.lookup(I) != VNumToSink)
668  ActivePreds.remove(I->getParent());
669  else
670  NewInsts.push_back(I);
671  }
672  for (auto *I : NewInsts)
673  if (isInstructionBlacklisted(I))
674  return None;
675 
676  // If we've restricted the incoming blocks, restrict all needed PHIs also
677  // to that set.
678  bool RecomputePHIContents = false;
679  if (ActivePreds.size() != InitialActivePredSize) {
680  ModelledPHISet NewNeededPHIs;
681  for (auto P : NeededPHIs) {
682  P.restrictToBlocks(ActivePreds);
683  NewNeededPHIs.insert(P);
684  }
685  NeededPHIs = NewNeededPHIs;
686  LRI.restrictToBlocks(ActivePreds);
687  RecomputePHIContents = true;
688  }
689 
690  // The sunk instruction's results.
691  ModelledPHI NewPHI(NewInsts, ActivePreds);
692 
693  // Does sinking this instruction render previous PHIs redundant?
694  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
695  NeededPHIs.erase(NewPHI);
696  RecomputePHIContents = true;
697  }
698 
699  if (RecomputePHIContents) {
700  // The needed PHIs have changed, so recompute the set of all needed
701  // values.
702  PHIContents.clear();
703  for (auto &PHI : NeededPHIs)
704  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
705  }
706 
707  // Is this instruction required by a later PHI that doesn't match this PHI?
708  // if so, we can't sink this instruction.
709  for (auto *V : NewPHI.getValues())
710  if (PHIContents.count(V))
711  // V exists in this PHI, but the whole PHI is different to NewPHI
712  // (else it would have been removed earlier). We cannot continue
713  // because this isn't representable.
714  return None;
715 
716  // Which operands need PHIs?
717  // FIXME: If any of these fail, we should partition up the candidates to
718  // try and continue making progress.
719  Instruction *I0 = NewInsts[0];
720  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
721  ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
722  if (PHI.areAllIncomingValuesSame())
723  continue;
724  if (!canReplaceOperandWithVariable(I0, OpNum))
725  // We can 't create a PHI from this instruction!
726  return None;
727  if (NeededPHIs.count(PHI))
728  continue;
729  if (!PHI.areAllIncomingValuesSameType())
730  return None;
731  // Don't create indirect calls! The called value is the final operand.
732  if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
733  PHI.areAnyIncomingValuesConstant())
734  return None;
735 
736  NeededPHIs.reserve(NeededPHIs.size());
737  NeededPHIs.insert(PHI);
738  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
739  }
740 
741  if (isMemoryInst(NewInsts[0]))
742  ++MemoryInstNum;
743 
744  SinkingInstructionCandidate Cand;
745  Cand.NumInstructions = ++InstNum;
746  Cand.NumMemoryInsts = MemoryInstNum;
747  Cand.NumBlocks = ActivePreds.size();
748  Cand.NumPHIs = NeededPHIs.size();
749  for (auto *C : ActivePreds)
750  Cand.Blocks.push_back(C);
751 
752  return Cand;
753 }
754 
755 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
756  DEBUG(dbgs() << "GVNSink: running on basic block ";
757  BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
759  for (auto *B : predecessors(BBEnd)) {
760  auto *T = B->getTerminator();
761  if (isa<BranchInst>(T) || isa<SwitchInst>(T))
762  Preds.push_back(B);
763  else
764  return 0;
765  }
766  if (Preds.size() < 2)
767  return 0;
768  std::sort(Preds.begin(), Preds.end());
769 
770  unsigned NumOrigPreds = Preds.size();
771  // We can only sink instructions through unconditional branches.
772  for (auto I = Preds.begin(); I != Preds.end();) {
773  if ((*I)->getTerminator()->getNumSuccessors() != 1)
774  I = Preds.erase(I);
775  else
776  ++I;
777  }
778 
779  LockstepReverseIterator LRI(Preds);
781  unsigned InstNum = 0, MemoryInstNum = 0;
782  ModelledPHISet NeededPHIs;
783  SmallPtrSet<Value *, 4> PHIContents;
784  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
785  unsigned NumOrigPHIs = NeededPHIs.size();
786 
787  while (LRI.isValid()) {
788  auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
789  NeededPHIs, PHIContents);
790  if (!Cand)
791  break;
792  Cand->calculateCost(NumOrigPHIs, Preds.size());
793  Candidates.emplace_back(*Cand);
794  --LRI;
795  }
796 
797  std::stable_sort(
798  Candidates.begin(), Candidates.end(),
799  [](const SinkingInstructionCandidate &A,
800  const SinkingInstructionCandidate &B) { return A > B; });
801  DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
802  : Candidates) dbgs()
803  << " " << C << "\n";);
804 
805  // Pick the top candidate, as long it is positive!
806  if (Candidates.empty() || Candidates.front().Cost <= 0)
807  return 0;
808  auto C = Candidates.front();
809 
810  DEBUG(dbgs() << " -- Sinking: " << C << "\n");
811  BasicBlock *InsertBB = BBEnd;
812  if (C.Blocks.size() < NumOrigPreds) {
813  DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
814  dbgs() << "\n");
815  InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
816  if (!InsertBB) {
817  DEBUG(dbgs() << " -- FAILED to split edge!\n");
818  // Edge couldn't be split.
819  return 0;
820  }
821  }
822 
823  for (unsigned I = 0; I < C.NumInstructions; ++I)
824  sinkLastInstruction(C.Blocks, InsertBB);
825 
826  return C.NumInstructions;
827 }
828 
830  BasicBlock *BBEnd) {
832  for (BasicBlock *BB : Blocks)
833  Insts.push_back(BB->getTerminator()->getPrevNode());
834  Instruction *I0 = Insts.front();
835 
836  SmallVector<Value *, 4> NewOperands;
837  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
838  bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
839  return I->getOperand(O) != I0->getOperand(O);
840  });
841  if (!NeedPHI) {
842  NewOperands.push_back(I0->getOperand(O));
843  continue;
844  }
845 
846  // Create a new PHI in the successor block and populate it.
847  auto *Op = I0->getOperand(O);
848  assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
849  auto *PN = PHINode::Create(Op->getType(), Insts.size(),
850  Op->getName() + ".sink", &BBEnd->front());
851  for (auto *I : Insts)
852  PN->addIncoming(I->getOperand(O), I->getParent());
853  NewOperands.push_back(PN);
854  }
855 
856  // Arbitrarily use I0 as the new "common" instruction; remap its operands
857  // and move it to the start of the successor block.
858  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
859  I0->getOperandUse(O).set(NewOperands[O]);
860  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
861 
862  // Update metadata and IR flags.
863  for (auto *I : Insts)
864  if (I != I0) {
865  combineMetadataForCSE(I0, I);
866  I0->andIRFlags(I);
867  }
868 
869  for (auto *I : Insts)
870  if (I != I0)
871  I->replaceAllUsesWith(I0);
872  foldPointlessPHINodes(BBEnd);
873 
874  // Finally nuke all instructions apart from the common instruction.
875  for (auto *I : Insts)
876  if (I != I0)
877  I->eraseFromParent();
878 
879  NumRemoved += Insts.size() - 1;
880 }
881 
882 ////////////////////////////////////////////////////////////////////////////////
883 // Pass machinery / boilerplate
884 
885 class GVNSinkLegacyPass : public FunctionPass {
886 public:
887  static char ID;
888 
889  GVNSinkLegacyPass() : FunctionPass(ID) {
891  }
892 
893  bool runOnFunction(Function &F) override {
894  if (skipFunction(F))
895  return false;
896  GVNSink G;
897  return G.run(F);
898  }
899 
900  void getAnalysisUsage(AnalysisUsage &AU) const override {
902  }
903 };
904 
905 } // end anonymous namespace
906 
908  GVNSink G;
909  if (!G.run(F))
910  return PreservedAnalyses::all();
911 
913  PA.preserve<GlobalsAA>();
914  return PA;
915 }
916 
917 char GVNSinkLegacyPass::ID = 0;
918 
919 INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
920  "Early GVN sinking of Expressions", false, false)
923 INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
924  "Early GVN sinking of Expressions", false, false)
925 
926 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
const NoneType None
Definition: None.h:24
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:69
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:843
iterator_range< use_iterator > uses()
Definition: Value.h:356
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool isStrongerThanUnordered(AtomicOrdering ao)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
gvn sink
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: GVNSink.cpp:923
Atomic ordering constants.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This is the interface for a simple mod/ref and alias analysis over globals.
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
Implements a dense probed hash-table based set.
Definition: DenseSet.h:221
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
This class represents a function call, abstracting a target machine&#39;s calling convention.
bool onlyReadsMemory() const
Determine if the call does not access or only reads memory.
Recycle small arrays allocated from a BumpPtrAllocator.
Definition: ArrayRecycler.h:29
const Use & getOperandUse(unsigned i) const
Definition: User.h:167
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:767
STATISTIC(NumFunctions, "Total number of functions")
F(f)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVNSink.cpp:907
bool operator==(const Expression &Other) const
Definition: GVNExpression.h:78
This defines the Use class.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:3641
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
#define Fail
raw_ostream & operator<<(raw_ostream &OS, const Expression &E)
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", "Early GVN sinking of Expressions", false, false) INITIALIZE_PASS_END(GVNSinkLegacyPass
static const uint16_t * lookup(unsigned opcode, unsigned domain, ArrayRef< uint16_t[3]> Table)
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:158
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:736
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2070
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static bool isEqual(const Function &Caller, const Function &Callee)
The core GVN pass object.
Definition: GVN.h:68
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
LLVM_DUMP_METHOD void dump() const
Definition: GVNSink.cpp:91
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
virtual hash_code getHashValue() const
bool onlyReadsMemory() const
Determine if the call does not access or only reads memory.
Value * getOperand(unsigned i) const
Definition: User.h:154
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
void print(raw_ostream &OS) const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:200
static bool sinkLastInstruction(ArrayRef< BasicBlock *> Blocks)
void set(Value *Val)
Definition: Value.h:677
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:138
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:2230
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:264
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:774
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
static unsigned getTombstoneKey()
Definition: GVNExpression.h:75
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1320
iterator erase(const_iterator CI)
Definition: SmallVector.h:449
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
static wasm::ValType getType(const TargetRegisterClass *RC)
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3573
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned first
hash_code getHashValue() const override
Basic Register Allocator
Machine code sinking
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
unsigned getNumOperands() const
Definition: User.h:176
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void setOpcode(unsigned opcode)
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:788
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
Definition: Recycler.h:35
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
gvn Early GVN sinking of Expressions
Definition: GVNSink.cpp:923
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:110
void clear(AllocatorType &Allocator)
Release all the tracked allocations to the allocator.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:602
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:480
An opaque object representing a hash code.
Definition: Hashing.h:72
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:210
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:166
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:656
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
FunctionPass * createGVNSinkPass()
Definition: GVNSink.cpp:926
bool isTokenTy() const
Return true if this is &#39;token&#39;.
Definition: Type.h:194
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
void initializeGVNSinkLegacyPassPass(PassRegistry &)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
void combineMetadataForCSE(Instruction *K, const Instruction *J)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:1829
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
The header file for the GVN pass that contains expression handling classes.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:88
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
Invoke instruction.
#define DEBUG(X)
Definition: Debug.h:118
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:538
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
This header defines various interfaces for pass management in LLVM.
const BasicBlock * getParent() const
Definition: Instruction.h:66
std::string utohexstr(uint64_t X, bool LowerCase=false)
Definition: StringExtras.h:76