LLVM  10.0.0svn
GVNSink.cpp
Go to the documentation of this file.
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file GVNSink.cpp
10 /// This pass attempts to sink instructions into successors, reducing static
11 /// instruction count and enabling if-conversion.
12 ///
13 /// We use a variant of global value numbering to decide what can be sunk.
14 /// Consider:
15 ///
16 /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17 /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18 /// \ /
19 /// [ %e = phi i32 %a2, %c2 ]
20 /// [ add i32 %e, 4 ]
21 ///
22 ///
23 /// GVN would number %a1 and %c1 differently because they compute different
24 /// results - the VN of an instruction is a function of its opcode and the
25 /// transitive closure of its operands. This is the key property for hoisting
26 /// and CSE.
27 ///
28 /// What we want when sinking however is for a numbering that is a function of
29 /// the *uses* of an instruction, which allows us to answer the question "if I
30 /// replace %a1 with %c1, will it contribute in an equivalent way to all
31 /// successive instructions?". The PostValueTable class in GVN provides this
32 /// mapping.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseMapInfo.h"
39 #include "llvm/ADT/DenseSet.h"
40 #include "llvm/ADT/Hashing.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/Optional.h"
44 #include "llvm/ADT/STLExtras.h"
45 #include "llvm/ADT/SmallPtrSet.h"
46 #include "llvm/ADT/SmallVector.h"
47 #include "llvm/ADT/Statistic.h"
48 #include "llvm/ADT/StringExtras.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/CFG.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/InstrTypes.h"
56 #include "llvm/IR/Instruction.h"
57 #include "llvm/IR/Instructions.h"
58 #include "llvm/IR/PassManager.h"
59 #include "llvm/IR/Type.h"
60 #include "llvm/IR/Use.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/Allocator.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Compiler.h"
68 #include "llvm/Support/Debug.h"
70 #include "llvm/Transforms/Scalar.h"
74 #include <algorithm>
75 #include <cassert>
76 #include <cstddef>
77 #include <cstdint>
78 #include <iterator>
79 #include <utility>
80 
81 using namespace llvm;
82 
83 #define DEBUG_TYPE "gvn-sink"
84 
85 STATISTIC(NumRemoved, "Number of instructions removed");
86 
87 namespace llvm {
88 namespace GVNExpression {
89 
91  print(dbgs());
92  dbgs() << "\n";
93 }
94 
95 } // end namespace GVNExpression
96 } // end namespace llvm
97 
98 namespace {
99 
100 static bool isMemoryInst(const Instruction *I) {
101  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
102  (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
103  (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
104 }
105 
106 /// Iterates through instructions in a set of blocks in reverse order from the
107 /// first non-terminator. For example (assume all blocks have size n):
108 /// LockstepReverseIterator I([B1, B2, B3]);
109 /// *I-- = [B1[n], B2[n], B3[n]];
110 /// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
111 /// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
112 /// ...
113 ///
114 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
115 /// to
116 /// determine which blocks are still going and the order they appear in the
117 /// list returned by operator*.
118 class LockstepReverseIterator {
119  ArrayRef<BasicBlock *> Blocks;
120  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
122  bool Fail;
123 
124 public:
125  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
126  reset();
127  }
128 
129  void reset() {
130  Fail = false;
131  ActiveBlocks.clear();
132  for (BasicBlock *BB : Blocks)
133  ActiveBlocks.insert(BB);
134  Insts.clear();
135  for (BasicBlock *BB : Blocks) {
136  if (BB->size() <= 1) {
137  // Block wasn't big enough - only contained a terminator.
138  ActiveBlocks.remove(BB);
139  continue;
140  }
141  Insts.push_back(BB->getTerminator()->getPrevNode());
142  }
143  if (Insts.empty())
144  Fail = true;
145  }
146 
147  bool isValid() const { return !Fail; }
148  ArrayRef<Instruction *> operator*() const { return Insts; }
149 
150  // Note: This needs to return a SmallSetVector as the elements of
151  // ActiveBlocks will be later copied to Blocks using std::copy. The
152  // resultant order of elements in Blocks needs to be deterministic.
153  // Using SmallPtrSet instead causes non-deterministic order while
154  // copying. And we cannot simply sort Blocks as they need to match the
155  // corresponding Values.
156  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
157 
158  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
159  for (auto II = Insts.begin(); II != Insts.end();) {
160  if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
161  Blocks.end()) {
162  ActiveBlocks.remove((*II)->getParent());
163  II = Insts.erase(II);
164  } else {
165  ++II;
166  }
167  }
168  }
169 
170  void operator--() {
171  if (Fail)
172  return;
174  for (auto *Inst : Insts) {
175  if (Inst == &Inst->getParent()->front())
176  ActiveBlocks.remove(Inst->getParent());
177  else
178  NewInsts.push_back(Inst->getPrevNode());
179  }
180  if (NewInsts.empty()) {
181  Fail = true;
182  return;
183  }
184  Insts = NewInsts;
185  }
186 };
187 
188 //===----------------------------------------------------------------------===//
189 
190 /// Candidate solution for sinking. There may be different ways to
191 /// sink instructions, differing in the number of instructions sunk,
192 /// the number of predecessors sunk from and the number of PHIs
193 /// required.
194 struct SinkingInstructionCandidate {
195  unsigned NumBlocks;
196  unsigned NumInstructions;
197  unsigned NumPHIs;
198  unsigned NumMemoryInsts;
199  int Cost = -1;
201 
202  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
203  unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
204  unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
205  Cost = (NumInstructions * (NumBlocks - 1)) -
206  (NumExtraPHIs *
207  NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
208  - SplitEdgeCost;
209  }
210 
211  bool operator>(const SinkingInstructionCandidate &Other) const {
212  return Cost > Other.Cost;
213  }
214 };
215 
216 #ifndef NDEBUG
217 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
218  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
219  << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
220  return OS;
221 }
222 #endif
223 
224 //===----------------------------------------------------------------------===//
225 
226 /// Describes a PHI node that may or may not exist. These track the PHIs
227 /// that must be created if we sunk a sequence of instructions. It provides
228 /// a hash function for efficient equality comparisons.
229 class ModelledPHI {
232 
233 public:
234  ModelledPHI() = default;
235 
236  ModelledPHI(const PHINode *PN) {
237  // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
239  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
240  Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
241  llvm::sort(Ops);
242  for (auto &P : Ops) {
243  Blocks.push_back(P.first);
244  Values.push_back(P.second);
245  }
246  }
247 
248  /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
249  /// without the same ID.
250  /// \note This is specifically for DenseMapInfo - do not use this!
251  static ModelledPHI createDummy(size_t ID) {
252  ModelledPHI M;
253  M.Values.push_back(reinterpret_cast<Value*>(ID));
254  return M;
255  }
256 
257  /// Create a PHI from an array of incoming values and incoming blocks.
258  template <typename VArray, typename BArray>
259  ModelledPHI(const VArray &V, const BArray &B) {
260  llvm::copy(V, std::back_inserter(Values));
261  llvm::copy(B, std::back_inserter(Blocks));
262  }
263 
264  /// Create a PHI from [I[OpNum] for I in Insts].
265  template <typename BArray>
266  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
267  llvm::copy(B, std::back_inserter(Blocks));
268  for (auto *I : Insts)
269  Values.push_back(I->getOperand(OpNum));
270  }
271 
272  /// Restrict the PHI's contents down to only \c NewBlocks.
273  /// \c NewBlocks must be a subset of \c this->Blocks.
274  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
275  auto BI = Blocks.begin();
276  auto VI = Values.begin();
277  while (BI != Blocks.end()) {
278  assert(VI != Values.end());
279  if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
280  NewBlocks.end()) {
281  BI = Blocks.erase(BI);
282  VI = Values.erase(VI);
283  } else {
284  ++BI;
285  ++VI;
286  }
287  }
288  assert(Blocks.size() == NewBlocks.size());
289  }
290 
291  ArrayRef<Value *> getValues() const { return Values; }
292 
293  bool areAllIncomingValuesSame() const {
294  return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
295  }
296 
297  bool areAllIncomingValuesSameType() const {
298  return llvm::all_of(
299  Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
300  }
301 
302  bool areAnyIncomingValuesConstant() const {
303  return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
304  }
305 
306  // Hash functor
307  unsigned hash() const {
308  return (unsigned)hash_combine_range(Values.begin(), Values.end());
309  }
310 
311  bool operator==(const ModelledPHI &Other) const {
312  return Values == Other.Values && Blocks == Other.Blocks;
313  }
314 };
315 
316 template <typename ModelledPHI> struct DenseMapInfo {
317  static inline ModelledPHI &getEmptyKey() {
318  static ModelledPHI Dummy = ModelledPHI::createDummy(0);
319  return Dummy;
320  }
321 
322  static inline ModelledPHI &getTombstoneKey() {
323  static ModelledPHI Dummy = ModelledPHI::createDummy(1);
324  return Dummy;
325  }
326 
327  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
328 
329  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
330  return LHS == RHS;
331  }
332 };
333 
335 
336 //===----------------------------------------------------------------------===//
337 // ValueTable
338 //===----------------------------------------------------------------------===//
339 // This is a value number table where the value number is a function of the
340 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
341 // that the program would be equivalent if we replaced A with PHI(A, B).
342 //===----------------------------------------------------------------------===//
343 
344 /// A GVN expression describing how an instruction is used. The operands
345 /// field of BasicExpression is used to store uses, not operands.
346 ///
347 /// This class also contains fields for discriminators used when determining
348 /// equivalence of instructions with sideeffects.
349 class InstructionUseExpr : public GVNExpression::BasicExpression {
350  unsigned MemoryUseOrder = -1;
351  bool Volatile = false;
352 
353 public:
354  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
355  BumpPtrAllocator &A)
357  allocateOperands(R, A);
358  setOpcode(I->getOpcode());
359  setType(I->getType());
360 
361  for (auto &U : I->uses())
362  op_push_back(U.getUser());
363  llvm::sort(op_begin(), op_end());
364  }
365 
366  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
367  void setVolatile(bool V) { Volatile = V; }
368 
369  hash_code getHashValue() const override {
371  MemoryUseOrder, Volatile);
372  }
373 
374  template <typename Function> hash_code getHashValue(Function MapFn) {
375  hash_code H =
376  hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
377  for (auto *V : operands())
378  H = hash_combine(H, MapFn(V));
379  return H;
380  }
381 };
382 
383 class ValueTable {
384  DenseMap<Value *, uint32_t> ValueNumbering;
386  DenseMap<size_t, uint32_t> HashNumbering;
389  uint32_t nextValueNumber = 1;
390 
391  /// Create an expression for I based on its opcode and its uses. If I
392  /// touches or reads memory, the expression is also based upon its memory
393  /// order - see \c getMemoryUseOrder().
394  InstructionUseExpr *createExpr(Instruction *I) {
395  InstructionUseExpr *E =
396  new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
397  if (isMemoryInst(I))
398  E->setMemoryUseOrder(getMemoryUseOrder(I));
399 
400  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
401  CmpInst::Predicate Predicate = C->getPredicate();
402  E->setOpcode((C->getOpcode() << 8) | Predicate);
403  }
404  return E;
405  }
406 
407  /// Helper to compute the value number for a memory instruction
408  /// (LoadInst/StoreInst), including checking the memory ordering and
409  /// volatility.
410  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
411  if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
412  return nullptr;
413  InstructionUseExpr *E = createExpr(I);
414  E->setVolatile(I->isVolatile());
415  return E;
416  }
417 
418 public:
419  ValueTable() = default;
420 
421  /// Returns the value number for the specified value, assigning
422  /// it a new number if it did not have one before.
423  uint32_t lookupOrAdd(Value *V) {
424  auto VI = ValueNumbering.find(V);
425  if (VI != ValueNumbering.end())
426  return VI->second;
427 
428  if (!isa<Instruction>(V)) {
429  ValueNumbering[V] = nextValueNumber;
430  return nextValueNumber++;
431  }
432 
433  Instruction *I = cast<Instruction>(V);
434  InstructionUseExpr *exp = nullptr;
435  switch (I->getOpcode()) {
436  case Instruction::Load:
437  exp = createMemoryExpr(cast<LoadInst>(I));
438  break;
439  case Instruction::Store:
440  exp = createMemoryExpr(cast<StoreInst>(I));
441  break;
442  case Instruction::Call:
443  case Instruction::Invoke:
444  case Instruction::FNeg:
445  case Instruction::Add:
446  case Instruction::FAdd:
447  case Instruction::Sub:
448  case Instruction::FSub:
449  case Instruction::Mul:
450  case Instruction::FMul:
451  case Instruction::UDiv:
452  case Instruction::SDiv:
453  case Instruction::FDiv:
454  case Instruction::URem:
455  case Instruction::SRem:
456  case Instruction::FRem:
457  case Instruction::Shl:
458  case Instruction::LShr:
459  case Instruction::AShr:
460  case Instruction::And:
461  case Instruction::Or:
462  case Instruction::Xor:
463  case Instruction::ICmp:
464  case Instruction::FCmp:
465  case Instruction::Trunc:
466  case Instruction::ZExt:
467  case Instruction::SExt:
468  case Instruction::FPToUI:
469  case Instruction::FPToSI:
470  case Instruction::UIToFP:
471  case Instruction::SIToFP:
472  case Instruction::FPTrunc:
473  case Instruction::FPExt:
474  case Instruction::PtrToInt:
475  case Instruction::IntToPtr:
476  case Instruction::BitCast:
477  case Instruction::Select:
478  case Instruction::ExtractElement:
479  case Instruction::InsertElement:
480  case Instruction::ShuffleVector:
481  case Instruction::InsertValue:
482  case Instruction::GetElementPtr:
483  exp = createExpr(I);
484  break;
485  default:
486  break;
487  }
488 
489  if (!exp) {
490  ValueNumbering[V] = nextValueNumber;
491  return nextValueNumber++;
492  }
493 
494  uint32_t e = ExpressionNumbering[exp];
495  if (!e) {
496  hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
497  auto I = HashNumbering.find(H);
498  if (I != HashNumbering.end()) {
499  e = I->second;
500  } else {
501  e = nextValueNumber++;
502  HashNumbering[H] = e;
503  ExpressionNumbering[exp] = e;
504  }
505  }
506  ValueNumbering[V] = e;
507  return e;
508  }
509 
510  /// Returns the value number of the specified value. Fails if the value has
511  /// not yet been numbered.
512  uint32_t lookup(Value *V) const {
513  auto VI = ValueNumbering.find(V);
514  assert(VI != ValueNumbering.end() && "Value not numbered?");
515  return VI->second;
516  }
517 
518  /// Removes all value numberings and resets the value table.
519  void clear() {
520  ValueNumbering.clear();
521  ExpressionNumbering.clear();
522  HashNumbering.clear();
523  Recycler.clear(Allocator);
524  nextValueNumber = 1;
525  }
526 
527  /// \c Inst uses or touches memory. Return an ID describing the memory state
528  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
529  /// the exact same memory operations happen after I1 and I2.
530  ///
531  /// This is a very hard problem in general, so we use domain-specific
532  /// knowledge that we only ever check for equivalence between blocks sharing a
533  /// single immediate successor that is common, and when determining if I1 ==
534  /// I2 we will have already determined that next(I1) == next(I2). This
535  /// inductive property allows us to simply return the value number of the next
536  /// instruction that defines memory.
537  uint32_t getMemoryUseOrder(Instruction *Inst) {
538  auto *BB = Inst->getParent();
539  for (auto I = std::next(Inst->getIterator()), E = BB->end();
540  I != E && !I->isTerminator(); ++I) {
541  if (!isMemoryInst(&*I))
542  continue;
543  if (isa<LoadInst>(&*I))
544  continue;
545  CallInst *CI = dyn_cast<CallInst>(&*I);
546  if (CI && CI->onlyReadsMemory())
547  continue;
548  InvokeInst *II = dyn_cast<InvokeInst>(&*I);
549  if (II && II->onlyReadsMemory())
550  continue;
551  return lookupOrAdd(&*I);
552  }
553  return 0;
554  }
555 };
556 
557 //===----------------------------------------------------------------------===//
558 
559 class GVNSink {
560 public:
561  GVNSink() = default;
562 
563  bool run(Function &F) {
564  LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
565  << "\n");
566 
567  unsigned NumSunk = 0;
569  for (auto *N : RPOT)
570  NumSunk += sinkBB(N);
571 
572  return NumSunk > 0;
573  }
574 
575 private:
576  ValueTable VN;
577 
578  bool isInstructionBlacklisted(Instruction *I) {
579  // These instructions may change or break semantics if moved.
580  if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
581  I->getType()->isTokenTy())
582  return true;
583  return false;
584  }
585 
586  /// The main heuristic function. Analyze the set of instructions pointed to by
587  /// LRI and return a candidate solution if these instructions can be sunk, or
588  /// None otherwise.
589  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
590  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
591  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
592 
593  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
594  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
595  SmallPtrSetImpl<Value *> &PHIContents) {
596  for (PHINode &PN : BB->phis()) {
597  auto MPHI = ModelledPHI(&PN);
598  PHIs.insert(MPHI);
599  for (auto *V : MPHI.getValues())
600  PHIContents.insert(V);
601  }
602  }
603 
604  /// The main instruction sinking driver. Set up state and try and sink
605  /// instructions into BBEnd from its predecessors.
606  unsigned sinkBB(BasicBlock *BBEnd);
607 
608  /// Perform the actual mechanics of sinking an instruction from Blocks into
609  /// BBEnd, which is their only successor.
611 
612  /// Remove PHIs that all have the same incoming value.
613  void foldPointlessPHINodes(BasicBlock *BB) {
614  auto I = BB->begin();
615  while (PHINode *PN = dyn_cast<PHINode>(I++)) {
616  if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
617  return V == PN->getIncomingValue(0);
618  }))
619  continue;
620  if (PN->getIncomingValue(0) != PN)
621  PN->replaceAllUsesWith(PN->getIncomingValue(0));
622  else
623  PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
624  PN->eraseFromParent();
625  }
626  }
627 };
628 
629 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
630  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
631  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
632  auto Insts = *LRI;
633  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
634  : Insts) {
635  I->dump();
636  } dbgs() << " ]\n";);
637 
639  for (auto *I : Insts) {
640  uint32_t N = VN.lookupOrAdd(I);
641  LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
642  if (N == ~0U)
643  return None;
644  VNums[N]++;
645  }
646  unsigned VNumToSink =
647  std::max_element(VNums.begin(), VNums.end(),
648  [](const std::pair<uint32_t, unsigned> &I,
649  const std::pair<uint32_t, unsigned> &J) {
650  return I.second < J.second;
651  })
652  ->first;
653 
654  if (VNums[VNumToSink] == 1)
655  // Can't sink anything!
656  return None;
657 
658  // Now restrict the number of incoming blocks down to only those with
659  // VNumToSink.
660  auto &ActivePreds = LRI.getActiveBlocks();
661  unsigned InitialActivePredSize = ActivePreds.size();
663  for (auto *I : Insts) {
664  if (VN.lookup(I) != VNumToSink)
665  ActivePreds.remove(I->getParent());
666  else
667  NewInsts.push_back(I);
668  }
669  for (auto *I : NewInsts)
670  if (isInstructionBlacklisted(I))
671  return None;
672 
673  // If we've restricted the incoming blocks, restrict all needed PHIs also
674  // to that set.
675  bool RecomputePHIContents = false;
676  if (ActivePreds.size() != InitialActivePredSize) {
677  ModelledPHISet NewNeededPHIs;
678  for (auto P : NeededPHIs) {
679  P.restrictToBlocks(ActivePreds);
680  NewNeededPHIs.insert(P);
681  }
682  NeededPHIs = NewNeededPHIs;
683  LRI.restrictToBlocks(ActivePreds);
684  RecomputePHIContents = true;
685  }
686 
687  // The sunk instruction's results.
688  ModelledPHI NewPHI(NewInsts, ActivePreds);
689 
690  // Does sinking this instruction render previous PHIs redundant?
691  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
692  NeededPHIs.erase(NewPHI);
693  RecomputePHIContents = true;
694  }
695 
696  if (RecomputePHIContents) {
697  // The needed PHIs have changed, so recompute the set of all needed
698  // values.
699  PHIContents.clear();
700  for (auto &PHI : NeededPHIs)
701  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
702  }
703 
704  // Is this instruction required by a later PHI that doesn't match this PHI?
705  // if so, we can't sink this instruction.
706  for (auto *V : NewPHI.getValues())
707  if (PHIContents.count(V))
708  // V exists in this PHI, but the whole PHI is different to NewPHI
709  // (else it would have been removed earlier). We cannot continue
710  // because this isn't representable.
711  return None;
712 
713  // Which operands need PHIs?
714  // FIXME: If any of these fail, we should partition up the candidates to
715  // try and continue making progress.
716  Instruction *I0 = NewInsts[0];
717 
718  // If all instructions that are going to participate don't have the same
719  // number of operands, we can't do any useful PHI analysis for all operands.
720  auto hasDifferentNumOperands = [&I0](Instruction *I) {
721  return I->getNumOperands() != I0->getNumOperands();
722  };
723  if (any_of(NewInsts, hasDifferentNumOperands))
724  return None;
725 
726  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
727  ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
728  if (PHI.areAllIncomingValuesSame())
729  continue;
730  if (!canReplaceOperandWithVariable(I0, OpNum))
731  // We can 't create a PHI from this instruction!
732  return None;
733  if (NeededPHIs.count(PHI))
734  continue;
735  if (!PHI.areAllIncomingValuesSameType())
736  return None;
737  // Don't create indirect calls! The called value is the final operand.
738  if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
739  PHI.areAnyIncomingValuesConstant())
740  return None;
741 
742  NeededPHIs.reserve(NeededPHIs.size());
743  NeededPHIs.insert(PHI);
744  PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
745  }
746 
747  if (isMemoryInst(NewInsts[0]))
748  ++MemoryInstNum;
749 
750  SinkingInstructionCandidate Cand;
751  Cand.NumInstructions = ++InstNum;
752  Cand.NumMemoryInsts = MemoryInstNum;
753  Cand.NumBlocks = ActivePreds.size();
754  Cand.NumPHIs = NeededPHIs.size();
755  for (auto *C : ActivePreds)
756  Cand.Blocks.push_back(C);
757 
758  return Cand;
759 }
760 
761 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
762  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
763  BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
765  for (auto *B : predecessors(BBEnd)) {
766  auto *T = B->getTerminator();
767  if (isa<BranchInst>(T) || isa<SwitchInst>(T))
768  Preds.push_back(B);
769  else
770  return 0;
771  }
772  if (Preds.size() < 2)
773  return 0;
774  llvm::sort(Preds);
775 
776  unsigned NumOrigPreds = Preds.size();
777  // We can only sink instructions through unconditional branches.
778  for (auto I = Preds.begin(); I != Preds.end();) {
779  if ((*I)->getTerminator()->getNumSuccessors() != 1)
780  I = Preds.erase(I);
781  else
782  ++I;
783  }
784 
785  LockstepReverseIterator LRI(Preds);
787  unsigned InstNum = 0, MemoryInstNum = 0;
788  ModelledPHISet NeededPHIs;
789  SmallPtrSet<Value *, 4> PHIContents;
790  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
791  unsigned NumOrigPHIs = NeededPHIs.size();
792 
793  while (LRI.isValid()) {
794  auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
795  NeededPHIs, PHIContents);
796  if (!Cand)
797  break;
798  Cand->calculateCost(NumOrigPHIs, Preds.size());
799  Candidates.emplace_back(*Cand);
800  --LRI;
801  }
802 
803  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
804  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
805  : Candidates) dbgs()
806  << " " << C << "\n";);
807 
808  // Pick the top candidate, as long it is positive!
809  if (Candidates.empty() || Candidates.front().Cost <= 0)
810  return 0;
811  auto C = Candidates.front();
812 
813  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
814  BasicBlock *InsertBB = BBEnd;
815  if (C.Blocks.size() < NumOrigPreds) {
816  LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
817  BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
818  InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
819  if (!InsertBB) {
820  LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
821  // Edge couldn't be split.
822  return 0;
823  }
824  }
825 
826  for (unsigned I = 0; I < C.NumInstructions; ++I)
827  sinkLastInstruction(C.Blocks, InsertBB);
828 
829  return C.NumInstructions;
830 }
831 
833  BasicBlock *BBEnd) {
835  for (BasicBlock *BB : Blocks)
836  Insts.push_back(BB->getTerminator()->getPrevNode());
837  Instruction *I0 = Insts.front();
838 
839  SmallVector<Value *, 4> NewOperands;
840  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
841  bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
842  return I->getOperand(O) != I0->getOperand(O);
843  });
844  if (!NeedPHI) {
845  NewOperands.push_back(I0->getOperand(O));
846  continue;
847  }
848 
849  // Create a new PHI in the successor block and populate it.
850  auto *Op = I0->getOperand(O);
851  assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
852  auto *PN = PHINode::Create(Op->getType(), Insts.size(),
853  Op->getName() + ".sink", &BBEnd->front());
854  for (auto *I : Insts)
855  PN->addIncoming(I->getOperand(O), I->getParent());
856  NewOperands.push_back(PN);
857  }
858 
859  // Arbitrarily use I0 as the new "common" instruction; remap its operands
860  // and move it to the start of the successor block.
861  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
862  I0->getOperandUse(O).set(NewOperands[O]);
863  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
864 
865  // Update metadata and IR flags.
866  for (auto *I : Insts)
867  if (I != I0) {
868  combineMetadataForCSE(I0, I, true);
869  I0->andIRFlags(I);
870  }
871 
872  for (auto *I : Insts)
873  if (I != I0)
874  I->replaceAllUsesWith(I0);
875  foldPointlessPHINodes(BBEnd);
876 
877  // Finally nuke all instructions apart from the common instruction.
878  for (auto *I : Insts)
879  if (I != I0)
880  I->eraseFromParent();
881 
882  NumRemoved += Insts.size() - 1;
883 }
884 
885 ////////////////////////////////////////////////////////////////////////////////
886 // Pass machinery / boilerplate
887 
888 class GVNSinkLegacyPass : public FunctionPass {
889 public:
890  static char ID;
891 
892  GVNSinkLegacyPass() : FunctionPass(ID) {
894  }
895 
896  bool runOnFunction(Function &F) override {
897  if (skipFunction(F))
898  return false;
899  GVNSink G;
900  return G.run(F);
901  }
902 
903  void getAnalysisUsage(AnalysisUsage &AU) const override {
905  }
906 };
907 
908 } // end anonymous namespace
909 
911  GVNSink G;
912  if (!G.run(F))
913  return PreservedAnalyses::all();
914 
916  PA.preserve<GlobalsAA>();
917  return PA;
918 }
919 
920 char GVNSinkLegacyPass::ID = 0;
921 
922 INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
923  "Early GVN sinking of Expressions", false, false)
926 INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
927  "Early GVN sinking of Expressions", false, false)
928 
929 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
const NoneType None
Definition: None.h:23
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:722
iterator_range< use_iterator > uses()
Definition: Value.h:375
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool isStrongerThanUnordered(AtomicOrdering ao)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
gvn sink
When an instruction is found to only be used outside of the loop, this function moves it to the exit ...
Definition: GVNSink.cpp:926
Atomic ordering constants.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:484
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
This is the interface for a simple mod/ref and alias analysis over globals.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:344
This class represents a function call, abstracting a target machine&#39;s calling convention.
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:953
Recycle small arrays allocated from a BumpPtrAllocator.
Definition: ArrayRecycler.h:28
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
STATISTIC(NumFunctions, "Total number of functions")
F(f)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVNSink.cpp:910
bool operator==(const Expression &Other) const
Definition: GVNExpression.h:77
This defines the Use class.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:273
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4429
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
#define Fail
raw_ostream & operator<<(raw_ostream &OS, const Expression &E)
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", "Early GVN sinking of Expressions", false, false) INITIALIZE_PASS_END(GVNSinkLegacyPass
static const uint16_t * lookup(unsigned opcode, unsigned domain, ArrayRef< uint16_t[3]> Table)
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2099
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
The core GVN pass object.
Definition: GVN.h:68
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
LLVM_DUMP_METHOD void dump() const
Definition: GVNSink.cpp:90
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
virtual hash_code getHashValue() const
Value * getOperand(unsigned i) const
Definition: User.h:169
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
void print(raw_ostream &OS) const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:223
static bool sinkLastInstruction(ArrayRef< BasicBlock *> Blocks)
void set(Value *Val)
Definition: Value.h:730
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:141
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:2901
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:285
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
constexpr double e
Definition: MathExtras.h:57
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
self_iterator getIterator()
Definition: ilist_node.h:81
static unsigned getTombstoneKey()
Definition: GVNExpression.h:74
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1446
iterator erase(const_iterator CI)
Definition: SmallVector.h:434
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
size_t size() const
Definition: SmallVector.h:52
static wasm::ValType getType(const TargetRegisterClass *RC)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4356
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned first
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
hash_code getHashValue() const override
Basic Register Allocator
Machine code sinking
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void setOpcode(unsigned opcode)
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
Definition: Recycler.h:34
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
gvn Early GVN sinking of Expressions
Definition: GVNSink.cpp:926
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
static Twine utohexstr(const uint64_t &Val)
Definition: Twine.h:387
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
void clear(AllocatorType &Allocator)
Release all the tracked allocations to the allocator.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:600
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:478
An opaque object representing a hash code.
Definition: Hashing.h:71
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:225
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:160
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
FunctionPass * createGVNSinkPass()
Definition: GVNSink.cpp:929
bool isTokenTy() const
Return true if this is &#39;token&#39;.
Definition: Type.h:194
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
void initializeGVNSinkLegacyPassPass(PassRegistry &)
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1557
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:329
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
LLVM Value Representation.
Definition: Value.h:74
The header file for the GVN pass that contains expression handling classes.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
Invoke instruction.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:2363
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:593
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1217
const BasicBlock * getParent() const
Definition: Instruction.h:66