LLVM  6.0.0svn
MergeICmps.cpp
Go to the documentation of this file.
1 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass turns chains of integer comparisons into memcmp (the memcmp is
11 // later typically inlined as a chain of efficient hardware comparisons). This
12 // typically benefits c++ member or nonmember operator==().
13 //
14 // The basic idea is to replace a larger chain of integer comparisons loaded
15 // from contiguous memory locations into a smaller chain of such integer
16 // comparisons. Benefits are double:
17 // - There are less jumps, and therefore less opportunities for mispredictions
18 // and I-cache misses.
19 // - Code size is smaller, both because jumps are removed and because the
20 // encoding of a 2*n byte compare is smaller than that of two n-byte
21 // compares.
22 
23 //===----------------------------------------------------------------------===//
24 
25 #include <algorithm>
26 #include <numeric>
27 #include <utility>
28 #include <vector>
29 #include "llvm/ADT/APSInt.h"
30 #include "llvm/Analysis/Loads.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Transforms/Scalar.h"
39 
40 using namespace llvm;
41 
42 namespace {
43 
44 #define DEBUG_TYPE "mergeicmps"
45 
46 // A BCE atom.
47 struct BCEAtom {
48  BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
49 
50  const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
51 
52  bool operator<(const BCEAtom &O) const {
53  assert(Base() && "invalid atom");
54  assert(O.Base() && "invalid atom");
55  // Just ordering by (Base(), Offset) is sufficient. However because this
56  // means that the ordering will depend on the addresses of the base
57  // values, which are not reproducible from run to run. To guarantee
58  // stability, we use the names of the values if they exist; we sort by:
59  // (Base.getName(), Base(), Offset).
60  const int NameCmp = Base()->getName().compare(O.Base()->getName());
61  if (NameCmp == 0) {
62  if (Base() == O.Base()) {
63  return Offset.slt(O.Offset);
64  }
65  return Base() < O.Base();
66  }
67  return NameCmp < 0;
68  }
69 
71  LoadInst *LoadI;
72  APInt Offset;
73 };
74 
75 // If this value is a load from a constant offset w.r.t. a base address, and
76 // there are no othe rusers of the load or address, returns the base address and
77 // the offset.
78 BCEAtom visitICmpLoadOperand(Value *const Val) {
79  BCEAtom Result;
80  if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
81  DEBUG(dbgs() << "load\n");
82  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
83  DEBUG(dbgs() << "used outside of block\n");
84  return {};
85  }
86  if (LoadI->isVolatile()) {
87  DEBUG(dbgs() << "volatile\n");
88  return {};
89  }
90  Value *const Addr = LoadI->getOperand(0);
91  if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
92  DEBUG(dbgs() << "GEP\n");
93  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
94  DEBUG(dbgs() << "used outside of block\n");
95  return {};
96  }
97  const auto &DL = GEP->getModule()->getDataLayout();
98  if (!isDereferenceablePointer(GEP, DL)) {
99  DEBUG(dbgs() << "not dereferenceable\n");
100  // We need to make sure that we can do comparison in any order, so we
101  // require memory to be unconditionnally dereferencable.
102  return {};
103  }
104  Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
105  if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
106  Result.GEP = GEP;
107  Result.LoadI = LoadI;
108  }
109  }
110  }
111  return Result;
112 }
113 
114 // A basic block with a comparison between two BCE atoms.
115 // Note: the terminology is misleading: the comparison is symmetric, so there
116 // is no real {l/r}hs. What we want though is to have the same base on the
117 // left (resp. right), so that we can detect consecutive loads. To ensure this
118 // we put the smallest atom on the left.
119 class BCECmpBlock {
120  public:
121  BCECmpBlock() {}
122 
123  BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
124  : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
125  if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
126  }
127 
128  bool IsValid() const {
129  return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
130  }
131 
132  // Assert the the block is consistent: If valid, it should also have
133  // non-null members besides Lhs_ and Rhs_.
134  void AssertConsistent() const {
135  if (IsValid()) {
136  assert(BB);
137  assert(CmpI);
138  assert(BranchI);
139  }
140  }
141 
142  const BCEAtom &Lhs() const { return Lhs_; }
143  const BCEAtom &Rhs() const { return Rhs_; }
144  int SizeBits() const { return SizeBits_; }
145 
146  // Returns true if the block does other works besides comparison.
147  bool doesOtherWork() const;
148 
149  // The basic block where this comparison happens.
150  BasicBlock *BB = nullptr;
151  // The ICMP for this comparison.
152  ICmpInst *CmpI = nullptr;
153  // The terminating branch.
154  BranchInst *BranchI = nullptr;
155 
156  private:
157  BCEAtom Lhs_;
158  BCEAtom Rhs_;
159  int SizeBits_ = 0;
160 };
161 
162 bool BCECmpBlock::doesOtherWork() const {
163  AssertConsistent();
164  // TODO(courbet): Can we allow some other things ? This is very conservative.
165  // We might be able to get away with anything does does not have any side
166  // effects outside of the basic block.
167  // Note: The GEPs and/or loads are not necessarily in the same block.
168  for (const Instruction &Inst : *BB) {
169  if (const auto *const GEP = dyn_cast<GetElementPtrInst>(&Inst)) {
170  if (!(Lhs_.GEP == GEP || Rhs_.GEP == GEP)) return true;
171  } else if (const auto *const L = dyn_cast<LoadInst>(&Inst)) {
172  if (!(Lhs_.LoadI == L || Rhs_.LoadI == L)) return true;
173  } else if (const auto *const C = dyn_cast<ICmpInst>(&Inst)) {
174  if (C != CmpI) return true;
175  } else if (const auto *const Br = dyn_cast<BranchInst>(&Inst)) {
176  if (Br != BranchI) return true;
177  } else {
178  return true;
179  }
180  }
181  return false;
182 }
183 
184 // Visit the given comparison. If this is a comparison between two valid
185 // BCE atoms, returns the comparison.
186 BCECmpBlock visitICmp(const ICmpInst *const CmpI,
187  const ICmpInst::Predicate ExpectedPredicate) {
188  if (CmpI->getPredicate() == ExpectedPredicate) {
189  DEBUG(dbgs() << "cmp "
190  << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
191  << "\n");
192  auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
193  if (!Lhs.Base()) return {};
194  auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
195  if (!Rhs.Base()) return {};
196  return BCECmpBlock(std::move(Lhs), std::move(Rhs),
197  CmpI->getOperand(0)->getType()->getScalarSizeInBits());
198  }
199  return {};
200 }
201 
202 // Visit the given comparison block. If this is a comparison between two valid
203 // BCE atoms, returns the comparison.
204 BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
205  const BasicBlock *const PhiBlock) {
206  if (Block->empty()) return {};
207  auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
208  if (!BranchI) return {};
209  DEBUG(dbgs() << "branch\n");
210  if (BranchI->isUnconditional()) {
211  // In this case, we expect an incoming value which is the result of the
212  // comparison. This is the last link in the chain of comparisons (note
213  // that this does not mean that this is the last incoming value, blocks
214  // can be reordered).
215  auto *const CmpI = dyn_cast<ICmpInst>(Val);
216  if (!CmpI) return {};
217  DEBUG(dbgs() << "icmp\n");
218  auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
219  Result.CmpI = CmpI;
220  Result.BranchI = BranchI;
221  return Result;
222  } else {
223  // In this case, we expect a constant incoming value (the comparison is
224  // chained).
225  const auto *const Const = dyn_cast<ConstantInt>(Val);
226  DEBUG(dbgs() << "const\n");
227  if (!Const->isZero()) return {};
228  DEBUG(dbgs() << "false\n");
229  auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition());
230  if (!CmpI) return {};
231  DEBUG(dbgs() << "icmp\n");
232  assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
233  BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
234  auto Result = visitICmp(
235  CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
236  Result.CmpI = CmpI;
237  Result.BranchI = BranchI;
238  return Result;
239  }
240  return {};
241 }
242 
243 // A chain of comparisons.
244 class BCECmpChain {
245  public:
246  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi);
247 
248  int size() const { return Comparisons_.size(); }
249 
250 #ifdef MERGEICMPS_DOT_ON
251  void dump() const;
252 #endif // MERGEICMPS_DOT_ON
253 
254  bool simplify(const TargetLibraryInfo *const TLI);
255 
256  private:
257  static bool IsContiguous(const BCECmpBlock &First,
258  const BCECmpBlock &Second) {
259  return First.Lhs().Base() == Second.Lhs().Base() &&
260  First.Rhs().Base() == Second.Rhs().Base() &&
261  First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
262  First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
263  }
264 
265  // Merges the given comparison blocks into one memcmp block and update
266  // branches. Comparisons are assumed to be continguous. If NextBBInChain is
267  // null, the merged block will link to the phi block.
268  static void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
269  BasicBlock *const NextBBInChain, PHINode &Phi,
270  const TargetLibraryInfo *const TLI);
271 
272  PHINode &Phi_;
273  std::vector<BCECmpBlock> Comparisons_;
274  // The original entry block (before sorting);
275  BasicBlock *EntryBlock_;
276 };
277 
278 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
279  : Phi_(Phi) {
280  // Now look inside blocks to check for BCE comparisons.
281  std::vector<BCECmpBlock> Comparisons;
282  for (BasicBlock *Block : Blocks) {
283  BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
284  Block, Phi.getParent());
285  Comparison.BB = Block;
286  if (!Comparison.IsValid()) {
287  DEBUG(dbgs() << "skip: not a valid BCECmpBlock\n");
288  return;
289  }
290  if (Comparison.doesOtherWork()) {
291  DEBUG(dbgs() << "block does extra work besides compare\n");
292  if (Comparisons.empty()) { // First block.
293  // TODO(courbet): The first block can do other things, and we should
294  // split them apart in a separate block before the comparison chain.
295  // Right now we just discard it and make the chain shorter.
296  DEBUG(dbgs()
297  << "ignoring first block that does extra work besides compare\n");
298  continue;
299  }
300  // TODO(courbet): Right now we abort the whole chain. We could be
301  // merging only the blocks that don't do other work and resume the
302  // chain from there. For example:
303  // if (a[0] == b[0]) { // bb1
304  // if (a[1] == b[1]) { // bb2
305  // some_value = 3; //bb3
306  // if (a[2] == b[2]) { //bb3
307  // do a ton of stuff //bb4
308  // }
309  // }
310  // }
311  //
312  // This is:
313  //
314  // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
315  // \ \ \ \
316  // ne ne ne \
317  // \ \ \ v
318  // +------------+-----------+----------> bb_phi
319  //
320  // We can only merge the first two comparisons, because bb3* does
321  // "other work" (setting some_value to 3).
322  // We could still merge bb1 and bb2 though.
323  return;
324  }
325  DEBUG(dbgs() << "*Found cmp of " << Comparison.SizeBits()
326  << " bits between " << Comparison.Lhs().Base() << " + "
327  << Comparison.Lhs().Offset << " and "
328  << Comparison.Rhs().Base() << " + " << Comparison.Rhs().Offset
329  << "\n");
330  DEBUG(dbgs() << "\n");
331  Comparisons.push_back(Comparison);
332  }
333  EntryBlock_ = Comparisons[0].BB;
334  Comparisons_ = std::move(Comparisons);
335 #ifdef MERGEICMPS_DOT_ON
336  errs() << "BEFORE REORDERING:\n\n";
337  dump();
338 #endif // MERGEICMPS_DOT_ON
339  // Reorder blocks by LHS. We can do that without changing the
340  // semantics because we are only accessing dereferencable memory.
341  std::sort(Comparisons_.begin(), Comparisons_.end(),
342  [](const BCECmpBlock &a, const BCECmpBlock &b) {
343  return a.Lhs() < b.Lhs();
344  });
345 #ifdef MERGEICMPS_DOT_ON
346  errs() << "AFTER REORDERING:\n\n";
347  dump();
348 #endif // MERGEICMPS_DOT_ON
349 }
350 
351 #ifdef MERGEICMPS_DOT_ON
352 void BCECmpChain::dump() const {
353  errs() << "digraph dag {\n";
354  errs() << " graph [bgcolor=transparent];\n";
355  errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
356  errs() << " edge [color=black];\n";
357  for (size_t I = 0; I < Comparisons_.size(); ++I) {
358  const auto &Comparison = Comparisons_[I];
359  errs() << " \"" << I << "\" [label=\"%"
360  << Comparison.Lhs().Base()->getName() << " + "
361  << Comparison.Lhs().Offset << " == %"
362  << Comparison.Rhs().Base()->getName() << " + "
363  << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
364  << " bytes)\"];\n";
365  const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
366  if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
367  errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
368  }
369  errs() << " \"Phi\" [label=\"Phi\"];\n";
370  errs() << "}\n\n";
371 }
372 #endif // MERGEICMPS_DOT_ON
373 
374 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
375  // First pass to check if there is at least one merge. If not, we don't do
376  // anything and we keep analysis passes intact.
377  {
378  bool AtLeastOneMerged = false;
379  for (size_t I = 1; I < Comparisons_.size(); ++I) {
380  if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
381  AtLeastOneMerged = true;
382  break;
383  }
384  }
385  if (!AtLeastOneMerged) return false;
386  }
387 
388  // Remove phi references to comparison blocks, they will be rebuilt as we
389  // merge the blocks.
390  for (const auto &Comparison : Comparisons_) {
391  Phi_.removeIncomingValue(Comparison.BB, false);
392  }
393 
394  // Point the predecessors of the chain to the first comparison block (which is
395  // the new entry point).
396  if (EntryBlock_ != Comparisons_[0].BB)
397  EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
398 
399  // Effectively merge blocks.
400  int NumMerged = 1;
401  for (size_t I = 1; I < Comparisons_.size(); ++I) {
402  if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
403  ++NumMerged;
404  } else {
405  // Merge all previous comparisons and start a new merge block.
406  mergeComparisons(
407  makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
408  Comparisons_[I].BB, Phi_, TLI);
409  NumMerged = 1;
410  }
411  }
412  mergeComparisons(makeArrayRef(Comparisons_)
413  .slice(Comparisons_.size() - NumMerged, NumMerged),
414  nullptr, Phi_, TLI);
415 
416  return true;
417 }
418 
419 void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
420  BasicBlock *const NextBBInChain,
421  PHINode &Phi,
422  const TargetLibraryInfo *const TLI) {
423  assert(!Comparisons.empty());
424  const auto &FirstComparison = *Comparisons.begin();
425  BasicBlock *const BB = FirstComparison.BB;
426  LLVMContext &Context = BB->getContext();
427 
428  if (Comparisons.size() >= 2) {
429  DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
430  const auto TotalSize =
431  std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
432  [](int Size, const BCECmpBlock &C) {
433  return Size + C.SizeBits();
434  }) /
435  8;
436 
437  // Incoming edges do not need to be updated, and both GEPs are already
438  // computing the right address, we just need to:
439  // - replace the two loads and the icmp with the memcmp
440  // - update the branch
441  // - update the incoming values in the phi.
442  FirstComparison.BranchI->eraseFromParent();
443  FirstComparison.CmpI->eraseFromParent();
444  FirstComparison.Lhs().LoadI->eraseFromParent();
445  FirstComparison.Rhs().LoadI->eraseFromParent();
446 
447  IRBuilder<> Builder(BB);
448  const auto &DL = Phi.getModule()->getDataLayout();
449  Value *const MemCmpCall = emitMemCmp(
450  FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
451  Builder, DL, TLI);
452  Value *const MemCmpIsZero = Builder.CreateICmpEQ(
453  MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
454 
455  // Add a branch to the next basic block in the chain.
456  if (NextBBInChain) {
457  Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
458  Phi.addIncoming(ConstantInt::getFalse(Context), BB);
459  } else {
460  Builder.CreateBr(Phi.getParent());
461  Phi.addIncoming(MemCmpIsZero, BB);
462  }
463 
464  // Delete merged blocks.
465  for (size_t I = 1; I < Comparisons.size(); ++I) {
466  BasicBlock *CBB = Comparisons[I].BB;
467  CBB->replaceAllUsesWith(BB);
468  CBB->eraseFromParent();
469  }
470  } else {
471  assert(Comparisons.size() == 1);
472  // There are no blocks to merge, but we still need to update the branches.
473  DEBUG(dbgs() << "Only one comparison, updating branches\n");
474  if (NextBBInChain) {
475  if (FirstComparison.BranchI->isConditional()) {
476  DEBUG(dbgs() << "conditional -> conditional\n");
477  // Just update the "true" target, the "false" target should already be
478  // the phi block.
479  assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
480  FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
481  Phi.addIncoming(ConstantInt::getFalse(Context), BB);
482  } else {
483  DEBUG(dbgs() << "unconditional -> conditional\n");
484  // Replace the unconditional branch by a conditional one.
485  FirstComparison.BranchI->eraseFromParent();
486  IRBuilder<> Builder(BB);
487  Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
488  Phi.getParent());
489  Phi.addIncoming(FirstComparison.CmpI, BB);
490  }
491  } else {
492  if (FirstComparison.BranchI->isConditional()) {
493  DEBUG(dbgs() << "conditional -> unconditional\n");
494  // Replace the conditional branch by an unconditional one.
495  FirstComparison.BranchI->eraseFromParent();
496  IRBuilder<> Builder(BB);
497  Builder.CreateBr(Phi.getParent());
498  Phi.addIncoming(FirstComparison.CmpI, BB);
499  } else {
500  DEBUG(dbgs() << "unconditional -> unconditional\n");
501  Phi.addIncoming(FirstComparison.CmpI, BB);
502  }
503  }
504  }
505 }
506 
507 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
508  BasicBlock *const LastBlock,
509  int NumBlocks) {
510  // Walk up from the last block to find other blocks.
511  std::vector<BasicBlock *> Blocks(NumBlocks);
512  BasicBlock *CurBlock = LastBlock;
513  for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
514  if (CurBlock->hasAddressTaken()) {
515  // Somebody is jumping to the block through an address, all bets are
516  // off.
517  DEBUG(dbgs() << "skip: block " << BlockIndex
518  << " has its address taken\n");
519  return {};
520  }
521  Blocks[BlockIndex] = CurBlock;
522  auto *SinglePredecessor = CurBlock->getSinglePredecessor();
523  if (!SinglePredecessor) {
524  // The block has two or more predecessors.
525  DEBUG(dbgs() << "skip: block " << BlockIndex
526  << " has two or more predecessors\n");
527  return {};
528  }
529  if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
530  // The block does not link back to the phi.
531  DEBUG(dbgs() << "skip: block " << BlockIndex
532  << " does not link back to the phi\n");
533  return {};
534  }
535  CurBlock = SinglePredecessor;
536  }
537  Blocks[0] = CurBlock;
538  return Blocks;
539 }
540 
541 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
542  DEBUG(dbgs() << "processPhi()\n");
543  if (Phi.getNumIncomingValues() <= 1) {
544  DEBUG(dbgs() << "skip: only one incoming value in phi\n");
545  return false;
546  }
547  // We are looking for something that has the following structure:
548  // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
549  // \ \ \ \
550  // ne ne ne \
551  // \ \ \ v
552  // +------------+-----------+----------> bb_phi
553  //
554  // - The last basic block (bb4 here) must branch unconditionally to bb_phi.
555  // It's the only block that contributes a non-constant value to the Phi.
556  // - All other blocks (b1, b2, b3) must have exactly two successors, one of
557  // them being the the phi block.
558  // - All intermediate blocks (bb2, bb3) must have only one predecessor.
559  // - Blocks cannot do other work besides the comparison, see doesOtherWork()
560 
561  // The blocks are not necessarily ordered in the phi, so we start from the
562  // last block and reconstruct the order.
563  BasicBlock *LastBlock = nullptr;
564  for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
565  if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue;
566  if (LastBlock) {
567  // There are several non-constant values.
568  DEBUG(dbgs() << "skip: several non-constant values\n");
569  return false;
570  }
571  LastBlock = Phi.getIncomingBlock(I);
572  }
573  if (!LastBlock) {
574  // There is no non-constant block.
575  DEBUG(dbgs() << "skip: no non-constant block\n");
576  return false;
577  }
578  if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
579  DEBUG(dbgs() << "skip: last block non-phi successor\n");
580  return false;
581  }
582 
583  const auto Blocks =
584  getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
585  if (Blocks.empty()) return false;
586  BCECmpChain CmpChain(Blocks, Phi);
587 
588  if (CmpChain.size() < 2) {
589  DEBUG(dbgs() << "skip: only one compare block\n");
590  return false;
591  }
592 
593  return CmpChain.simplify(TLI);
594 }
595 
596 class MergeICmps : public FunctionPass {
597  public:
598  static char ID;
599 
600  MergeICmps() : FunctionPass(ID) {
602  }
603 
604  bool runOnFunction(Function &F) override {
605  if (skipFunction(F)) return false;
606  const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
607  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
608  auto PA = runImpl(F, &TLI, &TTI);
609  return !PA.areAllPreserved();
610  }
611 
612  private:
613  void getAnalysisUsage(AnalysisUsage &AU) const override {
616  }
617 
619  const TargetTransformInfo *TTI);
620 };
621 
623  const TargetTransformInfo *TTI) {
624  DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
625 
626  // We only try merging comparisons if the target wants to expand memcmp later.
627  // The rationale is to avoid turning small chains into memcmp calls.
628  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
629 
630  bool MadeChange = false;
631 
632  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
633  // A Phi operation is always first in a basic block.
634  if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
635  MadeChange |= processPhi(*Phi, TLI);
636  }
637 
638  if (MadeChange) return PreservedAnalyses::none();
639  return PreservedAnalyses::all();
640 }
641 
642 } // namespace
643 
644 char MergeICmps::ID = 0;
645 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
646  "Merge contiguous icmps into a memcmp", false, false)
650  "Merge contiguous icmps into a memcmp", false, false)
651 
652 Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:523
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional &#39;br Cond, TrueDest, FalseDest&#39; instruction.
Definition: IRBuilder.h:779
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
iterator begin() const
Definition: ArrayRef.h:137
iterator end()
Definition: Function.h:590
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
Hexagon Common GEP
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
mergeicmps
Definition: MergeICmps.cpp:649
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
Pass * createMergeICmpsPass()
Definition: MergeICmps.cpp:652
bool empty() const
Definition: BasicBlock.h:263
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:244
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:430
iterator begin()
Definition: Function.h:588
Value * getOperand(unsigned i) const
Definition: User.h:154
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
static bool runOnFunction(Function &F, bool PostInlining)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1550
R600 Clause Merge
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:376
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
Provides information about what library functions are available for the current target.
iterator end() const
Definition: ArrayRef.h:138
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:560
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:923
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:57
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:153
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:927
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:649
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink &#39;this&#39; from the containing function and delete it.
Definition: BasicBlock.cpp:97
#define I(x, y, z)
Definition: MD5.cpp:58
const MemCmpExpansionOptions * enableMemCmpExpansion(bool IsZeroCmp) const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Value * emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the memcmp function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
LLVM Value Representation.
Definition: Value.h:73
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional &#39;br label X&#39; instruction.
Definition: IRBuilder.h:773
#define DEBUG(X)
Definition: Debug.h:118
INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_END(MergeICmps
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
This pass exposes codegen information to IR-level passes.
void initializeMergeICmpsPass(PassRegistry &)
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
loop simplify
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
const BasicBlock * getParent() const
Definition: Instruction.h:66