LLVM  7.0.0svn
MergeICmps.cpp
Go to the documentation of this file.
1 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass turns chains of integer comparisons into memcmp (the memcmp is
11 // later typically inlined as a chain of efficient hardware comparisons). This
12 // typically benefits c++ member or nonmember operator==().
13 //
14 // The basic idea is to replace a larger chain of integer comparisons loaded
15 // from contiguous memory locations into a smaller chain of such integer
16 // comparisons. Benefits are double:
17 // - There are less jumps, and therefore less opportunities for mispredictions
18 // and I-cache misses.
19 // - Code size is smaller, both because jumps are removed and because the
20 // encoding of a 2*n byte compare is smaller than that of two n-byte
21 // compares.
22 
23 //===----------------------------------------------------------------------===//
24 
25 #include <algorithm>
26 #include <numeric>
27 #include <utility>
28 #include <vector>
29 #include "llvm/Analysis/Loads.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Transforms/Scalar.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 
42 #define DEBUG_TYPE "mergeicmps"
43 
44 // A BCE atom.
45 struct BCEAtom {
46  BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
47 
48  const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
49 
50  bool operator<(const BCEAtom &O) const {
51  assert(Base() && "invalid atom");
52  assert(O.Base() && "invalid atom");
53  // Just ordering by (Base(), Offset) is sufficient. However because this
54  // means that the ordering will depend on the addresses of the base
55  // values, which are not reproducible from run to run. To guarantee
56  // stability, we use the names of the values if they exist; we sort by:
57  // (Base.getName(), Base(), Offset).
58  const int NameCmp = Base()->getName().compare(O.Base()->getName());
59  if (NameCmp == 0) {
60  if (Base() == O.Base()) {
61  return Offset.slt(O.Offset);
62  }
63  return Base() < O.Base();
64  }
65  return NameCmp < 0;
66  }
67 
69  LoadInst *LoadI;
70  APInt Offset;
71 };
72 
73 // If this value is a load from a constant offset w.r.t. a base address, and
74 // there are no othe rusers of the load or address, returns the base address and
75 // the offset.
76 BCEAtom visitICmpLoadOperand(Value *const Val) {
77  BCEAtom Result;
78  if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
79  DEBUG(dbgs() << "load\n");
80  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
81  DEBUG(dbgs() << "used outside of block\n");
82  return {};
83  }
84  if (LoadI->isVolatile()) {
85  DEBUG(dbgs() << "volatile\n");
86  return {};
87  }
88  Value *const Addr = LoadI->getOperand(0);
89  if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
90  DEBUG(dbgs() << "GEP\n");
91  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
92  DEBUG(dbgs() << "used outside of block\n");
93  return {};
94  }
95  const auto &DL = GEP->getModule()->getDataLayout();
96  if (!isDereferenceablePointer(GEP, DL)) {
97  DEBUG(dbgs() << "not dereferenceable\n");
98  // We need to make sure that we can do comparison in any order, so we
99  // require memory to be unconditionnally dereferencable.
100  return {};
101  }
102  Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
103  if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
104  Result.GEP = GEP;
105  Result.LoadI = LoadI;
106  }
107  }
108  }
109  return Result;
110 }
111 
112 // A basic block with a comparison between two BCE atoms.
113 // Note: the terminology is misleading: the comparison is symmetric, so there
114 // is no real {l/r}hs. What we want though is to have the same base on the
115 // left (resp. right), so that we can detect consecutive loads. To ensure this
116 // we put the smallest atom on the left.
117 class BCECmpBlock {
118  public:
119  BCECmpBlock() {}
120 
121  BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
122  : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
123  if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
124  }
125 
126  bool IsValid() const {
127  return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
128  }
129 
130  // Assert the block is consistent: If valid, it should also have
131  // non-null members besides Lhs_ and Rhs_.
132  void AssertConsistent() const {
133  if (IsValid()) {
134  assert(BB);
135  assert(CmpI);
136  assert(BranchI);
137  }
138  }
139 
140  const BCEAtom &Lhs() const { return Lhs_; }
141  const BCEAtom &Rhs() const { return Rhs_; }
142  int SizeBits() const { return SizeBits_; }
143 
144  // Returns true if the block does other works besides comparison.
145  bool doesOtherWork() const;
146 
147  // The basic block where this comparison happens.
148  BasicBlock *BB = nullptr;
149  // The ICMP for this comparison.
150  ICmpInst *CmpI = nullptr;
151  // The terminating branch.
152  BranchInst *BranchI = nullptr;
153 
154  private:
155  BCEAtom Lhs_;
156  BCEAtom Rhs_;
157  int SizeBits_ = 0;
158 };
159 
160 bool BCECmpBlock::doesOtherWork() const {
161  AssertConsistent();
162  // TODO(courbet): Can we allow some other things ? This is very conservative.
163  // We might be able to get away with anything does does not have any side
164  // effects outside of the basic block.
165  // Note: The GEPs and/or loads are not necessarily in the same block.
166  for (const Instruction &Inst : *BB) {
167  if (const auto *const GEP = dyn_cast<GetElementPtrInst>(&Inst)) {
168  if (!(Lhs_.GEP == GEP || Rhs_.GEP == GEP)) return true;
169  } else if (const auto *const L = dyn_cast<LoadInst>(&Inst)) {
170  if (!(Lhs_.LoadI == L || Rhs_.LoadI == L)) return true;
171  } else if (const auto *const C = dyn_cast<ICmpInst>(&Inst)) {
172  if (C != CmpI) return true;
173  } else if (const auto *const Br = dyn_cast<BranchInst>(&Inst)) {
174  if (Br != BranchI) return true;
175  } else {
176  return true;
177  }
178  }
179  return false;
180 }
181 
182 // Visit the given comparison. If this is a comparison between two valid
183 // BCE atoms, returns the comparison.
184 BCECmpBlock visitICmp(const ICmpInst *const CmpI,
185  const ICmpInst::Predicate ExpectedPredicate) {
186  if (CmpI->getPredicate() == ExpectedPredicate) {
187  DEBUG(dbgs() << "cmp "
188  << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
189  << "\n");
190  auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
191  if (!Lhs.Base()) return {};
192  auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
193  if (!Rhs.Base()) return {};
194  return BCECmpBlock(std::move(Lhs), std::move(Rhs),
195  CmpI->getOperand(0)->getType()->getScalarSizeInBits());
196  }
197  return {};
198 }
199 
200 // Visit the given comparison block. If this is a comparison between two valid
201 // BCE atoms, returns the comparison.
202 BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
203  const BasicBlock *const PhiBlock) {
204  if (Block->empty()) return {};
205  auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
206  if (!BranchI) return {};
207  DEBUG(dbgs() << "branch\n");
208  if (BranchI->isUnconditional()) {
209  // In this case, we expect an incoming value which is the result of the
210  // comparison. This is the last link in the chain of comparisons (note
211  // that this does not mean that this is the last incoming value, blocks
212  // can be reordered).
213  auto *const CmpI = dyn_cast<ICmpInst>(Val);
214  if (!CmpI) return {};
215  DEBUG(dbgs() << "icmp\n");
216  auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
217  Result.CmpI = CmpI;
218  Result.BranchI = BranchI;
219  return Result;
220  } else {
221  // In this case, we expect a constant incoming value (the comparison is
222  // chained).
223  const auto *const Const = dyn_cast<ConstantInt>(Val);
224  DEBUG(dbgs() << "const\n");
225  if (!Const->isZero()) return {};
226  DEBUG(dbgs() << "false\n");
227  auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition());
228  if (!CmpI) return {};
229  DEBUG(dbgs() << "icmp\n");
230  assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
231  BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
232  auto Result = visitICmp(
233  CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
234  Result.CmpI = CmpI;
235  Result.BranchI = BranchI;
236  return Result;
237  }
238  return {};
239 }
240 
241 // A chain of comparisons.
242 class BCECmpChain {
243  public:
244  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi);
245 
246  int size() const { return Comparisons_.size(); }
247 
248 #ifdef MERGEICMPS_DOT_ON
249  void dump() const;
250 #endif // MERGEICMPS_DOT_ON
251 
252  bool simplify(const TargetLibraryInfo *const TLI);
253 
254  private:
255  static bool IsContiguous(const BCECmpBlock &First,
256  const BCECmpBlock &Second) {
257  return First.Lhs().Base() == Second.Lhs().Base() &&
258  First.Rhs().Base() == Second.Rhs().Base() &&
259  First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
260  First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
261  }
262 
263  // Merges the given comparison blocks into one memcmp block and update
264  // branches. Comparisons are assumed to be continguous. If NextBBInChain is
265  // null, the merged block will link to the phi block.
266  static void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
267  BasicBlock *const NextBBInChain, PHINode &Phi,
268  const TargetLibraryInfo *const TLI);
269 
270  PHINode &Phi_;
271  std::vector<BCECmpBlock> Comparisons_;
272  // The original entry block (before sorting);
273  BasicBlock *EntryBlock_;
274 };
275 
276 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi)
277  : Phi_(Phi) {
278  assert(!Blocks.empty() && "a chain should have at least one block");
279  // Now look inside blocks to check for BCE comparisons.
280  std::vector<BCECmpBlock> Comparisons;
281  for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
282  BasicBlock *const Block = Blocks[BlockIdx];
283  assert(Block && "invalid block");
284  BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
285  Block, Phi.getParent());
286  Comparison.BB = Block;
287  if (!Comparison.IsValid()) {
288  DEBUG(dbgs() << "skip: not a valid BCECmpBlock\n");
289  return;
290  }
291  if (Comparison.doesOtherWork()) {
292  DEBUG(dbgs() << "block does extra work besides compare\n");
293  if (BlockIdx == 0) { // First block.
294  // TODO(courbet): The first block can do other things, and we should
295  // split them apart in a separate block before the comparison chain.
296  // Right now we just discard it and make the chain shorter.
297  DEBUG(dbgs()
298  << "ignoring first block that does extra work besides compare\n");
299  continue;
300  }
301  // TODO(courbet): Right now we abort the whole chain. We could be
302  // merging only the blocks that don't do other work and resume the
303  // chain from there. For example:
304  // if (a[0] == b[0]) { // bb1
305  // if (a[1] == b[1]) { // bb2
306  // some_value = 3; //bb3
307  // if (a[2] == b[2]) { //bb3
308  // do a ton of stuff //bb4
309  // }
310  // }
311  // }
312  //
313  // This is:
314  //
315  // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
316  // \ \ \ \
317  // ne ne ne \
318  // \ \ \ v
319  // +------------+-----------+----------> bb_phi
320  //
321  // We can only merge the first two comparisons, because bb3* does
322  // "other work" (setting some_value to 3).
323  // We could still merge bb1 and bb2 though.
324  return;
325  }
326  DEBUG(dbgs() << "*Found cmp of " << Comparison.SizeBits()
327  << " bits between " << Comparison.Lhs().Base() << " + "
328  << Comparison.Lhs().Offset << " and "
329  << Comparison.Rhs().Base() << " + " << Comparison.Rhs().Offset
330  << "\n");
331  DEBUG(dbgs() << "\n");
332  Comparisons.push_back(Comparison);
333  }
334  assert(!Comparisons.empty() && "chain with no BCE basic blocks");
335  EntryBlock_ = Comparisons[0].BB;
336  Comparisons_ = std::move(Comparisons);
337 #ifdef MERGEICMPS_DOT_ON
338  errs() << "BEFORE REORDERING:\n\n";
339  dump();
340 #endif // MERGEICMPS_DOT_ON
341  // Reorder blocks by LHS. We can do that without changing the
342  // semantics because we are only accessing dereferencable memory.
343  std::sort(Comparisons_.begin(), Comparisons_.end(),
344  [](const BCECmpBlock &a, const BCECmpBlock &b) {
345  return a.Lhs() < b.Lhs();
346  });
347 #ifdef MERGEICMPS_DOT_ON
348  errs() << "AFTER REORDERING:\n\n";
349  dump();
350 #endif // MERGEICMPS_DOT_ON
351 }
352 
353 #ifdef MERGEICMPS_DOT_ON
354 void BCECmpChain::dump() const {
355  errs() << "digraph dag {\n";
356  errs() << " graph [bgcolor=transparent];\n";
357  errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
358  errs() << " edge [color=black];\n";
359  for (size_t I = 0; I < Comparisons_.size(); ++I) {
360  const auto &Comparison = Comparisons_[I];
361  errs() << " \"" << I << "\" [label=\"%"
362  << Comparison.Lhs().Base()->getName() << " + "
363  << Comparison.Lhs().Offset << " == %"
364  << Comparison.Rhs().Base()->getName() << " + "
365  << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
366  << " bytes)\"];\n";
367  const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
368  if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
369  errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
370  }
371  errs() << " \"Phi\" [label=\"Phi\"];\n";
372  errs() << "}\n\n";
373 }
374 #endif // MERGEICMPS_DOT_ON
375 
376 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) {
377  // First pass to check if there is at least one merge. If not, we don't do
378  // anything and we keep analysis passes intact.
379  {
380  bool AtLeastOneMerged = false;
381  for (size_t I = 1; I < Comparisons_.size(); ++I) {
382  if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
383  AtLeastOneMerged = true;
384  break;
385  }
386  }
387  if (!AtLeastOneMerged) return false;
388  }
389 
390  // Remove phi references to comparison blocks, they will be rebuilt as we
391  // merge the blocks.
392  for (const auto &Comparison : Comparisons_) {
393  Phi_.removeIncomingValue(Comparison.BB, false);
394  }
395 
396  // Point the predecessors of the chain to the first comparison block (which is
397  // the new entry point).
398  if (EntryBlock_ != Comparisons_[0].BB)
399  EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
400 
401  // Effectively merge blocks.
402  int NumMerged = 1;
403  for (size_t I = 1; I < Comparisons_.size(); ++I) {
404  if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
405  ++NumMerged;
406  } else {
407  // Merge all previous comparisons and start a new merge block.
408  mergeComparisons(
409  makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
410  Comparisons_[I].BB, Phi_, TLI);
411  NumMerged = 1;
412  }
413  }
414  mergeComparisons(makeArrayRef(Comparisons_)
415  .slice(Comparisons_.size() - NumMerged, NumMerged),
416  nullptr, Phi_, TLI);
417 
418  return true;
419 }
420 
421 void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
422  BasicBlock *const NextBBInChain,
423  PHINode &Phi,
424  const TargetLibraryInfo *const TLI) {
425  assert(!Comparisons.empty());
426  const auto &FirstComparison = *Comparisons.begin();
427  BasicBlock *const BB = FirstComparison.BB;
428  LLVMContext &Context = BB->getContext();
429 
430  if (Comparisons.size() >= 2) {
431  DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
432  const auto TotalSize =
433  std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
434  [](int Size, const BCECmpBlock &C) {
435  return Size + C.SizeBits();
436  }) /
437  8;
438 
439  // Incoming edges do not need to be updated, and both GEPs are already
440  // computing the right address, we just need to:
441  // - replace the two loads and the icmp with the memcmp
442  // - update the branch
443  // - update the incoming values in the phi.
444  FirstComparison.BranchI->eraseFromParent();
445  FirstComparison.CmpI->eraseFromParent();
446  FirstComparison.Lhs().LoadI->eraseFromParent();
447  FirstComparison.Rhs().LoadI->eraseFromParent();
448 
449  IRBuilder<> Builder(BB);
450  const auto &DL = Phi.getModule()->getDataLayout();
451  Value *const MemCmpCall = emitMemCmp(
452  FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
453  Builder, DL, TLI);
454  Value *const MemCmpIsZero = Builder.CreateICmpEQ(
455  MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
456 
457  // Add a branch to the next basic block in the chain.
458  if (NextBBInChain) {
459  Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
460  Phi.addIncoming(ConstantInt::getFalse(Context), BB);
461  } else {
462  Builder.CreateBr(Phi.getParent());
463  Phi.addIncoming(MemCmpIsZero, BB);
464  }
465 
466  // Delete merged blocks.
467  for (size_t I = 1; I < Comparisons.size(); ++I) {
468  BasicBlock *CBB = Comparisons[I].BB;
469  CBB->replaceAllUsesWith(BB);
470  CBB->eraseFromParent();
471  }
472  } else {
473  assert(Comparisons.size() == 1);
474  // There are no blocks to merge, but we still need to update the branches.
475  DEBUG(dbgs() << "Only one comparison, updating branches\n");
476  if (NextBBInChain) {
477  if (FirstComparison.BranchI->isConditional()) {
478  DEBUG(dbgs() << "conditional -> conditional\n");
479  // Just update the "true" target, the "false" target should already be
480  // the phi block.
481  assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
482  FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
483  Phi.addIncoming(ConstantInt::getFalse(Context), BB);
484  } else {
485  DEBUG(dbgs() << "unconditional -> conditional\n");
486  // Replace the unconditional branch by a conditional one.
487  FirstComparison.BranchI->eraseFromParent();
488  IRBuilder<> Builder(BB);
489  Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
490  Phi.getParent());
491  Phi.addIncoming(FirstComparison.CmpI, BB);
492  }
493  } else {
494  if (FirstComparison.BranchI->isConditional()) {
495  DEBUG(dbgs() << "conditional -> unconditional\n");
496  // Replace the conditional branch by an unconditional one.
497  FirstComparison.BranchI->eraseFromParent();
498  IRBuilder<> Builder(BB);
499  Builder.CreateBr(Phi.getParent());
500  Phi.addIncoming(FirstComparison.CmpI, BB);
501  } else {
502  DEBUG(dbgs() << "unconditional -> unconditional\n");
503  Phi.addIncoming(FirstComparison.CmpI, BB);
504  }
505  }
506  }
507 }
508 
509 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
510  BasicBlock *const LastBlock,
511  int NumBlocks) {
512  // Walk up from the last block to find other blocks.
513  std::vector<BasicBlock *> Blocks(NumBlocks);
514  assert(LastBlock && "invalid last block");
515  BasicBlock *CurBlock = LastBlock;
516  for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
517  if (CurBlock->hasAddressTaken()) {
518  // Somebody is jumping to the block through an address, all bets are
519  // off.
520  DEBUG(dbgs() << "skip: block " << BlockIndex
521  << " has its address taken\n");
522  return {};
523  }
524  Blocks[BlockIndex] = CurBlock;
525  auto *SinglePredecessor = CurBlock->getSinglePredecessor();
526  if (!SinglePredecessor) {
527  // The block has two or more predecessors.
528  DEBUG(dbgs() << "skip: block " << BlockIndex
529  << " has two or more predecessors\n");
530  return {};
531  }
532  if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
533  // The block does not link back to the phi.
534  DEBUG(dbgs() << "skip: block " << BlockIndex
535  << " does not link back to the phi\n");
536  return {};
537  }
538  CurBlock = SinglePredecessor;
539  }
540  Blocks[0] = CurBlock;
541  return Blocks;
542 }
543 
544 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) {
545  DEBUG(dbgs() << "processPhi()\n");
546  if (Phi.getNumIncomingValues() <= 1) {
547  DEBUG(dbgs() << "skip: only one incoming value in phi\n");
548  return false;
549  }
550  // We are looking for something that has the following structure:
551  // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
552  // \ \ \ \
553  // ne ne ne \
554  // \ \ \ v
555  // +------------+-----------+----------> bb_phi
556  //
557  // - The last basic block (bb4 here) must branch unconditionally to bb_phi.
558  // It's the only block that contributes a non-constant value to the Phi.
559  // - All other blocks (b1, b2, b3) must have exactly two successors, one of
560  // them being the phi block.
561  // - All intermediate blocks (bb2, bb3) must have only one predecessor.
562  // - Blocks cannot do other work besides the comparison, see doesOtherWork()
563 
564  // The blocks are not necessarily ordered in the phi, so we start from the
565  // last block and reconstruct the order.
566  BasicBlock *LastBlock = nullptr;
567  for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
568  if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue;
569  if (LastBlock) {
570  // There are several non-constant values.
571  DEBUG(dbgs() << "skip: several non-constant values\n");
572  return false;
573  }
574  LastBlock = Phi.getIncomingBlock(I);
575  }
576  if (!LastBlock) {
577  // There is no non-constant block.
578  DEBUG(dbgs() << "skip: no non-constant block\n");
579  return false;
580  }
581  if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
582  DEBUG(dbgs() << "skip: last block non-phi successor\n");
583  return false;
584  }
585 
586  const auto Blocks =
587  getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
588  if (Blocks.empty()) return false;
589  BCECmpChain CmpChain(Blocks, Phi);
590 
591  if (CmpChain.size() < 2) {
592  DEBUG(dbgs() << "skip: only one compare block\n");
593  return false;
594  }
595 
596  return CmpChain.simplify(TLI);
597 }
598 
599 class MergeICmps : public FunctionPass {
600  public:
601  static char ID;
602 
603  MergeICmps() : FunctionPass(ID) {
605  }
606 
607  bool runOnFunction(Function &F) override {
608  if (skipFunction(F)) return false;
609  const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
610  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
611  auto PA = runImpl(F, &TLI, &TTI);
612  return !PA.areAllPreserved();
613  }
614 
615  private:
616  void getAnalysisUsage(AnalysisUsage &AU) const override {
619  }
620 
622  const TargetTransformInfo *TTI);
623 };
624 
626  const TargetTransformInfo *TTI) {
627  DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
628 
629  // We only try merging comparisons if the target wants to expand memcmp later.
630  // The rationale is to avoid turning small chains into memcmp calls.
631  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
632 
633  bool MadeChange = false;
634 
635  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
636  // A Phi operation is always first in a basic block.
637  if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
638  MadeChange |= processPhi(*Phi, TLI);
639  }
640 
641  if (MadeChange) return PreservedAnalyses::none();
642  return PreservedAnalyses::all();
643 }
644 
645 } // namespace
646 
647 char MergeICmps::ID = 0;
648 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
649  "Merge contiguous icmps into a memcmp", false, false)
653  "Merge contiguous icmps into a memcmp", false, false)
654 
655 Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:548
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional &#39;br Cond, TrueDest, FalseDest&#39; instruction.
Definition: IRBuilder.h:818
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
iterator begin() const
Definition: ArrayRef.h:137
iterator end()
Definition: Function.h:636
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:283
F(f)
An instruction for reading from memory.
Definition: Instructions.h:164
Hexagon Common GEP
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
mergeicmps
Definition: MergeICmps.cpp:652
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:361
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:707
Pass * createMergeICmpsPass()
Definition: MergeICmps.cpp:655
bool empty() const
Definition: BasicBlock.h:263
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:244
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:439
iterator begin()
Definition: Function.h:634
Value * getOperand(unsigned i) const
Definition: User.h:154
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:156
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:837
static bool runOnFunction(Function &F, bool PostInlining)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:217
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:853
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1589
R600 Clause Merge
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:376
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
Provides information about what library functions are available for the current target.
iterator end() const
Definition: ArrayRef.h:138
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:585
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:153
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:927
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:652
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink &#39;this&#39; from the containing function and delete it.
Definition: BasicBlock.cpp:97
#define I(x, y, z)
Definition: MD5.cpp:58
const MemCmpExpansionOptions * enableMemCmpExpansion(bool IsZeroCmp) const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Value * emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the memcmp function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
LLVM Value Representation.
Definition: Value.h:73
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional &#39;br label X&#39; instruction.
Definition: IRBuilder.h:812
#define DEBUG(X)
Definition: Debug.h:118
INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_END(MergeICmps
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
This pass exposes codegen information to IR-level passes.
void initializeMergeICmpsPass(PassRegistry &)
const TerminatorInst * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:120
loop simplify
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
const BasicBlock * getParent() const
Definition: Instruction.h:67