LLVM  15.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1 //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to expand memcmp() calls into optimally-sized loads and
10 // compares for the target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/Statistic.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/InitializePasses.h"
31 
32 using namespace llvm;
33 
34 namespace llvm {
35 class TargetLowering;
36 }
37 
38 #define DEBUG_TYPE "expandmemcmp"
39 
40 STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
41 STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
42 STATISTIC(NumMemCmpGreaterThanMax,
43  "Number of memcmp calls with size greater than max size");
44 STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
45 
47  "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
48  cl::desc("The number of loads per basic block for inline expansion of "
49  "memcmp that is only being compared against zero."));
50 
52  "max-loads-per-memcmp", cl::Hidden,
53  cl::desc("Set maximum number of loads used in expanded memcmp"));
54 
56  "max-loads-per-memcmp-opt-size", cl::Hidden,
57  cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
58 
59 namespace {
60 
61 
62 // This class provides helper functions to expand a memcmp library call into an
63 // inline expansion.
64 class MemCmpExpansion {
65  struct ResultBlock {
66  BasicBlock *BB = nullptr;
67  PHINode *PhiSrc1 = nullptr;
68  PHINode *PhiSrc2 = nullptr;
69 
70  ResultBlock() = default;
71  };
72 
73  CallInst *const CI;
74  ResultBlock ResBlock;
75  const uint64_t Size;
76  unsigned MaxLoadSize = 0;
77  uint64_t NumLoadsNonOneByte = 0;
78  const uint64_t NumLoadsPerBlockForZeroCmp;
79  std::vector<BasicBlock *> LoadCmpBlocks;
80  BasicBlock *EndBlock;
81  PHINode *PhiRes;
82  const bool IsUsedForZeroCmp;
83  const DataLayout &DL;
84  DomTreeUpdater *DTU;
86  // Represents the decomposition in blocks of the expansion. For example,
87  // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
88  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
89  struct LoadEntry {
90  LoadEntry(unsigned LoadSize, uint64_t Offset)
91  : LoadSize(LoadSize), Offset(Offset) {
92  }
93 
94  // The size of the load for this block, in bytes.
95  unsigned LoadSize;
96  // The offset of this load from the base pointer, in bytes.
98  };
99  using LoadEntryVector = SmallVector<LoadEntry, 8>;
100  LoadEntryVector LoadSequence;
101 
102  void createLoadCmpBlocks();
103  void createResultBlock();
104  void setupResultBlockPHINodes();
105  void setupEndBlockPHINodes();
106  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
107  void emitLoadCompareBlock(unsigned BlockIndex);
108  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
109  unsigned &LoadIndex);
110  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
111  void emitMemCmpResultBlock();
112  Value *getMemCmpExpansionZeroCase();
113  Value *getMemCmpEqZeroOneBlock();
114  Value *getMemCmpOneBlock();
115  struct LoadPair {
116  Value *Lhs = nullptr;
117  Value *Rhs = nullptr;
118  };
119  LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
120  unsigned OffsetBytes);
121 
122  static LoadEntryVector
123  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
124  unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
125  static LoadEntryVector
126  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
127  unsigned MaxNumLoads,
128  unsigned &NumLoadsNonOneByte);
129 
130 public:
131  MemCmpExpansion(CallInst *CI, uint64_t Size,
133  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
134  DomTreeUpdater *DTU);
135 
136  unsigned getNumBlocks();
137  uint64_t getNumLoads() const { return LoadSequence.size(); }
138 
139  Value *getMemCmpExpansion();
140 };
141 
142 MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
143  uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
144  const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
145  NumLoadsNonOneByte = 0;
146  LoadEntryVector LoadSequence;
147  uint64_t Offset = 0;
148  while (Size && !LoadSizes.empty()) {
149  const unsigned LoadSize = LoadSizes.front();
150  const uint64_t NumLoadsForThisSize = Size / LoadSize;
151  if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
152  // Do not expand if the total number of loads is larger than what the
153  // target allows. Note that it's important that we exit before completing
154  // the expansion to avoid using a ton of memory to store the expansion for
155  // large sizes.
156  return {};
157  }
158  if (NumLoadsForThisSize > 0) {
159  for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
160  LoadSequence.push_back({LoadSize, Offset});
161  Offset += LoadSize;
162  }
163  if (LoadSize > 1)
164  ++NumLoadsNonOneByte;
165  Size = Size % LoadSize;
166  }
167  LoadSizes = LoadSizes.drop_front();
168  }
169  return LoadSequence;
170 }
171 
173 MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
174  const unsigned MaxLoadSize,
175  const unsigned MaxNumLoads,
176  unsigned &NumLoadsNonOneByte) {
177  // These are already handled by the greedy approach.
178  if (Size < 2 || MaxLoadSize < 2)
179  return {};
180 
181  // We try to do as many non-overlapping loads as possible starting from the
182  // beginning.
183  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
184  assert(NumNonOverlappingLoads && "there must be at least one load");
185  // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
186  // an overlapping load.
187  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
188  // Bail if we do not need an overloapping store, this is already handled by
189  // the greedy approach.
190  if (Size == 0)
191  return {};
192  // Bail if the number of loads (non-overlapping + potential overlapping one)
193  // is larger than the max allowed.
194  if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
195  return {};
196 
197  // Add non-overlapping loads.
198  LoadEntryVector LoadSequence;
199  uint64_t Offset = 0;
200  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
201  LoadSequence.push_back({MaxLoadSize, Offset});
202  Offset += MaxLoadSize;
203  }
204 
205  // Add the last overlapping load.
206  assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
207  LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
208  NumLoadsNonOneByte = 1;
209  return LoadSequence;
210 }
211 
212 // Initialize the basic block structure required for expansion of memcmp call
213 // with given maximum load size and memcmp size parameter.
214 // This structure includes:
215 // 1. A list of load compare blocks - LoadCmpBlocks.
216 // 2. An EndBlock, split from original instruction point, which is the block to
217 // return from.
218 // 3. ResultBlock, block to branch to for early exit when a
219 // LoadCmpBlock finds a difference.
220 MemCmpExpansion::MemCmpExpansion(
221  CallInst *const CI, uint64_t Size,
223  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
224  DomTreeUpdater *DTU)
225  : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
226  IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
227  Builder(CI) {
228  assert(Size > 0 && "zero blocks");
229  // Scale the max size down if the target can load more bytes than we need.
230  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
231  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
232  LoadSizes = LoadSizes.drop_front();
233  }
234  assert(!LoadSizes.empty() && "cannot load Size bytes");
235  MaxLoadSize = LoadSizes.front();
236  // Compute the decomposition.
237  unsigned GreedyNumLoadsNonOneByte = 0;
238  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
239  GreedyNumLoadsNonOneByte);
240  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
241  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
242  // If we allow overlapping loads and the load sequence is not already optimal,
243  // use overlapping loads.
244  if (Options.AllowOverlappingLoads &&
245  (LoadSequence.empty() || LoadSequence.size() > 2)) {
246  unsigned OverlappingNumLoadsNonOneByte = 0;
247  auto OverlappingLoads = computeOverlappingLoadSequence(
248  Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
249  if (!OverlappingLoads.empty() &&
250  (LoadSequence.empty() ||
251  OverlappingLoads.size() < LoadSequence.size())) {
252  LoadSequence = OverlappingLoads;
253  NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
254  }
255  }
256  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
257 }
258 
259 unsigned MemCmpExpansion::getNumBlocks() {
260  if (IsUsedForZeroCmp)
261  return getNumLoads() / NumLoadsPerBlockForZeroCmp +
262  (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
263  return getNumLoads();
264 }
265 
266 void MemCmpExpansion::createLoadCmpBlocks() {
267  for (unsigned i = 0; i < getNumBlocks(); i++) {
268  BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
269  EndBlock->getParent(), EndBlock);
270  LoadCmpBlocks.push_back(BB);
271  }
272 }
273 
274 void MemCmpExpansion::createResultBlock() {
275  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
276  EndBlock->getParent(), EndBlock);
277 }
278 
279 MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
280  bool NeedsBSwap,
281  Type *CmpSizeType,
282  unsigned OffsetBytes) {
283  // Get the memory source at offset `OffsetBytes`.
284  Value *LhsSource = CI->getArgOperand(0);
285  Value *RhsSource = CI->getArgOperand(1);
286  Align LhsAlign = LhsSource->getPointerAlignment(DL);
287  Align RhsAlign = RhsSource->getPointerAlignment(DL);
288  if (OffsetBytes > 0) {
289  auto *ByteType = Type::getInt8Ty(CI->getContext());
290  LhsSource = Builder.CreateConstGEP1_64(
291  ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
292  OffsetBytes);
293  RhsSource = Builder.CreateConstGEP1_64(
294  ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
295  OffsetBytes);
296  LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
297  RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
298  }
299  LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
300  RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
301 
302  // Create a constant or a load from the source.
303  Value *Lhs = nullptr;
304  if (auto *C = dyn_cast<Constant>(LhsSource))
305  Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
306  if (!Lhs)
307  Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
308 
309  Value *Rhs = nullptr;
310  if (auto *C = dyn_cast<Constant>(RhsSource))
311  Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
312  if (!Rhs)
313  Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
314 
315  // Swap bytes if required.
316  if (NeedsBSwap) {
318  Intrinsic::bswap, LoadSizeType);
319  Lhs = Builder.CreateCall(Bswap, Lhs);
320  Rhs = Builder.CreateCall(Bswap, Rhs);
321  }
322 
323  // Zero extend if required.
324  if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
325  Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
326  Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
327  }
328  return {Lhs, Rhs};
329 }
330 
331 // This function creates the IR instructions for loading and comparing 1 byte.
332 // It loads 1 byte from each source of the memcmp parameters with the given
333 // GEPIndex. It then subtracts the two loaded values and adds this result to the
334 // final phi node for selecting the memcmp result.
335 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
336  unsigned OffsetBytes) {
337  BasicBlock *BB = LoadCmpBlocks[BlockIndex];
338  Builder.SetInsertPoint(BB);
339  const LoadPair Loads =
340  getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
341  Type::getInt32Ty(CI->getContext()), OffsetBytes);
342  Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
343 
344  PhiRes->addIncoming(Diff, BB);
345 
346  if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
347  // Early exit branch if difference found to EndBlock. Otherwise, continue to
348  // next LoadCmpBlock,
349  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
350  ConstantInt::get(Diff->getType(), 0));
351  BranchInst *CmpBr =
352  BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
353  Builder.Insert(CmpBr);
354  if (DTU)
355  DTU->applyUpdates(
356  {{DominatorTree::Insert, BB, EndBlock},
357  {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
358  } else {
359  // The last block has an unconditional branch to EndBlock.
360  BranchInst *CmpBr = BranchInst::Create(EndBlock);
361  Builder.Insert(CmpBr);
362  if (DTU)
363  DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
364  }
365 }
366 
367 /// Generate an equality comparison for one or more pairs of loaded values.
368 /// This is used in the case where the memcmp() call is compared equal or not
369 /// equal to zero.
370 Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
371  unsigned &LoadIndex) {
372  assert(LoadIndex < getNumLoads() &&
373  "getCompareLoadPairs() called with no remaining loads");
374  std::vector<Value *> XorList, OrList;
375  Value *Diff = nullptr;
376 
377  const unsigned NumLoads =
378  std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
379 
380  // For a single-block expansion, start inserting before the memcmp call.
381  if (LoadCmpBlocks.empty())
382  Builder.SetInsertPoint(CI);
383  else
384  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
385 
386  Value *Cmp = nullptr;
387  // If we have multiple loads per block, we need to generate a composite
388  // comparison using xor+or. The type for the combinations is the largest load
389  // type.
390  IntegerType *const MaxLoadType =
391  NumLoads == 1 ? nullptr
392  : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
393  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
394  const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
395  const LoadPair Loads = getLoadPair(
396  IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
397  /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
398 
399  if (NumLoads != 1) {
400  // If we have multiple loads per block, we need to generate a composite
401  // comparison using xor+or.
402  Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
403  Diff = Builder.CreateZExt(Diff, MaxLoadType);
404  XorList.push_back(Diff);
405  } else {
406  // If there's only one load per block, we just compare the loaded values.
407  Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
408  }
409  }
410 
411  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
412  std::vector<Value *> OutList;
413  for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
414  Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
415  OutList.push_back(Or);
416  }
417  if (InList.size() % 2 != 0)
418  OutList.push_back(InList.back());
419  return OutList;
420  };
421 
422  if (!Cmp) {
423  // Pairwise OR the XOR results.
424  OrList = pairWiseOr(XorList);
425 
426  // Pairwise OR the OR results until one result left.
427  while (OrList.size() != 1) {
428  OrList = pairWiseOr(OrList);
429  }
430 
431  assert(Diff && "Failed to find comparison diff");
432  Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
433  }
434 
435  return Cmp;
436 }
437 
438 void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
439  unsigned &LoadIndex) {
440  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
441 
442  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
443  ? EndBlock
444  : LoadCmpBlocks[BlockIndex + 1];
445  // Early exit branch if difference found to ResultBlock. Otherwise,
446  // continue to next LoadCmpBlock or EndBlock.
447  BasicBlock *BB = Builder.GetInsertBlock();
448  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
449  Builder.Insert(CmpBr);
450  if (DTU)
451  DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
452  {DominatorTree::Insert, BB, NextBB}});
453 
454  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
455  // since early exit to ResultBlock was not taken (no difference was found in
456  // any of the bytes).
457  if (BlockIndex == LoadCmpBlocks.size() - 1) {
458  Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
459  PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
460  }
461 }
462 
463 // This function creates the IR intructions for loading and comparing using the
464 // given LoadSize. It loads the number of bytes specified by LoadSize from each
465 // source of the memcmp parameters. It then does a subtract to see if there was
466 // a difference in the loaded values. If a difference is found, it branches
467 // with an early exit to the ResultBlock for calculating which source was
468 // larger. Otherwise, it falls through to the either the next LoadCmpBlock or
469 // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
470 // a special case through emitLoadCompareByteBlock. The special handling can
471 // simply subtract the loaded values and add it to the result phi node.
472 void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
473  // There is one load per block in this case, BlockIndex == LoadIndex.
474  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
475 
476  if (CurLoadEntry.LoadSize == 1) {
477  MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
478  return;
479  }
480 
481  Type *LoadSizeType =
482  IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
483  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
484  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
485 
486  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
487 
488  const LoadPair Loads =
489  getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
490  CurLoadEntry.Offset);
491 
492  // Add the loaded values to the phi nodes for calculating memcmp result only
493  // if result is not used in a zero equality.
494  if (!IsUsedForZeroCmp) {
495  ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
496  ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
497  }
498 
499  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
500  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
501  ? EndBlock
502  : LoadCmpBlocks[BlockIndex + 1];
503  // Early exit branch if difference found to ResultBlock. Otherwise, continue
504  // to next LoadCmpBlock or EndBlock.
505  BasicBlock *BB = Builder.GetInsertBlock();
506  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
507  Builder.Insert(CmpBr);
508  if (DTU)
509  DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
510  {DominatorTree::Insert, BB, ResBlock.BB}});
511 
512  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
513  // since early exit to ResultBlock was not taken (no difference was found in
514  // any of the bytes).
515  if (BlockIndex == LoadCmpBlocks.size() - 1) {
516  Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
517  PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
518  }
519 }
520 
521 // This function populates the ResultBlock with a sequence to calculate the
522 // memcmp result. It compares the two loaded source values and returns -1 if
523 // src1 < src2 and 1 if src1 > src2.
524 void MemCmpExpansion::emitMemCmpResultBlock() {
525  // Special case: if memcmp result is used in a zero equality, result does not
526  // need to be calculated and can simply return 1.
527  if (IsUsedForZeroCmp) {
528  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
529  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
530  Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
531  PhiRes->addIncoming(Res, ResBlock.BB);
532  BranchInst *NewBr = BranchInst::Create(EndBlock);
533  Builder.Insert(NewBr);
534  if (DTU)
535  DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
536  return;
537  }
538  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
539  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
540 
541  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
542  ResBlock.PhiSrc2);
543 
544  Value *Res =
545  Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
546  ConstantInt::get(Builder.getInt32Ty(), 1));
547 
548  PhiRes->addIncoming(Res, ResBlock.BB);
549  BranchInst *NewBr = BranchInst::Create(EndBlock);
550  Builder.Insert(NewBr);
551  if (DTU)
552  DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
553 }
554 
555 void MemCmpExpansion::setupResultBlockPHINodes() {
556  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
557  Builder.SetInsertPoint(ResBlock.BB);
558  // Note: this assumes one load per block.
559  ResBlock.PhiSrc1 =
560  Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
561  ResBlock.PhiSrc2 =
562  Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
563 }
564 
565 void MemCmpExpansion::setupEndBlockPHINodes() {
566  Builder.SetInsertPoint(&EndBlock->front());
567  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
568 }
569 
570 Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
571  unsigned LoadIndex = 0;
572  // This loop populates each of the LoadCmpBlocks with the IR sequence to
573  // handle multiple loads per block.
574  for (unsigned I = 0; I < getNumBlocks(); ++I) {
575  emitLoadCompareBlockMultipleLoads(I, LoadIndex);
576  }
577 
578  emitMemCmpResultBlock();
579  return PhiRes;
580 }
581 
582 /// A memcmp expansion that compares equality with 0 and only has one block of
583 /// load and compare can bypass the compare, branch, and phi IR that is required
584 /// in the general case.
585 Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
586  unsigned LoadIndex = 0;
587  Value *Cmp = getCompareLoadPairs(0, LoadIndex);
588  assert(LoadIndex == getNumLoads() && "some entries were not consumed");
589  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
590 }
591 
592 /// A memcmp expansion that only has one block of load and compare can bypass
593 /// the compare, branch, and phi IR that is required in the general case.
594 Value *MemCmpExpansion::getMemCmpOneBlock() {
595  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
596  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
597 
598  // The i8 and i16 cases don't need compares. We zext the loaded values and
599  // subtract them to get the suitable negative, zero, or positive i32 result.
600  if (Size < 4) {
601  const LoadPair Loads =
602  getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
603  /*Offset*/ 0);
604  return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
605  }
606 
607  const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
608  /*Offset*/ 0);
609  // The result of memcmp is negative, zero, or positive, so produce that by
610  // subtracting 2 extended compare bits: sub (ugt, ult).
611  // If a target prefers to use selects to get -1/0/1, they should be able
612  // to transform this later. The inverse transform (going from selects to math)
613  // may not be possible in the DAG because the selects got converted into
614  // branches before we got there.
615  Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
616  Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
617  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
618  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
619  return Builder.CreateSub(ZextUGT, ZextULT);
620 }
621 
622 // This function expands the memcmp call into an inline expansion and returns
623 // the memcmp result.
624 Value *MemCmpExpansion::getMemCmpExpansion() {
625  // Create the basic block framework for a multi-block expansion.
626  if (getNumBlocks() != 1) {
627  BasicBlock *StartBlock = CI->getParent();
628  EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
629  /*MSSAU=*/nullptr, "endblock");
630  setupEndBlockPHINodes();
631  createResultBlock();
632 
633  // If return value of memcmp is not used in a zero equality, we need to
634  // calculate which source was larger. The calculation requires the
635  // two loaded source values of each load compare block.
636  // These will be saved in the phi nodes created by setupResultBlockPHINodes.
637  if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
638 
639  // Create the number of required load compare basic blocks.
640  createLoadCmpBlocks();
641 
642  // Update the terminator added by SplitBlock to branch to the first
643  // LoadCmpBlock.
644  StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
645  if (DTU)
646  DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
647  {DominatorTree::Delete, StartBlock, EndBlock}});
648  }
649 
650  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
651 
652  if (IsUsedForZeroCmp)
653  return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
654  : getMemCmpExpansionZeroCase();
655 
656  if (getNumBlocks() == 1)
657  return getMemCmpOneBlock();
658 
659  for (unsigned I = 0; I < getNumBlocks(); ++I) {
660  emitLoadCompareBlock(I);
661  }
662 
663  emitMemCmpResultBlock();
664  return PhiRes;
665 }
666 
667 // This function checks to see if an expansion of memcmp can be generated.
668 // It checks for constant compare size that is less than the max inline size.
669 // If an expansion cannot occur, returns false to leave as a library call.
670 // Otherwise, the library call is replaced with a new IR instruction sequence.
671 /// We want to transform:
672 /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
673 /// To:
674 /// loadbb:
675 /// %0 = bitcast i32* %buffer2 to i8*
676 /// %1 = bitcast i32* %buffer1 to i8*
677 /// %2 = bitcast i8* %1 to i64*
678 /// %3 = bitcast i8* %0 to i64*
679 /// %4 = load i64, i64* %2
680 /// %5 = load i64, i64* %3
681 /// %6 = call i64 @llvm.bswap.i64(i64 %4)
682 /// %7 = call i64 @llvm.bswap.i64(i64 %5)
683 /// %8 = sub i64 %6, %7
684 /// %9 = icmp ne i64 %8, 0
685 /// br i1 %9, label %res_block, label %loadbb1
686 /// res_block: ; preds = %loadbb2,
687 /// %loadbb1, %loadbb
688 /// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
689 /// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
690 /// %10 = icmp ult i64 %phi.src1, %phi.src2
691 /// %11 = select i1 %10, i32 -1, i32 1
692 /// br label %endblock
693 /// loadbb1: ; preds = %loadbb
694 /// %12 = bitcast i32* %buffer2 to i8*
695 /// %13 = bitcast i32* %buffer1 to i8*
696 /// %14 = bitcast i8* %13 to i32*
697 /// %15 = bitcast i8* %12 to i32*
698 /// %16 = getelementptr i32, i32* %14, i32 2
699 /// %17 = getelementptr i32, i32* %15, i32 2
700 /// %18 = load i32, i32* %16
701 /// %19 = load i32, i32* %17
702 /// %20 = call i32 @llvm.bswap.i32(i32 %18)
703 /// %21 = call i32 @llvm.bswap.i32(i32 %19)
704 /// %22 = zext i32 %20 to i64
705 /// %23 = zext i32 %21 to i64
706 /// %24 = sub i64 %22, %23
707 /// %25 = icmp ne i64 %24, 0
708 /// br i1 %25, label %res_block, label %loadbb2
709 /// loadbb2: ; preds = %loadbb1
710 /// %26 = bitcast i32* %buffer2 to i8*
711 /// %27 = bitcast i32* %buffer1 to i8*
712 /// %28 = bitcast i8* %27 to i16*
713 /// %29 = bitcast i8* %26 to i16*
714 /// %30 = getelementptr i16, i16* %28, i16 6
715 /// %31 = getelementptr i16, i16* %29, i16 6
716 /// %32 = load i16, i16* %30
717 /// %33 = load i16, i16* %31
718 /// %34 = call i16 @llvm.bswap.i16(i16 %32)
719 /// %35 = call i16 @llvm.bswap.i16(i16 %33)
720 /// %36 = zext i16 %34 to i64
721 /// %37 = zext i16 %35 to i64
722 /// %38 = sub i64 %36, %37
723 /// %39 = icmp ne i64 %38, 0
724 /// br i1 %39, label %res_block, label %loadbb3
725 /// loadbb3: ; preds = %loadbb2
726 /// %40 = bitcast i32* %buffer2 to i8*
727 /// %41 = bitcast i32* %buffer1 to i8*
728 /// %42 = getelementptr i8, i8* %41, i8 14
729 /// %43 = getelementptr i8, i8* %40, i8 14
730 /// %44 = load i8, i8* %42
731 /// %45 = load i8, i8* %43
732 /// %46 = zext i8 %44 to i32
733 /// %47 = zext i8 %45 to i32
734 /// %48 = sub i32 %46, %47
735 /// br label %endblock
736 /// endblock: ; preds = %res_block,
737 /// %loadbb3
738 /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
739 /// ret i32 %phi.res
740 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
741  const TargetLowering *TLI, const DataLayout *DL,
743  DomTreeUpdater *DTU, const bool IsBCmp) {
744  NumMemCmpCalls++;
745 
746  // Early exit from expansion if -Oz.
747  if (CI->getFunction()->hasMinSize())
748  return false;
749 
750  // Early exit from expansion if size is not a constant.
751  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
752  if (!SizeCast) {
753  NumMemCmpNotConstant++;
754  return false;
755  }
756  const uint64_t SizeVal = SizeCast->getZExtValue();
757 
758  if (SizeVal == 0) {
759  return false;
760  }
761  // TTI call to check if target would like to expand memcmp. Also, get the
762  // available load sizes.
763  const bool IsUsedForZeroCmp =
765  bool OptForSize = CI->getFunction()->hasOptSize() ||
767  auto Options = TTI->enableMemCmpExpansion(OptForSize,
768  IsUsedForZeroCmp);
769  if (!Options) return false;
770 
771  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
772  Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
773 
774  if (OptForSize &&
775  MaxLoadsPerMemcmpOptSize.getNumOccurrences())
776  Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
777 
778  if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
779  Options.MaxNumLoads = MaxLoadsPerMemcmp;
780 
781  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
782 
783  // Don't expand if this will require more loads than desired by the target.
784  if (Expansion.getNumLoads() == 0) {
785  NumMemCmpGreaterThanMax++;
786  return false;
787  }
788 
789  NumMemCmpInlined++;
790 
791  Value *Res = Expansion.getMemCmpExpansion();
792 
793  // Replace call with result of expansion and erase call.
794  CI->replaceAllUsesWith(Res);
795  CI->eraseFromParent();
796 
797  return true;
798 }
799 
800 class ExpandMemCmpPass : public FunctionPass {
801 public:
802  static char ID;
803 
804  ExpandMemCmpPass() : FunctionPass(ID) {
805  initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
806  }
807 
808  bool runOnFunction(Function &F) override {
809  if (skipFunction(F)) return false;
810 
811  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
812  if (!TPC) {
813  return false;
814  }
815  const TargetLowering* TL =
816  TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
817 
818  const TargetLibraryInfo *TLI =
819  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
820  const TargetTransformInfo *TTI =
821  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
822  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
823  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
824  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
825  nullptr;
826  DominatorTree *DT = nullptr;
827  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
828  DT = &DTWP->getDomTree();
829  auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
830  return !PA.areAllPreserved();
831  }
832 
833 private:
834  void getAnalysisUsage(AnalysisUsage &AU) const override {
841  }
842 
844  const TargetTransformInfo *TTI,
845  const TargetLowering *TL, ProfileSummaryInfo *PSI,
847  // Returns true if a change was made.
848  bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
849  const TargetTransformInfo *TTI, const TargetLowering *TL,
850  const DataLayout &DL, ProfileSummaryInfo *PSI,
852 };
853 
854 bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
855  const TargetTransformInfo *TTI,
856  const TargetLowering *TL,
857  const DataLayout &DL, ProfileSummaryInfo *PSI,
859  DomTreeUpdater *DTU) {
860  for (Instruction& I : BB) {
861  CallInst *CI = dyn_cast<CallInst>(&I);
862  if (!CI) {
863  continue;
864  }
865  LibFunc Func;
866  if (TLI->getLibFunc(*CI, Func) &&
867  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
868  expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
869  return true;
870  }
871  }
872  return false;
873 }
874 
877  const TargetTransformInfo *TTI,
878  const TargetLowering *TL, ProfileSummaryInfo *PSI,
881  if (DT)
882  DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
883 
884  const DataLayout& DL = F.getParent()->getDataLayout();
885  bool MadeChanges = false;
886  for (auto BBIt = F.begin(); BBIt != F.end();) {
887  if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
888  DTU.hasValue() ? DTU.getPointer() : nullptr)) {
889  MadeChanges = true;
890  // If changes were made, restart the function from the beginning, since
891  // the structure of the function was changed.
892  BBIt = F.begin();
893  } else {
894  ++BBIt;
895  }
896  }
897  if (MadeChanges)
898  for (BasicBlock &BB : F)
900  if (!MadeChanges)
901  return PreservedAnalyses::all();
904  return PA;
905 }
906 
907 } // namespace
908 
909 char ExpandMemCmpPass::ID = 0;
910 INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
911  "Expand memcmp() to load/stores", false, false)
917 INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
918  "Expand memcmp() to load/stores", false, false)
919 
921  return new ExpandMemCmpPass();
922 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:76
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:915
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1410
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:231
expandmemcmp
expandmemcmp
Definition: ExpandMemCmp.cpp:917
MaxLoadsPerMemcmpOptSize
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
llvm::Function
Definition: Function.h:60
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:68
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::FunctionPass::skipFunction
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:173
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
LazyBlockFrequencyInfo.h
llvm::IRBuilder<>
DomTreeUpdater.h
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
stores
hexagon widen stores
Definition: HexagonStoreWidening.cpp:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
to
Should compile to
Definition: README.txt:449
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::Optional
Definition: APInt.h:33
ConstantFolding.h
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::createExpandMemCmpPass
FunctionPass * createExpandMemCmpPass()
Definition: ExpandMemCmp.cpp:920
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:98
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::Optional::getPointer
constexpr const T * getPointer() const
Definition: Optional.h:277
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:283
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
memcmp
Expand memcmp() to load/stores"
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:707
TargetMachine.h
MaxLoadsPerMemcmp
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3394
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
runImpl
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
Definition: ReplaceWithVeclib.cpp:176
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) INITIALIZE_PASS_END(ExpandMemCmpPass
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::isOnlyUsedInZeroEqualityComparison
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
Definition: ValueTracking.cpp:317
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:203
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:409
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
ProfileSummaryInfo.h
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2517
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::initializeExpandMemCmpPassPass
void initializeExpandMemCmpPassPass(PassRegistry &)
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:228
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2814
llvm::Optional::emplace
void emplace(ArgTypes &&... Args)
Create a new object by constructing it in place with the given arguments.
Definition: Optional.h:261
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
TargetPassConfig.h
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
load
LLVM currently emits rax rax movq rax rax ret It could narrow the loads and stores to emit rax rax movq rax rax ret The trouble is that there is a TokenFactor between the store and the load
Definition: README.txt:1531
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition: TargetTransformInfo.h:773
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:519
llvm::ArrayRef< unsigned >
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:801
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:69
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:167
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:663
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:309
llvm::ConstantFoldLoadFromConstPtr
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
Definition: ConstantFolding.cpp:697
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:774
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:367
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2664
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:660
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:97
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
MemCmpEqZeroNumLoadsPerBlock
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:405
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:837
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37