LLVM 20.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to expand memcmp() calls into optimally-sized loads and
10// compares for the target.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/Statistic.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
33#include <optional>
34
35using namespace llvm;
36using namespace llvm::PatternMatch;
37
38namespace llvm {
39class TargetLowering;
40}
41
42#define DEBUG_TYPE "expand-memcmp"
43
44STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
45STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
46STATISTIC(NumMemCmpGreaterThanMax,
47 "Number of memcmp calls with size greater than max size");
48STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
49
51 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
52 cl::desc("The number of loads per basic block for inline expansion of "
53 "memcmp that is only being compared against zero."));
54
56 "max-loads-per-memcmp", cl::Hidden,
57 cl::desc("Set maximum number of loads used in expanded memcmp"));
58
60 "max-loads-per-memcmp-opt-size", cl::Hidden,
61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
62
63namespace {
64
65
66// This class provides helper functions to expand a memcmp library call into an
67// inline expansion.
68class MemCmpExpansion {
69 struct ResultBlock {
70 BasicBlock *BB = nullptr;
71 PHINode *PhiSrc1 = nullptr;
72 PHINode *PhiSrc2 = nullptr;
73
74 ResultBlock() = default;
75 };
76
77 CallInst *const CI = nullptr;
78 ResultBlock ResBlock;
79 const uint64_t Size;
80 unsigned MaxLoadSize = 0;
81 uint64_t NumLoadsNonOneByte = 0;
82 const uint64_t NumLoadsPerBlockForZeroCmp;
83 std::vector<BasicBlock *> LoadCmpBlocks;
84 BasicBlock *EndBlock = nullptr;
85 PHINode *PhiRes = nullptr;
86 const bool IsUsedForZeroCmp;
87 const DataLayout &DL;
88 DomTreeUpdater *DTU = nullptr;
89 IRBuilder<> Builder;
90 // Represents the decomposition in blocks of the expansion. For example,
91 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
92 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
93 struct LoadEntry {
94 LoadEntry(unsigned LoadSize, uint64_t Offset)
95 : LoadSize(LoadSize), Offset(Offset) {
96 }
97
98 // The size of the load for this block, in bytes.
99 unsigned LoadSize;
100 // The offset of this load from the base pointer, in bytes.
102 };
103 using LoadEntryVector = SmallVector<LoadEntry, 8>;
104 LoadEntryVector LoadSequence;
105
106 void createLoadCmpBlocks();
107 void createResultBlock();
108 void setupResultBlockPHINodes();
109 void setupEndBlockPHINodes();
110 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
111 void emitLoadCompareBlock(unsigned BlockIndex);
112 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
113 unsigned &LoadIndex);
114 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
115 void emitMemCmpResultBlock();
116 Value *getMemCmpExpansionZeroCase();
117 Value *getMemCmpEqZeroOneBlock();
118 Value *getMemCmpOneBlock();
119 struct LoadPair {
120 Value *Lhs = nullptr;
121 Value *Rhs = nullptr;
122 };
123 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
124 Type *CmpSizeType, unsigned OffsetBytes);
125
126 static LoadEntryVector
127 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
128 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
129 static LoadEntryVector
130 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
131 unsigned MaxNumLoads,
132 unsigned &NumLoadsNonOneByte);
133
134 static void optimiseLoadSequence(
135 LoadEntryVector &LoadSequence,
137 bool IsUsedForZeroCmp);
138
139public:
140 MemCmpExpansion(CallInst *CI, uint64_t Size,
142 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
143 DomTreeUpdater *DTU);
144
145 unsigned getNumBlocks();
146 uint64_t getNumLoads() const { return LoadSequence.size(); }
147
148 Value *getMemCmpExpansion();
149};
150
151MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
153 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
154 NumLoadsNonOneByte = 0;
155 LoadEntryVector LoadSequence;
156 uint64_t Offset = 0;
157 while (Size && !LoadSizes.empty()) {
158 const unsigned LoadSize = LoadSizes.front();
159 const uint64_t NumLoadsForThisSize = Size / LoadSize;
160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
161 // Do not expand if the total number of loads is larger than what the
162 // target allows. Note that it's important that we exit before completing
163 // the expansion to avoid using a ton of memory to store the expansion for
164 // large sizes.
165 return {};
166 }
167 if (NumLoadsForThisSize > 0) {
168 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
169 LoadSequence.push_back({LoadSize, Offset});
170 Offset += LoadSize;
171 }
172 if (LoadSize > 1)
173 ++NumLoadsNonOneByte;
174 Size = Size % LoadSize;
175 }
176 LoadSizes = LoadSizes.drop_front();
177 }
178 return LoadSequence;
179}
180
182MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
183 const unsigned MaxLoadSize,
184 const unsigned MaxNumLoads,
185 unsigned &NumLoadsNonOneByte) {
186 // These are already handled by the greedy approach.
187 if (Size < 2 || MaxLoadSize < 2)
188 return {};
189
190 // We try to do as many non-overlapping loads as possible starting from the
191 // beginning.
192 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
193 assert(NumNonOverlappingLoads && "there must be at least one load");
194 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
195 // an overlapping load.
196 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
197 // Bail if we do not need an overloapping store, this is already handled by
198 // the greedy approach.
199 if (Size == 0)
200 return {};
201 // Bail if the number of loads (non-overlapping + potential overlapping one)
202 // is larger than the max allowed.
203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
204 return {};
205
206 // Add non-overlapping loads.
207 LoadEntryVector LoadSequence;
208 uint64_t Offset = 0;
209 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
210 LoadSequence.push_back({MaxLoadSize, Offset});
211 Offset += MaxLoadSize;
212 }
213
214 // Add the last overlapping load.
215 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
217 NumLoadsNonOneByte = 1;
218 return LoadSequence;
219}
220
221void MemCmpExpansion::optimiseLoadSequence(
222 LoadEntryVector &LoadSequence,
224 bool IsUsedForZeroCmp) {
225 // This part of code attempts to optimize the LoadSequence by merging allowed
226 // subsequences into single loads of allowed sizes from
227 // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
228 // comparison or if no allowed tail expansions are specified, we exit early.
229 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
230 return;
231
232 while (LoadSequence.size() >= 2) {
233 auto Last = LoadSequence[LoadSequence.size() - 1];
234 auto PreLast = LoadSequence[LoadSequence.size() - 2];
235
236 // Exit the loop if the two sequences are not contiguous
237 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
238 break;
239
240 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
241 if (find(Options.AllowedTailExpansions, LoadSize) ==
242 Options.AllowedTailExpansions.end())
243 break;
244
245 // Remove the last two sequences and replace with the combined sequence
246 LoadSequence.pop_back();
247 LoadSequence.pop_back();
248 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
249 }
250}
251
252// Initialize the basic block structure required for expansion of memcmp call
253// with given maximum load size and memcmp size parameter.
254// This structure includes:
255// 1. A list of load compare blocks - LoadCmpBlocks.
256// 2. An EndBlock, split from original instruction point, which is the block to
257// return from.
258// 3. ResultBlock, block to branch to for early exit when a
259// LoadCmpBlock finds a difference.
260MemCmpExpansion::MemCmpExpansion(
261 CallInst *const CI, uint64_t Size,
263 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
264 DomTreeUpdater *DTU)
265 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
266 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
267 Builder(CI) {
268 assert(Size > 0 && "zero blocks");
269 // Scale the max size down if the target can load more bytes than we need.
270 llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
271 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
272 LoadSizes = LoadSizes.drop_front();
273 }
274 assert(!LoadSizes.empty() && "cannot load Size bytes");
275 MaxLoadSize = LoadSizes.front();
276 // Compute the decomposition.
277 unsigned GreedyNumLoadsNonOneByte = 0;
278 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
279 GreedyNumLoadsNonOneByte);
280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
281 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
282 // If we allow overlapping loads and the load sequence is not already optimal,
283 // use overlapping loads.
284 if (Options.AllowOverlappingLoads &&
285 (LoadSequence.empty() || LoadSequence.size() > 2)) {
286 unsigned OverlappingNumLoadsNonOneByte = 0;
287 auto OverlappingLoads = computeOverlappingLoadSequence(
288 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
289 if (!OverlappingLoads.empty() &&
290 (LoadSequence.empty() ||
291 OverlappingLoads.size() < LoadSequence.size())) {
292 LoadSequence = OverlappingLoads;
293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
294 }
295 }
296 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
297 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
298}
299
300unsigned MemCmpExpansion::getNumBlocks() {
301 if (IsUsedForZeroCmp)
302 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
304 return getNumLoads();
305}
306
307void MemCmpExpansion::createLoadCmpBlocks() {
308 for (unsigned i = 0; i < getNumBlocks(); i++) {
309 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
310 EndBlock->getParent(), EndBlock);
311 LoadCmpBlocks.push_back(BB);
312 }
313}
314
315void MemCmpExpansion::createResultBlock() {
316 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
317 EndBlock->getParent(), EndBlock);
318}
319
320MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
321 Type *BSwapSizeType,
322 Type *CmpSizeType,
323 unsigned OffsetBytes) {
324 // Get the memory source at offset `OffsetBytes`.
325 Value *LhsSource = CI->getArgOperand(0);
326 Value *RhsSource = CI->getArgOperand(1);
327 Align LhsAlign = LhsSource->getPointerAlignment(DL);
328 Align RhsAlign = RhsSource->getPointerAlignment(DL);
329 if (OffsetBytes > 0) {
330 auto *ByteType = Type::getInt8Ty(CI->getContext());
331 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
332 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
333 LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
334 RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
335 }
336
337 // Create a constant or a load from the source.
338 Value *Lhs = nullptr;
339 if (auto *C = dyn_cast<Constant>(LhsSource))
340 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
341 if (!Lhs)
342 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
343
344 Value *Rhs = nullptr;
345 if (auto *C = dyn_cast<Constant>(RhsSource))
346 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
347 if (!Rhs)
348 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
349
350 // Zero extend if Byte Swap intrinsic has different type
351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
352 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
353 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
354 }
355
356 // Swap bytes if required.
357 if (BSwapSizeType) {
359 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
360 Lhs = Builder.CreateCall(Bswap, Lhs);
361 Rhs = Builder.CreateCall(Bswap, Rhs);
362 }
363
364 // Zero extend if required.
365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
366 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
367 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
368 }
369 return {Lhs, Rhs};
370}
371
372// This function creates the IR instructions for loading and comparing 1 byte.
373// It loads 1 byte from each source of the memcmp parameters with the given
374// GEPIndex. It then subtracts the two loaded values and adds this result to the
375// final phi node for selecting the memcmp result.
376void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
377 unsigned OffsetBytes) {
378 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
379 Builder.SetInsertPoint(BB);
380 const LoadPair Loads =
381 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
382 Type::getInt32Ty(CI->getContext()), OffsetBytes);
383 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
384
385 PhiRes->addIncoming(Diff, BB);
386
387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
388 // Early exit branch if difference found to EndBlock. Otherwise, continue to
389 // next LoadCmpBlock,
390 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
391 ConstantInt::get(Diff->getType(), 0));
392 BranchInst *CmpBr =
393 BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
394 Builder.Insert(CmpBr);
395 if (DTU)
396 DTU->applyUpdates(
397 {{DominatorTree::Insert, BB, EndBlock},
398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
399 } else {
400 // The last block has an unconditional branch to EndBlock.
401 BranchInst *CmpBr = BranchInst::Create(EndBlock);
402 Builder.Insert(CmpBr);
403 if (DTU)
404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
405 }
406}
407
408/// Generate an equality comparison for one or more pairs of loaded values.
409/// This is used in the case where the memcmp() call is compared equal or not
410/// equal to zero.
411Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
412 unsigned &LoadIndex) {
413 assert(LoadIndex < getNumLoads() &&
414 "getCompareLoadPairs() called with no remaining loads");
415 std::vector<Value *> XorList, OrList;
416 Value *Diff = nullptr;
417
418 const unsigned NumLoads =
419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
420
421 // For a single-block expansion, start inserting before the memcmp call.
422 if (LoadCmpBlocks.empty())
423 Builder.SetInsertPoint(CI);
424 else
425 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
426
427 Value *Cmp = nullptr;
428 // If we have multiple loads per block, we need to generate a composite
429 // comparison using xor+or. The type for the combinations is the largest load
430 // type.
431 IntegerType *const MaxLoadType =
432 NumLoads == 1 ? nullptr
433 : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
434
435 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
437 const LoadPair Loads = getLoadPair(
438 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr,
439 MaxLoadType, CurLoadEntry.Offset);
440
441 if (NumLoads != 1) {
442 // If we have multiple loads per block, we need to generate a composite
443 // comparison using xor+or.
444 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
445 Diff = Builder.CreateZExt(Diff, MaxLoadType);
446 XorList.push_back(Diff);
447 } else {
448 // If there's only one load per block, we just compare the loaded values.
449 Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
450 }
451 }
452
453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
454 std::vector<Value *> OutList;
455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
456 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
457 OutList.push_back(Or);
458 }
459 if (InList.size() % 2 != 0)
460 OutList.push_back(InList.back());
461 return OutList;
462 };
463
464 if (!Cmp) {
465 // Pairwise OR the XOR results.
466 OrList = pairWiseOr(XorList);
467
468 // Pairwise OR the OR results until one result left.
469 while (OrList.size() != 1) {
470 OrList = pairWiseOr(OrList);
471 }
472
473 assert(Diff && "Failed to find comparison diff");
474 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
475 }
476
477 return Cmp;
478}
479
480void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
481 unsigned &LoadIndex) {
482 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
483
484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
485 ? EndBlock
486 : LoadCmpBlocks[BlockIndex + 1];
487 // Early exit branch if difference found to ResultBlock. Otherwise,
488 // continue to next LoadCmpBlock or EndBlock.
489 BasicBlock *BB = Builder.GetInsertBlock();
490 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
491 Builder.Insert(CmpBr);
492 if (DTU)
493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
494 {DominatorTree::Insert, BB, NextBB}});
495
496 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
497 // since early exit to ResultBlock was not taken (no difference was found in
498 // any of the bytes).
499 if (BlockIndex == LoadCmpBlocks.size() - 1) {
500 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
502 }
503}
504
505// This function creates the IR intructions for loading and comparing using the
506// given LoadSize. It loads the number of bytes specified by LoadSize from each
507// source of the memcmp parameters. It then does a subtract to see if there was
508// a difference in the loaded values. If a difference is found, it branches
509// with an early exit to the ResultBlock for calculating which source was
510// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
511// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
512// a special case through emitLoadCompareByteBlock. The special handling can
513// simply subtract the loaded values and add it to the result phi node.
514void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
515 // There is one load per block in this case, BlockIndex == LoadIndex.
516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
517
518 if (CurLoadEntry.LoadSize == 1) {
519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
520 return;
521 }
522
523 Type *LoadSizeType =
524 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
525 Type *BSwapSizeType =
526 DL.isLittleEndian()
528 PowerOf2Ceil(CurLoadEntry.LoadSize * 8))
529 : nullptr;
530 Type *MaxLoadType = IntegerType::get(
531 CI->getContext(),
532 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
533 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
534
535 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
536
537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
538 CurLoadEntry.Offset);
539
540 // Add the loaded values to the phi nodes for calculating memcmp result only
541 // if result is not used in a zero equality.
542 if (!IsUsedForZeroCmp) {
543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
545 }
546
547 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
549 ? EndBlock
550 : LoadCmpBlocks[BlockIndex + 1];
551 // Early exit branch if difference found to ResultBlock. Otherwise, continue
552 // to next LoadCmpBlock or EndBlock.
553 BasicBlock *BB = Builder.GetInsertBlock();
554 BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
555 Builder.Insert(CmpBr);
556 if (DTU)
557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
558 {DominatorTree::Insert, BB, ResBlock.BB}});
559
560 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
561 // since early exit to ResultBlock was not taken (no difference was found in
562 // any of the bytes).
563 if (BlockIndex == LoadCmpBlocks.size() - 1) {
564 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
566 }
567}
568
569// This function populates the ResultBlock with a sequence to calculate the
570// memcmp result. It compares the two loaded source values and returns -1 if
571// src1 < src2 and 1 if src1 > src2.
572void MemCmpExpansion::emitMemCmpResultBlock() {
573 // Special case: if memcmp result is used in a zero equality, result does not
574 // need to be calculated and can simply return 1.
575 if (IsUsedForZeroCmp) {
576 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
577 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
578 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
579 PhiRes->addIncoming(Res, ResBlock.BB);
580 BranchInst *NewBr = BranchInst::Create(EndBlock);
581 Builder.Insert(NewBr);
582 if (DTU)
583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
584 return;
585 }
586 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
587 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
588
589 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
590 ResBlock.PhiSrc2);
591
592 Value *Res =
593 Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
594 ConstantInt::get(Builder.getInt32Ty(), 1));
595
596 PhiRes->addIncoming(Res, ResBlock.BB);
597 BranchInst *NewBr = BranchInst::Create(EndBlock);
598 Builder.Insert(NewBr);
599 if (DTU)
600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
601}
602
603void MemCmpExpansion::setupResultBlockPHINodes() {
604 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
605 Builder.SetInsertPoint(ResBlock.BB);
606 // Note: this assumes one load per block.
607 ResBlock.PhiSrc1 =
608 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
609 ResBlock.PhiSrc2 =
610 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
611}
612
613void MemCmpExpansion::setupEndBlockPHINodes() {
614 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
615 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
616}
617
618Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
619 unsigned LoadIndex = 0;
620 // This loop populates each of the LoadCmpBlocks with the IR sequence to
621 // handle multiple loads per block.
622 for (unsigned I = 0; I < getNumBlocks(); ++I) {
623 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
624 }
625
626 emitMemCmpResultBlock();
627 return PhiRes;
628}
629
630/// A memcmp expansion that compares equality with 0 and only has one block of
631/// load and compare can bypass the compare, branch, and phi IR that is required
632/// in the general case.
633Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
634 unsigned LoadIndex = 0;
635 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
636 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
637 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
638}
639
640/// A memcmp expansion that only has one block of load and compare can bypass
641/// the compare, branch, and phi IR that is required in the general case.
642/// This function also analyses users of memcmp, and if there is only one user
643/// from which we can conclude that only 2 out of 3 memcmp outcomes really
644/// matter, then it generates more efficient code with only one comparison.
645Value *MemCmpExpansion::getMemCmpOneBlock() {
646 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
647 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
648 Type *BSwapSizeType =
649 NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8))
650 : nullptr;
651 Type *MaxLoadType =
653 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8);
654
655 // The i8 and i16 cases don't need compares. We zext the loaded values and
656 // subtract them to get the suitable negative, zero, or positive i32 result.
657 if (Size == 1 || Size == 2) {
658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
659 Builder.getInt32Ty(), /*Offset*/ 0);
660 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
661 }
662
663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
664 /*Offset*/ 0);
665
666 // If a user of memcmp cares only about two outcomes, for example:
667 // bool result = memcmp(a, b, NBYTES) > 0;
668 // We can generate more optimal code with a smaller number of operations
669 if (CI->hasOneUser()) {
670 auto *UI = cast<Instruction>(*CI->user_begin());
671 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
672 uint64_t Shift;
673 bool NeedsZExt = false;
674 // This is a special case because instead of checking if the result is less
675 // than zero:
676 // bool result = memcmp(a, b, NBYTES) < 0;
677 // Compiler is clever enough to generate the following code:
678 // bool result = memcmp(a, b, NBYTES) >> 31;
679 if (match(UI, m_LShr(m_Value(), m_ConstantInt(Shift))) &&
680 Shift == (CI->getType()->getIntegerBitWidth() - 1)) {
681 Pred = ICmpInst::ICMP_SLT;
682 NeedsZExt = true;
683 } else {
684 // In case of a successful match this call will set `Pred` variable
685 match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
686 }
687 // Generate new code and remove the original memcmp call and the user
688 if (ICmpInst::isSigned(Pred)) {
690 Loads.Lhs, Loads.Rhs);
691 auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp;
692 UI->replaceAllUsesWith(Result);
693 UI->eraseFromParent();
694 CI->eraseFromParent();
695 return nullptr;
696 }
697 }
698
699 // The result of memcmp is negative, zero, or positive, so produce that by
700 // subtracting 2 extended compare bits: sub (ugt, ult).
701 // If a target prefers to use selects to get -1/0/1, they should be able
702 // to transform this later. The inverse transform (going from selects to math)
703 // may not be possible in the DAG because the selects got converted into
704 // branches before we got there.
705 Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
706 Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
707 Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
708 Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
709 return Builder.CreateSub(ZextUGT, ZextULT);
710}
711
712// This function expands the memcmp call into an inline expansion and returns
713// the memcmp result. Returns nullptr if the memcmp is already replaced.
714Value *MemCmpExpansion::getMemCmpExpansion() {
715 // Create the basic block framework for a multi-block expansion.
716 if (getNumBlocks() != 1) {
717 BasicBlock *StartBlock = CI->getParent();
718 EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
719 /*MSSAU=*/nullptr, "endblock");
720 setupEndBlockPHINodes();
721 createResultBlock();
722
723 // If return value of memcmp is not used in a zero equality, we need to
724 // calculate which source was larger. The calculation requires the
725 // two loaded source values of each load compare block.
726 // These will be saved in the phi nodes created by setupResultBlockPHINodes.
727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
728
729 // Create the number of required load compare basic blocks.
730 createLoadCmpBlocks();
731
732 // Update the terminator added by SplitBlock to branch to the first
733 // LoadCmpBlock.
734 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
735 if (DTU)
736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
737 {DominatorTree::Delete, StartBlock, EndBlock}});
738 }
739
741
742 if (IsUsedForZeroCmp)
743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
744 : getMemCmpExpansionZeroCase();
745
746 if (getNumBlocks() == 1)
747 return getMemCmpOneBlock();
748
749 for (unsigned I = 0; I < getNumBlocks(); ++I) {
750 emitLoadCompareBlock(I);
751 }
752
753 emitMemCmpResultBlock();
754 return PhiRes;
755}
756
757// This function checks to see if an expansion of memcmp can be generated.
758// It checks for constant compare size that is less than the max inline size.
759// If an expansion cannot occur, returns false to leave as a library call.
760// Otherwise, the library call is replaced with a new IR instruction sequence.
761/// We want to transform:
762/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
763/// To:
764/// loadbb:
765/// %0 = bitcast i32* %buffer2 to i8*
766/// %1 = bitcast i32* %buffer1 to i8*
767/// %2 = bitcast i8* %1 to i64*
768/// %3 = bitcast i8* %0 to i64*
769/// %4 = load i64, i64* %2
770/// %5 = load i64, i64* %3
771/// %6 = call i64 @llvm.bswap.i64(i64 %4)
772/// %7 = call i64 @llvm.bswap.i64(i64 %5)
773/// %8 = sub i64 %6, %7
774/// %9 = icmp ne i64 %8, 0
775/// br i1 %9, label %res_block, label %loadbb1
776/// res_block: ; preds = %loadbb2,
777/// %loadbb1, %loadbb
778/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
779/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
780/// %10 = icmp ult i64 %phi.src1, %phi.src2
781/// %11 = select i1 %10, i32 -1, i32 1
782/// br label %endblock
783/// loadbb1: ; preds = %loadbb
784/// %12 = bitcast i32* %buffer2 to i8*
785/// %13 = bitcast i32* %buffer1 to i8*
786/// %14 = bitcast i8* %13 to i32*
787/// %15 = bitcast i8* %12 to i32*
788/// %16 = getelementptr i32, i32* %14, i32 2
789/// %17 = getelementptr i32, i32* %15, i32 2
790/// %18 = load i32, i32* %16
791/// %19 = load i32, i32* %17
792/// %20 = call i32 @llvm.bswap.i32(i32 %18)
793/// %21 = call i32 @llvm.bswap.i32(i32 %19)
794/// %22 = zext i32 %20 to i64
795/// %23 = zext i32 %21 to i64
796/// %24 = sub i64 %22, %23
797/// %25 = icmp ne i64 %24, 0
798/// br i1 %25, label %res_block, label %loadbb2
799/// loadbb2: ; preds = %loadbb1
800/// %26 = bitcast i32* %buffer2 to i8*
801/// %27 = bitcast i32* %buffer1 to i8*
802/// %28 = bitcast i8* %27 to i16*
803/// %29 = bitcast i8* %26 to i16*
804/// %30 = getelementptr i16, i16* %28, i16 6
805/// %31 = getelementptr i16, i16* %29, i16 6
806/// %32 = load i16, i16* %30
807/// %33 = load i16, i16* %31
808/// %34 = call i16 @llvm.bswap.i16(i16 %32)
809/// %35 = call i16 @llvm.bswap.i16(i16 %33)
810/// %36 = zext i16 %34 to i64
811/// %37 = zext i16 %35 to i64
812/// %38 = sub i64 %36, %37
813/// %39 = icmp ne i64 %38, 0
814/// br i1 %39, label %res_block, label %loadbb3
815/// loadbb3: ; preds = %loadbb2
816/// %40 = bitcast i32* %buffer2 to i8*
817/// %41 = bitcast i32* %buffer1 to i8*
818/// %42 = getelementptr i8, i8* %41, i8 14
819/// %43 = getelementptr i8, i8* %40, i8 14
820/// %44 = load i8, i8* %42
821/// %45 = load i8, i8* %43
822/// %46 = zext i8 %44 to i32
823/// %47 = zext i8 %45 to i32
824/// %48 = sub i32 %46, %47
825/// br label %endblock
826/// endblock: ; preds = %res_block,
827/// %loadbb3
828/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
829/// ret i32 %phi.res
830static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
831 const TargetLowering *TLI, const DataLayout *DL,
833 DomTreeUpdater *DTU, const bool IsBCmp) {
834 NumMemCmpCalls++;
835
836 // Early exit from expansion if -Oz.
837 if (CI->getFunction()->hasMinSize())
838 return false;
839
840 // Early exit from expansion if size is not a constant.
841 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
842 if (!SizeCast) {
843 NumMemCmpNotConstant++;
844 return false;
845 }
846 const uint64_t SizeVal = SizeCast->getZExtValue();
847
848 if (SizeVal == 0) {
849 return false;
850 }
851 // TTI call to check if target would like to expand memcmp. Also, get the
852 // available load sizes.
853 const bool IsUsedForZeroCmp =
855 bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
856 auto Options = TTI->enableMemCmpExpansion(OptForSize,
857 IsUsedForZeroCmp);
858 if (!Options) return false;
859
860 if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
861 Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
862
863 if (OptForSize &&
864 MaxLoadsPerMemcmpOptSize.getNumOccurrences())
865 Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
866
867 if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
868 Options.MaxNumLoads = MaxLoadsPerMemcmp;
869
870 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
871
872 // Don't expand if this will require more loads than desired by the target.
873 if (Expansion.getNumLoads() == 0) {
874 NumMemCmpGreaterThanMax++;
875 return false;
876 }
877
878 NumMemCmpInlined++;
879
880 if (Value *Res = Expansion.getMemCmpExpansion()) {
881 // Replace call with result of expansion and erase call.
882 CI->replaceAllUsesWith(Res);
883 CI->eraseFromParent();
884 }
885
886 return true;
887}
888
889// Returns true if a change was made.
890static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
891 const TargetTransformInfo *TTI, const TargetLowering *TL,
892 const DataLayout &DL, ProfileSummaryInfo *PSI,
894
897 const TargetLowering *TL,
900
901class ExpandMemCmpLegacyPass : public FunctionPass {
902public:
903 static char ID;
904
905 ExpandMemCmpLegacyPass() : FunctionPass(ID) {
907 }
908
909 bool runOnFunction(Function &F) override {
910 if (skipFunction(F)) return false;
911
912 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
913 if (!TPC) {
914 return false;
915 }
916 const TargetLowering* TL =
917 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
918
919 const TargetLibraryInfo *TLI =
920 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
921 const TargetTransformInfo *TTI =
922 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
923 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
924 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
925 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
926 nullptr;
927 DominatorTree *DT = nullptr;
928 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
929 DT = &DTWP->getDomTree();
930 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
931 return !PA.areAllPreserved();
932 }
933
934private:
935 void getAnalysisUsage(AnalysisUsage &AU) const override {
942 }
943};
944
945bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
946 const TargetTransformInfo *TTI, const TargetLowering *TL,
947 const DataLayout &DL, ProfileSummaryInfo *PSI,
949 for (Instruction &I : BB) {
950 CallInst *CI = dyn_cast<CallInst>(&I);
951 if (!CI) {
952 continue;
953 }
955 if (TLI->getLibFunc(*CI, Func) &&
956 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
957 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
958 return true;
959 }
960 }
961 return false;
962}
963
966 const TargetLowering *TL, ProfileSummaryInfo *PSI,
968 std::optional<DomTreeUpdater> DTU;
969 if (DT)
970 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
971
972 const DataLayout& DL = F.getDataLayout();
973 bool MadeChanges = false;
974 for (auto BBIt = F.begin(); BBIt != F.end();) {
975 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
976 MadeChanges = true;
977 // If changes were made, restart the function from the beginning, since
978 // the structure of the function was changed.
979 BBIt = F.begin();
980 } else {
981 ++BBIt;
982 }
983 }
984 if (MadeChanges)
985 for (BasicBlock &BB : F)
987 if (!MadeChanges)
988 return PreservedAnalyses::all();
991 return PA;
992}
993
994} // namespace
995
998 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
999 const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1000 const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1002 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1003 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary())
1005 : nullptr;
1007
1008 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
1009}
1010
1011char ExpandMemCmpLegacyPass::ID = 0;
1012INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1013 "Expand memcmp() to load/stores", false, false)
1019INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1020 "Expand memcmp() to load/stores", false, false)
1021
1023 return new ExpandMemCmpLegacyPass();
1024}
AMDGPU Mark last scratch load
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
DXIL Intrinsic Expansion
uint64_t Size
static bool runImpl(Function &F, const TargetLowering &TLI)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
#define DEBUG_TYPE
hexagon widen stores
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:911
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:163
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
This class represents a function call, abstracting a target machine's calling convention.
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition: IRBuilder.h:1949
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1830
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:142
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2281
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1542
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:471
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Class to represent integer types.
Definition: DerivedTypes.h:42
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition: Value.cpp:157
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
const ParentTy * getParent() const
Definition: ilist_node.h:32
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:737
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
FunctionPass * createExpandMemCmpLegacyPass()
@ Or
Bitwise or logical OR of integers.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Returns options for expansion of memcmp. IsZeroCmp is.