// ExpandMemCmp.cpp — listing exported from the LLVM (23.0.0git) doxygen;
// the license header follows below.
1//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to expand memcmp() calls into optimally-sized loads and
10// compares for the target.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/Statistic.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
33#include <optional>
34
35using namespace llvm;
36using namespace llvm::PatternMatch;
37
38#define DEBUG_TYPE "expand-memcmp"
39
// Pass statistics (printed with -stats): how often memcmp expansion was
// attempted, why it was rejected, and how often it succeeded.
STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
STATISTIC(NumMemCmpGreaterThanMax,
          "Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
45
47 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
48 cl::desc("The number of loads per basic block for inline expansion of "
49 "memcmp that is only being compared against zero."));
50
52 "max-loads-per-memcmp", cl::Hidden,
53 cl::desc("Set maximum number of loads used in expanded memcmp"));
54
56 "max-loads-per-memcmp-opt-size", cl::Hidden,
57 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
58
59namespace {
60
61
// This class provides helper functions to expand a memcmp library call into an
// inline expansion.
class MemCmpExpansion {
  // Early-exit block shared by all multi-byte compare blocks: when two loaded
  // values differ, control jumps here and the block computes the ordered
  // (-1/+1) memcmp result from the two values that differed.
  struct ResultBlock {
    BasicBlock *BB = nullptr;
    PHINode *PhiSrc1 = nullptr; // LHS value loaded in the differing block.
    PHINode *PhiSrc2 = nullptr; // RHS value loaded in the differing block.

    ResultBlock() = default;
  };

  CallInst *const CI = nullptr; // The memcmp call being expanded.
  ResultBlock ResBlock;
  const uint64_t Size; // Constant number of bytes being compared.
  unsigned MaxLoadSize = 0; // Largest load width used, in bytes.
  uint64_t NumLoadsNonOneByte = 0; // Loads wider than one byte (see compute*).
  const uint64_t NumLoadsPerBlockForZeroCmp;
  std::vector<BasicBlock *> LoadCmpBlocks;
  BasicBlock *EndBlock = nullptr; // Holds PhiRes; original code resumes here.
  PHINode *PhiRes = nullptr; // i32 phi collecting the final memcmp result.
  const bool IsUsedForZeroCmp; // Only (in)equality with zero matters.
  const DataLayout &DL;
  DomTreeUpdater *DTU = nullptr; // Optional; DT updates applied if non-null.
  IRBuilder<> Builder;
  // Represents the decomposition in blocks of the expansion. For example,
  // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
  struct LoadEntry {
    LoadEntry(unsigned LoadSize, uint64_t Offset)
        : LoadSize(LoadSize), Offset(Offset) {
    }

    // The size of the load for this block, in bytes.
    unsigned LoadSize;
    // The offset of this load from the base pointer, in bytes.
    uint64_t Offset;
  };
  using LoadEntryVector = SmallVector<LoadEntry, 8>;
  LoadEntryVector LoadSequence;

  void createLoadCmpBlocks();
  void createResultBlock();
  void setupResultBlockPHINodes();
  void setupEndBlockPHINodes();
  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
  void emitLoadCompareBlock(unsigned BlockIndex);
  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                         unsigned &LoadIndex);
  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
  void emitMemCmpResultBlock();
  Value *getMemCmpExpansionZeroCase();
  Value *getMemCmpEqZeroOneBlock();
  Value *getMemCmpOneBlock();
  // One value loaded from each memcmp operand (after optional bswap/zext).
  struct LoadPair {
    Value *Lhs = nullptr;
    Value *Rhs = nullptr;
  };
  LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
                       Type *CmpSizeType, unsigned OffsetBytes);

  static LoadEntryVector
  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
                            unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
  static LoadEntryVector
  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
                                 unsigned MaxNumLoads,
                                 unsigned &NumLoadsNonOneByte);

  static void optimiseLoadSequence(
      LoadEntryVector &LoadSequence,
      const TargetTransformInfo::MemCmpExpansionOptions &Options,
      bool IsUsedForZeroCmp);

public:
  MemCmpExpansion(CallInst *CI, uint64_t Size,
                  const TargetTransformInfo::MemCmpExpansionOptions &Options,
                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
                  DomTreeUpdater *DTU);

  unsigned getNumBlocks();
  uint64_t getNumLoads() const { return LoadSequence.size(); }

  Value *getMemCmpExpansion();
};
146
147MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
149 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
150 NumLoadsNonOneByte = 0;
151 LoadEntryVector LoadSequence;
152 uint64_t Offset = 0;
153 while (Size && !LoadSizes.empty()) {
154 const unsigned LoadSize = LoadSizes.front();
155 const uint64_t NumLoadsForThisSize = Size / LoadSize;
156 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
157 // Do not expand if the total number of loads is larger than what the
158 // target allows. Note that it's important that we exit before completing
159 // the expansion to avoid using a ton of memory to store the expansion for
160 // large sizes.
161 return {};
162 }
163 if (NumLoadsForThisSize > 0) {
164 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
165 LoadSequence.push_back({LoadSize, Offset});
166 Offset += LoadSize;
167 }
168 if (LoadSize > 1)
169 ++NumLoadsNonOneByte;
170 Size = Size % LoadSize;
171 }
172 LoadSizes = LoadSizes.drop_front();
173 }
174 return LoadSequence;
175}
176
177MemCmpExpansion::LoadEntryVector
178MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
179 const unsigned MaxLoadSize,
180 const unsigned MaxNumLoads,
181 unsigned &NumLoadsNonOneByte) {
182 // These are already handled by the greedy approach.
183 if (Size < 2 || MaxLoadSize < 2)
184 return {};
185
186 // We try to do as many non-overlapping loads as possible starting from the
187 // beginning.
188 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
189 assert(NumNonOverlappingLoads && "there must be at least one load");
190 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
191 // an overlapping load.
192 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
193 // Bail if we do not need an overloapping store, this is already handled by
194 // the greedy approach.
195 if (Size == 0)
196 return {};
197 // Bail if the number of loads (non-overlapping + potential overlapping one)
198 // is larger than the max allowed.
199 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
200 return {};
201
202 // Add non-overlapping loads.
203 LoadEntryVector LoadSequence;
204 uint64_t Offset = 0;
205 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
206 LoadSequence.push_back({MaxLoadSize, Offset});
207 Offset += MaxLoadSize;
208 }
209
210 // Add the last overlapping load.
211 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
212 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
213 NumLoadsNonOneByte = 1;
214 return LoadSequence;
215}
216
217void MemCmpExpansion::optimiseLoadSequence(
218 LoadEntryVector &LoadSequence,
219 const TargetTransformInfo::MemCmpExpansionOptions &Options,
220 bool IsUsedForZeroCmp) {
221 // This part of code attempts to optimize the LoadSequence by merging allowed
222 // subsequences into single loads of allowed sizes from
223 // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
224 // comparison or if no allowed tail expansions are specified, we exit early.
225 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
226 return;
227
228 while (LoadSequence.size() >= 2) {
229 auto Last = LoadSequence[LoadSequence.size() - 1];
230 auto PreLast = LoadSequence[LoadSequence.size() - 2];
231
232 // Exit the loop if the two sequences are not contiguous
233 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
234 break;
235
236 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
237 if (find(Options.AllowedTailExpansions, LoadSize) ==
238 Options.AllowedTailExpansions.end())
239 break;
240
241 // Remove the last two sequences and replace with the combined sequence
242 LoadSequence.pop_back();
243 LoadSequence.pop_back();
244 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
245 }
246}
247
// Initialize the basic block structure required for expansion of memcmp call
// with given maximum load size and memcmp size parameter.
// This structure includes:
// 1. A list of load compare blocks - LoadCmpBlocks.
// 2. An EndBlock, split from original instruction point, which is the block to
// return from.
// 3. ResultBlock, block to branch to for early exit when a
// LoadCmpBlock finds a difference.
MemCmpExpansion::MemCmpExpansion(
    CallInst *const CI, uint64_t Size,
    const TargetTransformInfo::MemCmpExpansionOptions &Options,
    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
    DomTreeUpdater *DTU)
    : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
      Builder(CI) {
  assert(Size > 0 && "zero blocks");
  // Scale the max size down if the target can load more bytes than we need.
  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
    LoadSizes = LoadSizes.drop_front();
  }
  assert(!LoadSizes.empty() && "cannot load Size bytes");
  MaxLoadSize = LoadSizes.front();
  // Compute the decomposition.
  unsigned GreedyNumLoadsNonOneByte = 0;
  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
                                           GreedyNumLoadsNonOneByte);
  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
  // If we allow overlapping loads and the load sequence is not already optimal,
  // use overlapping loads.
  if (Options.AllowOverlappingLoads &&
      (LoadSequence.empty() || LoadSequence.size() > 2)) {
    unsigned OverlappingNumLoadsNonOneByte = 0;
    auto OverlappingLoads = computeOverlappingLoadSequence(
        Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
    // Prefer the overlapping sequence only if it is strictly shorter.
    if (!OverlappingLoads.empty() &&
        (LoadSequence.empty() ||
         OverlappingLoads.size() < LoadSequence.size())) {
      LoadSequence = OverlappingLoads;
      NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
    }
  }
  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
  optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
}
295
296unsigned MemCmpExpansion::getNumBlocks() {
297 if (IsUsedForZeroCmp)
298 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
299 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
300 return getNumLoads();
301}
302
303void MemCmpExpansion::createLoadCmpBlocks() {
304 for (unsigned i = 0; i < getNumBlocks(); i++) {
305 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
306 EndBlock->getParent(), EndBlock);
307 LoadCmpBlocks.push_back(BB);
308 }
309}
310
311void MemCmpExpansion::createResultBlock() {
312 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
313 EndBlock->getParent(), EndBlock);
314}
315
316MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
317 Type *BSwapSizeType,
318 Type *CmpSizeType,
319 unsigned OffsetBytes) {
320 // Get the memory source at offset `OffsetBytes`.
321 Value *LhsSource = CI->getArgOperand(0);
322 Value *RhsSource = CI->getArgOperand(1);
323 Align LhsAlign = LhsSource->getPointerAlignment(DL);
324 Align RhsAlign = RhsSource->getPointerAlignment(DL);
325 if (OffsetBytes > 0) {
326 auto *ByteType = Type::getInt8Ty(CI->getContext());
327 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
328 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
329 LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
330 RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
331 }
332
333 // Create a constant or a load from the source.
334 Value *Lhs = nullptr;
335 if (auto *C = dyn_cast<Constant>(LhsSource))
336 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
337 if (!Lhs)
338 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
339
340 Value *Rhs = nullptr;
341 if (auto *C = dyn_cast<Constant>(RhsSource))
342 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
343 if (!Rhs)
344 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
345
346 // Zero extend if Byte Swap intrinsic has different type
347 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
348 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
349 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
350 }
351
352 // Swap bytes if required.
353 if (BSwapSizeType) {
355 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
356 Lhs = Builder.CreateCall(Bswap, Lhs);
357 Rhs = Builder.CreateCall(Bswap, Rhs);
358 }
359
360 // Zero extend if required.
361 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
362 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
363 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
364 }
365 return {Lhs, Rhs};
366}
367
368// This function creates the IR instructions for loading and comparing 1 byte.
369// It loads 1 byte from each source of the memcmp parameters with the given
370// GEPIndex. It then subtracts the two loaded values and adds this result to the
371// final phi node for selecting the memcmp result.
372void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
373 unsigned OffsetBytes) {
374 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
375 Builder.SetInsertPoint(BB);
376 const LoadPair Loads =
377 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
378 Type::getInt32Ty(CI->getContext()), OffsetBytes);
379 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
380
381 PhiRes->addIncoming(Diff, BB);
382
383 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
384 // Early exit branch if difference found to EndBlock. Otherwise, continue to
385 // next LoadCmpBlock,
386 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
387 ConstantInt::get(Diff->getType(), 0));
388 Builder.CreateCondBr(Cmp, EndBlock, LoadCmpBlocks[BlockIndex + 1]);
389 if (DTU)
390 DTU->applyUpdates(
391 {{DominatorTree::Insert, BB, EndBlock},
392 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
393 } else {
394 // The last block has an unconditional branch to EndBlock.
395 Builder.CreateBr(EndBlock);
396 if (DTU)
397 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
398 }
399}
400
/// Generate an equality comparison for one or more pairs of loaded values.
/// This is used in the case where the memcmp() call is compared equal or not
/// equal to zero.
Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
                                            unsigned &LoadIndex) {
  assert(LoadIndex < getNumLoads() &&
         "getCompareLoadPairs() called with no remaining loads");
  std::vector<Value *> XorList, OrList;
  Value *Diff = nullptr;

  // Number of loads this block consumes, bounded by the per-block limit.
  const unsigned NumLoads =
      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);

  // For a single-block expansion, start inserting before the memcmp call.
  if (LoadCmpBlocks.empty())
    Builder.SetInsertPoint(CI);
  else
    Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

  Value *Cmp = nullptr;
  // If we have multiple loads per block, we need to generate a composite
  // comparison using xor+or. The type for the combinations is the largest load
  // type.
  IntegerType *const MaxLoadType =
      NumLoads == 1 ? nullptr
                    : IntegerType::get(CI->getContext(), MaxLoadSize * 8);

  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
    const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
    const LoadPair Loads = getLoadPair(
        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr,
        MaxLoadType, CurLoadEntry.Offset);

    if (NumLoads != 1) {
      // If we have multiple loads per block, we need to generate a composite
      // comparison using xor+or: each xor is zero iff that pair is equal, so
      // or-ing them all is zero iff every pair is equal.
      Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
      Diff = Builder.CreateZExt(Diff, MaxLoadType);
      XorList.push_back(Diff);
    } else {
      // If there's only one load per block, we just compare the loaded values.
      Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
    }
  }

  // One reduction step: or adjacent pairs, keeping an odd trailing element.
  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
    std::vector<Value *> OutList;
    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
      OutList.push_back(Or);
    }
    if (InList.size() % 2 != 0)
      OutList.push_back(InList.back());
    return OutList;
  };

  if (!Cmp) {
    // Pairwise OR the XOR results.
    OrList = pairWiseOr(XorList);

    // Pairwise OR the OR results until one result left.
    while (OrList.size() != 1) {
      OrList = pairWiseOr(OrList);
    }

    assert(Diff && "Failed to find comparison diff");
    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
  }

  return Cmp;
}
472
// Emits one zero-equality compare block: consumes up to
// NumLoadsPerBlockForZeroCmp loads via getCompareLoadPairs and branches to the
// result block on any difference, otherwise to the next block (or EndBlock).
void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                                        unsigned &LoadIndex) {
  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);

  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Early exit branch if difference found to ResultBlock. Otherwise,
  // continue to next LoadCmpBlock or EndBlock.
  BasicBlock *BB = Builder.GetInsertBlock();
  CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, ResBlock.BB, NextBB);
  // NOTE(review): a line is missing from this copy at this point — a call
  // whose argument list continues below; `CondBrInst` above also looks like a
  // mangled `BranchInst`. Restore both from upstream before compiling.
      CI->getFunction());
  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
                       {DominatorTree::Insert, BB, NextBB}});

  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}
498
// This function creates the IR instructions for loading and comparing using the
// given LoadSize. It loads the number of bytes specified by LoadSize from each
// source of the memcmp parameters. It then does a subtract to see if there was
// a difference in the loaded values. If a difference is found, it branches
// with an early exit to the ResultBlock for calculating which source was
// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
// a special case through emitLoadCompareByteBlock. The special handling can
// simply subtract the loaded values and add it to the result phi node.
void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
  // There is one load per block in this case, BlockIndex == LoadIndex.
  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];

  if (CurLoadEntry.LoadSize == 1) {
    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
    return;
  }

  Type *LoadSizeType =
      IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
  // NOTE(review): the true arm's first line is missing from this copy —
  // presumably `? IntegerType::get(CI->getContext(),`. Restore from upstream.
  Type *BSwapSizeType =
      DL.isLittleEndian()
                             PowerOf2Ceil(CurLoadEntry.LoadSize * 8))
          : nullptr;
  Type *MaxLoadType = IntegerType::get(
      CI->getContext(),
      std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

  const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
                                     CurLoadEntry.Offset);

  // Add the loaded values to the phi nodes for calculating memcmp result only
  // if result is not used in a zero equality.
  if (!IsUsedForZeroCmp) {
    ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
    ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
  }

  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                           ? EndBlock
                           : LoadCmpBlocks[BlockIndex + 1];
  // Early exit branch if difference found to ResultBlock. Otherwise, continue
  // to next LoadCmpBlock or EndBlock.
  BasicBlock *BB = Builder.GetInsertBlock();
  CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, NextBB, ResBlock.BB);
  // NOTE(review): a line is missing from this copy at this point — a call
  // whose argument list continues below; `CondBrInst` above also looks like a
  // mangled `BranchInst`. Restore both from upstream before compiling.
      CI->getFunction());
  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
                       {DominatorTree::Insert, BB, ResBlock.BB}});

  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
  // since early exit to ResultBlock was not taken (no difference was found in
  // any of the bytes).
  if (BlockIndex == LoadCmpBlocks.size() - 1) {
    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
  }
}
563
// This function populates the ResultBlock with a sequence to calculate the
// memcmp result. It compares the two loaded source values and returns -1 if
// src1 < src2 and 1 if src1 > src2.
void MemCmpExpansion::emitMemCmpResultBlock() {
  // Special case: if memcmp result is used in a zero equality, result does not
  // need to be calculated and can simply return 1.
  if (IsUsedForZeroCmp) {
    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
    Builder.SetInsertPoint(ResBlock.BB, InsertPt);
    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
    PhiRes->addIncoming(Res, ResBlock.BB);
    Builder.CreateBr(EndBlock);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
    return;
  }
  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
  Builder.SetInsertPoint(ResBlock.BB, InsertPt);

  // The incoming values were byte-swapped as needed, so an unsigned compare
  // decides the ordering.
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
                                  ResBlock.PhiSrc2);

  // Select -1 (all-ones) when src1 < src2, else +1.
  Value *Res =
      Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
                           ConstantInt::get(Builder.getInt32Ty(), 1));
  // NOTE(review): a line is missing from this copy at this point — a call
  // whose argument list (DEBUG_TYPE, the function) continues below. Restore
  // from upstream before compiling.
      DEBUG_TYPE, CI->getFunction());

  PhiRes->addIncoming(Res, ResBlock.BB);
  Builder.CreateBr(EndBlock);
  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
}
597
598void MemCmpExpansion::setupResultBlockPHINodes() {
599 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
600 Builder.SetInsertPoint(ResBlock.BB);
601 // Note: this assumes one load per block.
602 ResBlock.PhiSrc1 =
603 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
604 ResBlock.PhiSrc2 =
605 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
606}
607
608void MemCmpExpansion::setupEndBlockPHINodes() {
609 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
610 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
611}
612
613Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
614 unsigned LoadIndex = 0;
615 // This loop populates each of the LoadCmpBlocks with the IR sequence to
616 // handle multiple loads per block.
617 for (unsigned I = 0; I < getNumBlocks(); ++I) {
618 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
619 }
620
621 emitMemCmpResultBlock();
622 return PhiRes;
623}
624
625/// A memcmp expansion that compares equality with 0 and only has one block of
626/// load and compare can bypass the compare, branch, and phi IR that is required
627/// in the general case.
628Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
629 unsigned LoadIndex = 0;
630 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
631 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
632 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
633}
634
/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
/// This function also analyses users of memcmp, and if there is only one user
/// from which we can conclude that only 2 out of 3 memcmp outcomes really
/// matter, then it generates more efficient code with only one comparison.
Value *MemCmpExpansion::getMemCmpOneBlock() {
  // On little-endian targets multi-byte loads must be byte-swapped so that an
  // unsigned comparison matches memcmp's byte order; a 1-byte load never is.
  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
  Type *BSwapSizeType =
      NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8))
                 : nullptr;
  // NOTE(review): the initializer's first line is missing from this copy —
  // presumably `IntegerType::get(CI->getContext(),`. Restore from upstream.
  Type *MaxLoadType =
      std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8);

  // The i8 and i16 cases don't need compares. We zext the loaded values and
  // subtract them to get the suitable negative, zero, or positive i32 result.
  if (Size == 1 || Size == 2) {
    const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
                                       Builder.getInt32Ty(), /*Offset*/ 0);
    return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
  }

  const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
                                     /*Offset*/ 0);

  // If a user of memcmp cares only about two outcomes, for example:
  //   bool result = memcmp(a, b, NBYTES) > 0;
  // We can generate more optimal code with a smaller number of operations
  if (CI->hasOneUser()) {
    auto *UI = cast<Instruction>(*CI->user_begin());
    CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
    bool NeedsZExt = false;
    // This is a special case because instead of checking if the result is less
    // than zero:
    //   bool result = memcmp(a, b, NBYTES) < 0;
    // Compiler is clever enough to generate the following code:
    //   bool result = memcmp(a, b, NBYTES) >> 31;
    if (match(UI,
              m_LShr(m_Value(),
                     m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) {
      Pred = ICmpInst::ICMP_SLT;
      NeedsZExt = true;
    } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI),
                                        m_AllOnes()))) {
      // Adjust predicate as if it compared with 0.
      Pred = ICmpInst::ICMP_SGE;
    } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI),
                                        m_One()))) {
      // Adjust predicate as if it compared with 0.
      Pred = ICmpInst::ICMP_SLE;
    } else {
      // In case of a successful match this call will set `Pred` variable
      match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
    }
    // Generate new code and remove the original memcmp call and the user
    if (ICmpInst::isSigned(Pred)) {
      // NOTE(review): a line is missing from this copy at this point — the
      // statement creating `Cmp` from the loaded values, whose argument list
      // continues below (likely using an unsigned analogue of `Pred`, since
      // the values are byte-swapped for unsigned ordering). Restore from
      // upstream before compiling.
                                Loads.Lhs, Loads.Rhs);
      auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp;
      UI->replaceAllUsesWith(Result);
      UI->eraseFromParent();
      CI->eraseFromParent();
      return nullptr;
    }
  }

  // The result of memcmp is negative, zero, or positive.
  return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
                                 {Loads.Lhs, Loads.Rhs});
}
706
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result. Returns nullptr if the memcmp is already replaced.
Value *MemCmpExpansion::getMemCmpExpansion() {
  // Create the basic block framework for a multi-block expansion.
  if (getNumBlocks() != 1) {
    BasicBlock *StartBlock = CI->getParent();
    EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
                          /*MSSAU=*/nullptr, "endblock");
    setupEndBlockPHINodes();
    createResultBlock();

    // If return value of memcmp is not used in a zero equality, we need to
    // calculate which source was larger. The calculation requires the
    // two loaded source values of each load compare block.
    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
    if (!IsUsedForZeroCmp) setupResultBlockPHINodes();

    // Create the number of required load compare basic blocks.
    createLoadCmpBlocks();

    // Update the terminator added by SplitBlock to branch to the first
    // LoadCmpBlock.
    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
                         {DominatorTree::Delete, StartBlock, EndBlock}});
  }

  // NOTE(review): one line is missing from this copy here (dropped during
  // extraction) — verify against upstream.

  if (IsUsedForZeroCmp)
    return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
                               : getMemCmpExpansionZeroCase();

  if (getNumBlocks() == 1)
    return getMemCmpOneBlock();

  // General case: one compare block per load, then the shared result block.
  for (unsigned I = 0; I < getNumBlocks(); ++I) {
    emitLoadCompareBlock(I);
  }

  emitMemCmpResultBlock();
  return PhiRes;
}
751
752// This function checks to see if an expansion of memcmp can be generated.
753// It checks for constant compare size that is less than the max inline size.
754// If an expansion cannot occur, returns false to leave as a library call.
755// Otherwise, the library call is replaced with a new IR instruction sequence.
756/// We want to transform:
757/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
758/// To:
759/// loadbb:
760/// %0 = bitcast i32* %buffer2 to i8*
761/// %1 = bitcast i32* %buffer1 to i8*
762/// %2 = bitcast i8* %1 to i64*
763/// %3 = bitcast i8* %0 to i64*
764/// %4 = load i64, i64* %2
765/// %5 = load i64, i64* %3
766/// %6 = call i64 @llvm.bswap.i64(i64 %4)
767/// %7 = call i64 @llvm.bswap.i64(i64 %5)
768/// %8 = sub i64 %6, %7
769/// %9 = icmp ne i64 %8, 0
770/// br i1 %9, label %res_block, label %loadbb1
771/// res_block: ; preds = %loadbb2,
772/// %loadbb1, %loadbb
773/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
774/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
775/// %10 = icmp ult i64 %phi.src1, %phi.src2
776/// %11 = select i1 %10, i32 -1, i32 1
777/// br label %endblock
778/// loadbb1: ; preds = %loadbb
779/// %12 = bitcast i32* %buffer2 to i8*
780/// %13 = bitcast i32* %buffer1 to i8*
781/// %14 = bitcast i8* %13 to i32*
782/// %15 = bitcast i8* %12 to i32*
783/// %16 = getelementptr i32, i32* %14, i32 2
784/// %17 = getelementptr i32, i32* %15, i32 2
785/// %18 = load i32, i32* %16
786/// %19 = load i32, i32* %17
787/// %20 = call i32 @llvm.bswap.i32(i32 %18)
788/// %21 = call i32 @llvm.bswap.i32(i32 %19)
789/// %22 = zext i32 %20 to i64
790/// %23 = zext i32 %21 to i64
791/// %24 = sub i64 %22, %23
792/// %25 = icmp ne i64 %24, 0
793/// br i1 %25, label %res_block, label %loadbb2
794/// loadbb2: ; preds = %loadbb1
795/// %26 = bitcast i32* %buffer2 to i8*
796/// %27 = bitcast i32* %buffer1 to i8*
797/// %28 = bitcast i8* %27 to i16*
798/// %29 = bitcast i8* %26 to i16*
799/// %30 = getelementptr i16, i16* %28, i16 6
800/// %31 = getelementptr i16, i16* %29, i16 6
801/// %32 = load i16, i16* %30
802/// %33 = load i16, i16* %31
803/// %34 = call i16 @llvm.bswap.i16(i16 %32)
804/// %35 = call i16 @llvm.bswap.i16(i16 %33)
805/// %36 = zext i16 %34 to i64
806/// %37 = zext i16 %35 to i64
807/// %38 = sub i64 %36, %37
808/// %39 = icmp ne i64 %38, 0
809/// br i1 %39, label %res_block, label %loadbb3
810/// loadbb3: ; preds = %loadbb2
811/// %40 = bitcast i32* %buffer2 to i8*
812/// %41 = bitcast i32* %buffer1 to i8*
813/// %42 = getelementptr i8, i8* %41, i8 14
814/// %43 = getelementptr i8, i8* %40, i8 14
815/// %44 = load i8, i8* %42
816/// %45 = load i8, i8* %43
817/// %46 = zext i8 %44 to i32
818/// %47 = zext i8 %45 to i32
819/// %48 = sub i32 %46, %47
820/// br label %endblock
821/// endblock: ; preds = %res_block,
822/// %loadbb3
823/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
824/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                         const DataLayout *DL, ProfileSummaryInfo *PSI,
                         BlockFrequencyInfo *BFI, DomTreeUpdater *DTU,
                         const bool IsBCmp) {
  NumMemCmpCalls++;

  // Early exit from expansion if -Oz.
  if (CI->getFunction()->hasMinSize())
    return false;

  // Early exit from expansion if size is not a constant.
  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
  if (!SizeCast) {
    NumMemCmpNotConstant++;
    return false;
  }
  const uint64_t SizeVal = SizeCast->getZExtValue();

  // Nothing to compare; leave the trivial call alone.
  if (SizeVal == 0) {
    return false;
  }
  // TTI call to check if target would like to expand memcmp. Also, get the
  // available load sizes.
  const bool IsUsedForZeroCmp =
  // NOTE(review): the initializer's continuation line is missing from this
  // copy (presumably `IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);`) —
  // restore from upstream before compiling.
  bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
  auto Options = TTI->enableMemCmpExpansion(OptForSize,
                                            IsUsedForZeroCmp);
  if (!Options) return false;

  // Command-line overrides of the target's options, for testing.
  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
  // NOTE(review): the guarded statement is missing from this copy (presumably
  // `Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;`) — restore
  // from upstream before compiling.

  if (OptForSize &&
      MaxLoadsPerMemcmpOptSize.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;

  if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
    Options.MaxNumLoads = MaxLoadsPerMemcmp;

  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);

  // Don't expand if this will require more loads than desired by the target.
  if (Expansion.getNumLoads() == 0) {
    NumMemCmpGreaterThanMax++;
    return false;
  }

  NumMemCmpInlined++;

  if (Value *Res = Expansion.getMemCmpExpansion()) {
    // Replace call with result of expansion and erase call.
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
  }

  return true;
}
883
// Scans a single basic block for an expandable memcmp/bcmp libcall.
// Returns true if a change was made.
static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
                       const TargetTransformInfo *TTI, const DataLayout &DL,
                       ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
                       DomTreeUpdater *DTU);

// Shared driver for the legacy and new pass-manager entry points: expands
// memcmp/bcmp calls throughout F and reports which analyses are preserved.
static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
                                 const TargetTransformInfo *TTI,
                                 ProfileSummaryInfo *PSI,
                                 BlockFrequencyInfo *BFI, DominatorTree *DT);
894
895class ExpandMemCmpLegacyPass : public FunctionPass {
896public:
897 static char ID;
898
899 ExpandMemCmpLegacyPass() : FunctionPass(ID) {}
900
901 bool runOnFunction(Function &F) override {
902 if (skipFunction(F)) return false;
903
904 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
905 if (!TPC) {
906 return false;
907 }
908
909 const TargetLibraryInfo *TLI =
910 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
911 const TargetTransformInfo *TTI =
912 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
913 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
914 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
915 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
916 nullptr;
917 DominatorTree *DT = nullptr;
918 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
919 DT = &DTWP->getDomTree();
920 auto PA = runImpl(F, TLI, TTI, PSI, BFI, DT);
921 return !PA.areAllPreserved();
922 }
923
924private:
925 void getAnalysisUsage(AnalysisUsage &AU) const override {
926 AU.addRequired<TargetLibraryInfoWrapperPass>();
927 AU.addRequired<TargetTransformInfoWrapperPass>();
928 AU.addRequired<ProfileSummaryInfoWrapperPass>();
929 AU.addPreserved<DominatorTreeWrapperPass>();
931 FunctionPass::getAnalysisUsage(AU);
932 }
933};
934
935bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
936 const TargetTransformInfo *TTI, const DataLayout &DL,
937 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
938 DomTreeUpdater *DTU) {
939 for (Instruction &I : BB) {
940 CallInst *CI = dyn_cast<CallInst>(&I);
941 if (!CI) {
942 continue;
943 }
944 LibFunc Func;
945 if (TLI->getLibFunc(*CI, Func) &&
946 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
947 expandMemCmp(CI, TTI, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
948 return true;
949 }
950 }
951 return false;
952}
953
954PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
955 const TargetTransformInfo *TTI,
956 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
957 DominatorTree *DT) {
958 std::optional<DomTreeUpdater> DTU;
959 if (DT)
960 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
961
962 const DataLayout& DL = F.getDataLayout();
963 bool MadeChanges = false;
964 for (auto BBIt = F.begin(); BBIt != F.end();) {
965 if (runOnBlock(*BBIt, TLI, TTI, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
966 MadeChanges = true;
967 // If changes were made, restart the function from the beginning, since
968 // the structure of the function was changed.
969 BBIt = F.begin();
970 } else {
971 ++BBIt;
972 }
973 }
974 if (MadeChanges)
975 for (BasicBlock &BB : F)
977 if (!MadeChanges)
978 return PreservedAnalyses::all();
979 PreservedAnalyses PA;
980 PA.preserve<DominatorTreeAnalysis>();
981 return PA;
982}
983
984} // namespace
985
988 const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
989 const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
990 auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
991 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
992 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary())
993 ? &FAM.getResult<BlockFrequencyAnalysis>(F)
994 : nullptr;
995 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
996
997 return runImpl(F, &TLI, &TTI, PSI, BFI, DT);
998}
999
1000char ExpandMemCmpLegacyPass::ID = 0;
1001INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1002 "Expand memcmp() to load/stores", false, false)
1008INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1009 "Expand memcmp() to load/stores", false, false)
1010
1012 return new ExpandMemCmpLegacyPass();
1013}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
DXIL Intrinsic Expansion
static bool runOnFunction(Function &F, bool PostInlining)
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
#define DEBUG_TYPE
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Value * getArgOperand(unsigned i) const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition IRBuilder.h:2011
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1894
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:579
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2335
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1217
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2496
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1446
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2077
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2510
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1625
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2441
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool hasProfileSummary() const
Returns true if profile summary is available.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:166
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
const ParentTy * getParent() const
Definition ilist_node.h:34
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:723
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI FunctionPass * createExpandMemCmpLegacyPass()
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.