42#define DEBUG_TYPE "expand-memcmp"
45STATISTIC(NumMemCmpNotConstant,
"Number of memcmp calls without constant size");
47 "Number of memcmp calls with size greater than max size");
48STATISTIC(NumMemCmpInlined,
"Number of inlined memcmp calls");
52 cl::desc(
"The number of loads per basic block for inline expansion of "
53 "memcmp that is only being compared against zero."));
57 cl::desc(
"Set maximum number of loads used in expanded memcmp"));
61 cl::desc(
"Set maximum number of loads used in expanded memcmp for -Os/Oz"));
68class MemCmpExpansion {
74 ResultBlock() =
default;
80 unsigned MaxLoadSize = 0;
82 const uint64_t NumLoadsPerBlockForZeroCmp;
83 std::vector<BasicBlock *> LoadCmpBlocks;
86 const bool IsUsedForZeroCmp;
104 LoadEntryVector LoadSequence;
106 void createLoadCmpBlocks();
107 void createResultBlock();
108 void setupResultBlockPHINodes();
109 void setupEndBlockPHINodes();
110 Value *getCompareLoadPairs(
unsigned BlockIndex,
unsigned &LoadIndex);
111 void emitLoadCompareBlock(
unsigned BlockIndex);
112 void emitLoadCompareBlockMultipleLoads(
unsigned BlockIndex,
113 unsigned &LoadIndex);
114 void emitLoadCompareByteBlock(
unsigned BlockIndex,
unsigned OffsetBytes);
115 void emitMemCmpResultBlock();
116 Value *getMemCmpExpansionZeroCase();
117 Value *getMemCmpEqZeroOneBlock();
118 Value *getMemCmpOneBlock();
120 Value *Lhs =
nullptr;
121 Value *Rhs =
nullptr;
123 LoadPair getLoadPair(
Type *LoadSizeType,
Type *BSwapSizeType,
124 Type *CmpSizeType,
unsigned OffsetBytes);
126 static LoadEntryVector
128 unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte);
129 static LoadEntryVector
130 computeOverlappingLoadSequence(
uint64_t Size,
unsigned MaxLoadSize,
131 unsigned MaxNumLoads,
132 unsigned &NumLoadsNonOneByte);
134 static void optimiseLoadSequence(
135 LoadEntryVector &LoadSequence,
137 bool IsUsedForZeroCmp);
142 const bool IsUsedForZeroCmp,
const DataLayout &TheDataLayout,
145 unsigned getNumBlocks();
146 uint64_t getNumLoads()
const {
return LoadSequence.size(); }
148 Value *getMemCmpExpansion();
153 const unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte) {
154 NumLoadsNonOneByte = 0;
155 LoadEntryVector LoadSequence;
158 const unsigned LoadSize = LoadSizes.
front();
160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
167 if (NumLoadsForThisSize > 0) {
168 for (
uint64_t I = 0;
I < NumLoadsForThisSize; ++
I) {
169 LoadSequence.push_back({LoadSize,
Offset});
173 ++NumLoadsNonOneByte;
182MemCmpExpansion::computeOverlappingLoadSequence(
uint64_t Size,
183 const unsigned MaxLoadSize,
184 const unsigned MaxNumLoads,
185 unsigned &NumLoadsNonOneByte) {
187 if (
Size < 2 || MaxLoadSize < 2)
192 const uint64_t NumNonOverlappingLoads =
Size / MaxLoadSize;
193 assert(NumNonOverlappingLoads &&
"there must be at least one load");
196 Size =
Size - NumNonOverlappingLoads * MaxLoadSize;
203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
207 LoadEntryVector LoadSequence;
209 for (
uint64_t I = 0;
I < NumNonOverlappingLoads; ++
I) {
210 LoadSequence.push_back({MaxLoadSize,
Offset});
216 LoadSequence.push_back({MaxLoadSize,
Offset - (MaxLoadSize -
Size)});
217 NumLoadsNonOneByte = 1;
221void MemCmpExpansion::optimiseLoadSequence(
222 LoadEntryVector &LoadSequence,
224 bool IsUsedForZeroCmp) {
229 if (IsUsedForZeroCmp ||
Options.AllowedTailExpansions.empty())
232 while (LoadSequence.size() >= 2) {
233 auto Last = LoadSequence[LoadSequence.size() - 1];
234 auto PreLast = LoadSequence[LoadSequence.size() - 2];
237 if (PreLast.Offset + PreLast.LoadSize !=
Last.Offset)
240 auto LoadSize =
Last.LoadSize + PreLast.LoadSize;
241 if (
find(
Options.AllowedTailExpansions, LoadSize) ==
242 Options.AllowedTailExpansions.end())
246 LoadSequence.pop_back();
247 LoadSequence.pop_back();
248 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
260MemCmpExpansion::MemCmpExpansion(
263 const bool IsUsedForZeroCmp,
const DataLayout &TheDataLayout,
265 : CI(CI),
Size(
Size), NumLoadsPerBlockForZeroCmp(
Options.NumLoadsPerBlock),
266 IsUsedForZeroCmp(IsUsedForZeroCmp),
DL(TheDataLayout), DTU(DTU),
274 assert(!LoadSizes.
empty() &&
"cannot load Size bytes");
275 MaxLoadSize = LoadSizes.
front();
277 unsigned GreedyNumLoadsNonOneByte = 0;
278 LoadSequence = computeGreedyLoadSequence(
Size, LoadSizes,
Options.MaxNumLoads,
279 GreedyNumLoadsNonOneByte);
280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
281 assert(LoadSequence.size() <=
Options.MaxNumLoads &&
"broken invariant");
284 if (
Options.AllowOverlappingLoads &&
285 (LoadSequence.empty() || LoadSequence.size() > 2)) {
286 unsigned OverlappingNumLoadsNonOneByte = 0;
287 auto OverlappingLoads = computeOverlappingLoadSequence(
288 Size, MaxLoadSize,
Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
289 if (!OverlappingLoads.empty() &&
290 (LoadSequence.empty() ||
291 OverlappingLoads.size() < LoadSequence.size())) {
292 LoadSequence = OverlappingLoads;
293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
296 assert(LoadSequence.size() <=
Options.MaxNumLoads &&
"broken invariant");
297 optimiseLoadSequence(LoadSequence,
Options, IsUsedForZeroCmp);
300unsigned MemCmpExpansion::getNumBlocks() {
301 if (IsUsedForZeroCmp)
302 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
304 return getNumLoads();
307void MemCmpExpansion::createLoadCmpBlocks() {
308 for (
unsigned i = 0; i < getNumBlocks(); i++) {
311 LoadCmpBlocks.push_back(BB);
315void MemCmpExpansion::createResultBlock() {
320MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(
Type *LoadSizeType,
323 unsigned OffsetBytes) {
329 if (OffsetBytes > 0) {
338 Value *Lhs =
nullptr;
339 if (
auto *
C = dyn_cast<Constant>(LhsSource))
344 Value *Rhs =
nullptr;
345 if (
auto *
C = dyn_cast<Constant>(RhsSource))
351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
359 CI->
getModule(), Intrinsic::bswap, BSwapSizeType);
365 if (CmpSizeType !=
nullptr && CmpSizeType != Lhs->
getType()) {
376void MemCmpExpansion::emitLoadCompareByteBlock(
unsigned BlockIndex,
377 unsigned OffsetBytes) {
380 const LoadPair Loads =
387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
391 ConstantInt::get(Diff->
getType(), 0));
397 {{DominatorTree::Insert, BB, EndBlock},
398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
404 DTU->
applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
411Value *MemCmpExpansion::getCompareLoadPairs(
unsigned BlockIndex,
412 unsigned &LoadIndex) {
413 assert(LoadIndex < getNumLoads() &&
414 "getCompareLoadPairs() called with no remaining loads");
415 std::vector<Value *> XorList, OrList;
416 Value *Diff =
nullptr;
418 const unsigned NumLoads =
419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
422 if (LoadCmpBlocks.empty())
432 NumLoads == 1 ? nullptr
435 for (
unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
437 const LoadPair Loads = getLoadPair(
439 MaxLoadType, CurLoadEntry.Offset);
444 Diff = Builder.
CreateXor(Loads.Lhs, Loads.Rhs);
446 XorList.push_back(Diff);
453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
454 std::vector<Value *> OutList;
455 for (
unsigned i = 0; i < InList.size() - 1; i = i + 2) {
457 OutList.push_back(
Or);
459 if (InList.size() % 2 != 0)
460 OutList.push_back(InList.back());
466 OrList = pairWiseOr(XorList);
469 while (OrList.size() != 1) {
470 OrList = pairWiseOr(OrList);
473 assert(Diff &&
"Failed to find comparison diff");
480void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
unsigned BlockIndex,
481 unsigned &LoadIndex) {
482 Value *
Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
486 : LoadCmpBlocks[BlockIndex + 1];
493 DTU->
applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
494 {DominatorTree::Insert, BB, NextBB}});
499 if (BlockIndex == LoadCmpBlocks.size() - 1) {
501 PhiRes->
addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
514void MemCmpExpansion::emitLoadCompareBlock(
unsigned BlockIndex) {
516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
518 if (CurLoadEntry.LoadSize == 1) {
519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
525 Type *BSwapSizeType =
532 std::max(MaxLoadSize, (
unsigned)
PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
533 assert(CurLoadEntry.LoadSize <= MaxLoadSize &&
"Unexpected load type");
537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
538 CurLoadEntry.Offset);
542 if (!IsUsedForZeroCmp) {
543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
550 : LoadCmpBlocks[BlockIndex + 1];
558 {DominatorTree::Insert, BB, ResBlock.BB}});
563 if (BlockIndex == LoadCmpBlocks.size() - 1) {
565 PhiRes->
addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
572void MemCmpExpansion::emitMemCmpResultBlock() {
575 if (IsUsedForZeroCmp) {
583 DTU->
applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
600 DTU->
applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
603void MemCmpExpansion::setupResultBlockPHINodes() {
608 Builder.
CreatePHI(MaxLoadType, NumLoadsNonOneByte,
"phi.src1");
610 Builder.
CreatePHI(MaxLoadType, NumLoadsNonOneByte,
"phi.src2");
613void MemCmpExpansion::setupEndBlockPHINodes() {
618Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
619 unsigned LoadIndex = 0;
622 for (
unsigned I = 0;
I < getNumBlocks(); ++
I) {
623 emitLoadCompareBlockMultipleLoads(
I, LoadIndex);
626 emitMemCmpResultBlock();
633Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
634 unsigned LoadIndex = 0;
635 Value *
Cmp = getCompareLoadPairs(0, LoadIndex);
636 assert(LoadIndex == getNumLoads() &&
"some entries were not consumed");
645Value *MemCmpExpansion::getMemCmpOneBlock() {
646 bool NeedsBSwap =
DL.isLittleEndian() &&
Size != 1;
648 Type *BSwapSizeType =
658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
660 return Builder.
CreateSub(Loads.Lhs, Loads.Rhs);
663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
670 auto *UI = cast<Instruction>(*CI->
user_begin());
673 bool NeedsZExt =
false;
681 Pred = ICmpInst::ICMP_SLT;
688 if (ICmpInst::isSigned(Pred)) {
690 Loads.Lhs, Loads.Rhs);
692 UI->replaceAllUsesWith(Result);
693 UI->eraseFromParent();
709 return Builder.
CreateSub(ZextUGT, ZextULT);
714Value *MemCmpExpansion::getMemCmpExpansion() {
716 if (getNumBlocks() != 1) {
718 EndBlock =
SplitBlock(StartBlock, CI, DTU,
nullptr,
719 nullptr,
"endblock");
720 setupEndBlockPHINodes();
727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
730 createLoadCmpBlocks();
736 DTU->
applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
737 {DominatorTree::Delete, StartBlock, EndBlock}});
742 if (IsUsedForZeroCmp)
743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
744 : getMemCmpExpansionZeroCase();
746 if (getNumBlocks() == 1)
747 return getMemCmpOneBlock();
749 for (
unsigned I = 0;
I < getNumBlocks(); ++
I) {
750 emitLoadCompareBlock(
I);
753 emitMemCmpResultBlock();
843 NumMemCmpNotConstant++;
853 const bool IsUsedForZeroCmp =
875 NumMemCmpGreaterThanMax++;
913 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
918 TPC->getTM<
TargetMachine>().getSubtargetImpl(
F)->getTargetLowering();
921 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
923 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
924 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
926 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
929 if (
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
930 DT = &DTWP->getDomTree();
932 return !PA.areAllPreserved();
957 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
958 expandMemCmp(CI,
TTI, TL, &
DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
969 std::optional<DomTreeUpdater> DTU;
971 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
974 bool MadeChanges =
false;
975 for (
auto BBIt =
F.begin(); BBIt !=
F.end();) {
976 if (runOnBlock(*BBIt, TLI,
TTI, TL,
DL, PSI, BFI, DTU ? &*DTU :
nullptr)) {
1003 .getCachedResult<ProfileSummaryAnalysis>(*
F.getParent());
1012char ExpandMemCmpLegacyPass::ID = 0;
1014 "Expand memcmp() to load/stores",
false,
false)
1024 return new ExpandMemCmpLegacyPass();
AMDGPU Mark last scratch load
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool runImpl(Function &F, const TargetLowering &TLI)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
Merge contiguous icmps into a memcmp
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
front - Get the first element.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getUnsignedPredicate()
For example, SLT->ULT, SLE->ULE, SGT->UGT, SGE->UGE, ULT->Failed assert.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
BasicBlock * GetInsertBlock() const
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR uni...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUser() const
Return true if there is exactly one user of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVMContext & getContext() const
All values hold a context through their type.
const ParentTy * getParent() const
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
FunctionPass * createExpandMemCmpLegacyPass()
@ Or
Bitwise or logical OR of integers.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
This struct is a compact representation of a valid (non-zero power of two) alignment.