37#define INSTR_PROF_VALUE_PROF_MEMOP_API
52#define DEBUG_TYPE "pgo-memop-opt"
54STATISTIC(NumOfPGOMemOPOpt,
"Number of memop intrinsics optimized.");
55STATISTIC(NumOfPGOMemOPAnnotate,
"Number of memop intrinsics annotated.");
60 cl::desc(
"The minimum count to optimize memory "
72 cl::desc(
"The percentage threshold for the "
73 "memory intrinsic calls optimization"));
78 cl::desc(
"The max version for the optimized memory "
84 cl::desc(
"Scale the memop size counts using the basic "
85 " block count value"));
90 cl::desc(
"Size-specialize memcmp and bcmp calls"));
94 cl::desc(
"Optimize the memop size <= this value"));
99 switch (
MI->getIntrinsicID()) {
100 case Intrinsic::memcpy:
102 case Intrinsic::memmove:
104 case Intrinsic::memset:
114 MemOp(MemIntrinsic *
MI) : I(
MI) {}
115 MemOp(CallInst *CI) : I(CI) {}
119 if (
auto MI = asMI())
124 if (
auto MI = asMI())
125 return MI->getLength();
126 return asCI()->getArgOperand(2);
129 if (
auto MI = asMI())
131 asCI()->setArgOperand(2,
Length);
133 StringRef getFuncName() {
134 if (
auto MI = asMI())
135 return MI->getCalledFunction()->getName();
136 return asCI()->getCalledFunction()->getName();
139 if (
auto MI = asMI())
140 if (
MI->getIntrinsicID() == Intrinsic::memmove)
144 bool isMemcmp(TargetLibraryInfo &TLI) {
146 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
147 Func == LibFunc_memcmp) {
152 bool isBcmp(TargetLibraryInfo &TLI) {
154 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
155 Func == LibFunc_bcmp) {
160 const char *
getName(TargetLibraryInfo &TLI) {
161 if (
auto MI = asMI())
162 return getMIName(
MI);
165 if (Func == LibFunc_memcmp)
167 if (Func == LibFunc_bcmp)
175class MemOPSizeOpt :
public InstVisitor<MemOPSizeOpt> {
177 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
178 OptimizationRemarkEmitter &ORE, DominatorTree *DT,
179 TargetLibraryInfo &TLI)
180 : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(
false) {}
181 bool isChanged()
const {
return Changed; }
186 for (
auto &MO : WorkList) {
187 ++NumOfPGOMemOPAnnotate;
192 <<
"is Transformed.\n");
197 void visitMemIntrinsic(MemIntrinsic &
MI) {
202 WorkList.push_back(MemOp(&
MI));
205 void visitCallInst(CallInst &CI) {
207 if (TLI.getLibFunc(CI, Func) &&
208 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
210 WorkList.push_back(MemOp(&CI));
216 BlockFrequencyInfo &BFI;
217 OptimizationRemarkEmitter &ORE;
219 TargetLibraryInfo &TLI;
221 std::vector<MemOp> WorkList;
222 bool perform(MemOp MO);
240 return ScaleCount / Denom;
243bool MemOPSizeOpt::perform(
MemOp MO) {
250 uint32_t MaxNumVals = INSTR_PROF_NUM_BUCKETS;
257 uint64_t ActualCount = TotalCount;
258 uint64_t SavedTotalCount = TotalCount;
263 ActualCount = *BBEdgeCount;
266 LLVM_DEBUG(
dbgs() <<
"Read one memory intrinsic profile with count "
267 << ActualCount <<
"\n");
270 : VDs) {
dbgs() <<
" (" << VD.Value <<
"," << VD.Count <<
")\n"; });
279 TotalCount = ActualCount;
282 <<
" denominator = " << SavedTotalCount <<
"\n");
285 uint64_t RemainCount = TotalCount;
286 uint64_t SavedRemainCount = SavedTotalCount;
287 SmallVector<uint64_t, 16> SizeIds;
288 SmallVector<uint64_t, 16> CaseCounts;
289 SmallDenseSet<uint64_t, 16> SeenSizeId;
290 uint64_t MaxCount = 0;
295 for (
auto I = VDs.begin(),
E = VDs.end();
I !=
E; ++
I) {
297 int64_t
V = VD.Value;
298 uint64_t
C = VD.Count;
300 C = getScaledCount(
C, ActualCount, SavedTotalCount);
314 if (!SeenSizeId.
insert(V).second) {
315 errs() <<
"warning: Invalid Profile Data in Function " <<
Func.getName()
316 <<
": Two identical values in MemOp value counts.\n";
327 assert(SavedRemainCount >= VD.Count);
328 SavedRemainCount -= VD.Count;
339 CaseCounts[0] = RemainCount;
340 if (RemainCount > MaxCount)
341 MaxCount = RemainCount;
343 uint64_t SumForOpt = TotalCount - RemainCount;
346 <<
" Versions (covering " << SumForOpt <<
" out of "
347 << TotalCount <<
")\n");
375 MergeBB->
setName(
"MemOP.Merge");
377 DefaultBB->
setName(
"MemOP.Default");
379 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
380 auto &Ctx =
Func.getContext();
383 Value *SizeVar = MO.getLength();
384 SwitchInst *
SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.
size());
385 Type *MemOpTy = MO.I->getType();
386 PHINode *
PHI =
nullptr;
390 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.
size() + 1,
"MemOP.RVMerge");
391 MO.I->replaceAllUsesWith(
PHI);
392 PHI->addIncoming(MO.I, DefaultBB);
396 MO.I->setMetadata(LLVMContext::MD_prof,
nullptr);
398 if (SavedRemainCount > 0 ||
Version != VDs.size()) {
401 IPVK_MemOPSize, VDs.
size());
406 std::vector<DominatorTree::UpdateType> Updates;
408 Updates.reserve(2 * SizeIds.
size());
410 for (uint64_t SizeId : SizeIds) {
412 Ctx, Twine(
"MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
413 MemOp NewMO = MO.clone();
416 assert(SizeType &&
"Expected integer type size argument.");
417 ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
418 NewMO.setLength(CaseSizeId);
419 NewMO.I->insertInto(CaseBB, CaseBB->
end());
421 IRBCase.CreateBr(MergeBB);
422 SI->addCase(CaseSizeId, CaseBB);
424 PHI->addIncoming(NewMO.I, CaseBB);
426 Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
427 Updates.push_back({DominatorTree::Insert, BB, CaseBB});
431 DTU.applyUpdates(Updates);
443 return OptimizationRemark(
DEBUG_TYPE,
"memopt-opt", MO.I)
444 <<
"optimized " <<
NV(
"Memop", MO.getName(TLI)) <<
" with count "
445 <<
NV(
"Count", SumForOpt) <<
" out of " <<
NV(
"Total", TotalCount)
446 <<
" for " <<
NV(
"Versions",
Version) <<
" versions";
461 MemOPSizeOpt MemOPSizeOpt(
F, BFI, ORE, DT, TLI);
462 MemOPSizeOpt.perform();
463 return MemOPSizeOpt.isChanged();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Function Alias Analysis false
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This header defines various interfaces for pass management in LLVM.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::desc("The max version for the optimized memory " " intrinsic calls"))
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Value * getArgOperand(unsigned i) const
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for instruction visitors.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
This is the common base class for memset/memcpy/memmove.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.