42 #define INSTR_PROF_VALUE_PROF_MEMOP_API 58 #define DEBUG_TYPE "pgo-memop-opt" 60 STATISTIC(NumOfPGOMemOPOpt,
"Number of memop intrinsics optimized.");
61 STATISTIC(NumOfPGOMemOPAnnotate,
"Number of memop intrinsics annotated.");
67 cl::desc(
"The minimum count to optimize memory " 79 cl::desc(
"The percentage threshold for the " 80 "memory intrinsic calls optimization"));
86 cl::desc(
"The max version for the optimized memory " 92 cl::desc(
"Scale the memop size counts using the basic " 93 " block count value"));
98 cl::desc(
"Size-specialize memcmp and bcmp calls"));
102 cl::desc(
"Optimize the memop size <= this value"));
/// Reports this pass's name: always the fixed string "PGOMemOPSize".
/// Declared `override`, so it replaces a virtual of the base class (the base
/// class itself is not visible in this excerpt).
113 StringRef getPassName()
const override {
return "PGOMemOPSize"; }
129 "Optimize memory intrinsic using its size value profile",
134 "Optimize memory intrinsic using its
size value
profile",
138 return new PGOMemOPSizeOptLegacyPass();
144 switch (
MI->getIntrinsicID()) {
145 case Intrinsic::memcpy:
147 case Intrinsic::memmove:
149 case Intrinsic::memset:
/// Views the wrapped instruction `I` as a CallInst via `cast<>`.
/// The visible callers reach this only on the path where asMI() did not
/// yield a MemIntrinsic, and they dereference the result without a null check.
162 CallInst *asCI() {
return cast<CallInst>(
I); }
164 if (
auto MI = asMI())
165 return MemOp(cast<MemIntrinsic>(
MI->clone()));
166 return MemOp(cast<CallInst>(asCI()->clone()));
169 if (
auto MI = asMI())
170 return MI->getLength();
171 return asCI()->getArgOperand(2);
173 void setLength(
Value *Length) {
174 if (
auto MI = asMI())
175 return MI->setLength(Length);
176 asCI()->setArgOperand(2, Length);
179 if (
auto MI = asMI())
180 return MI->getCalledFunction()->getName();
181 return asCI()->getCalledFunction()->getName();
184 if (
auto MI = asMI())
185 if (
MI->getIntrinsicID() == Intrinsic::memmove)
191 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
192 Func == LibFunc_memcmp) {
199 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
200 Func == LibFunc_bcmp) {
206 if (
auto MI = asMI())
207 return getMIName(
MI);
210 if (Func == LibFunc_memcmp)
212 if (Func == LibFunc_bcmp)
220 class MemOPSizeOpt :
public InstVisitor<MemOPSizeOpt> {
/// Returns the current value of the `Changed` member; does not modify state.
/// The pass driver returns this value as its own result after calling
/// perform().
229 bool isChanged()
const {
return Changed; }
234 for (
auto &MO : WorkList) {
235 ++NumOfPGOMemOPAnnotate;
240 <<
"is Transformed.\n");
246 Value *Length =
MI.getLength();
248 if (dyn_cast<ConstantInt>(Length))
250 WorkList.push_back(
MemOp(&
MI));
256 (
Func == LibFunc_memcmp ||
Func == LibFunc_bcmp) &&
258 WorkList.push_back(
MemOp(&CI));
269 std::vector<MemOp> WorkList;
271 std::unique_ptr<InstrProfValueData[]> ValueDataArray;
272 bool perform(
MemOp MO);
275 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
276 assert(Count <= TotalCount);
284 static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
290 return ScaleCount / Denom;
293 bool MemOPSizeOpt::perform(
MemOp MO) {
303 ValueDataArray.get(), NumVals, TotalCount))
306 uint64_t ActualCount = TotalCount;
307 uint64_t SavedTotalCount = TotalCount;
309 auto BBEdgeCount =
BFI.getBlockProfileCount(MO.I->getParent());
312 ActualCount = *BBEdgeCount;
316 LLVM_DEBUG(
dbgs() <<
"Read one memory intrinsic profile with count " 317 << ActualCount <<
"\n");
320 : VDs) {
dbgs() <<
" (" << VD.Value <<
"," << VD.Count <<
")\n"; });
329 TotalCount = ActualCount;
332 <<
" denominator = " << SavedTotalCount <<
"\n");
335 uint64_t RemainCount = TotalCount;
336 uint64_t SavedRemainCount = SavedTotalCount;
339 uint64_t MaxCount = 0;
343 for (
auto &VD : VDs) {
344 int64_t V = VD.Value;
345 uint64_t
C = VD.Count;
347 C = getScaledCount(
C, ActualCount, SavedTotalCount);
354 if (!isProfitable(
C, RemainCount))
364 assert(SavedRemainCount >= VD.Count);
365 SavedRemainCount -= VD.Count;
374 CaseCounts[0] = RemainCount;
375 if (RemainCount > MaxCount)
376 MaxCount = RemainCount;
378 uint64_t SumForOpt = TotalCount - RemainCount;
381 <<
" Versions (covering " << SumForOpt <<
" out of " 382 << TotalCount <<
")\n");
403 auto OrigBBFreq =
BFI.getBlockFreq(BB);
410 MergeBB->setName(
"MemOP.Merge");
411 BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
412 DefaultBB->
setName(
"MemOP.Default");
415 auto &Ctx =
Func.getContext();
418 Value *SizeVar = MO.getLength();
420 Type *MemOpTy = MO.I->getType();
425 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.
size() + 1,
"MemOP.RVMerge");
426 MO.I->replaceAllUsesWith(PHI);
427 PHI->addIncoming(MO.I, DefaultBB);
431 MO.I->setMetadata(LLVMContext::MD_prof,
nullptr);
433 if (SavedRemainCount > 0 ||
Version != NumVals)
436 SavedRemainCount, IPVK_MemOPSize, NumVals);
440 std::vector<DominatorTree::UpdateType> Updates;
442 Updates.reserve(2 * SizeIds.
size());
444 for (uint64_t SizeId : SizeIds) {
446 Ctx,
Twine(
"MemOP.Case.") +
Twine(SizeId), &Func, DefaultBB);
447 MemOp NewMO = MO.clone();
449 auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
450 assert(SizeType &&
"Expected integer type size argument.");
452 NewMO.setLength(CaseSizeId);
455 IRBCase.CreateBr(MergeBB);
456 SI->addCase(CaseSizeId, CaseBB);
458 PHI->addIncoming(NewMO.I, CaseBB);
465 DTU.applyUpdates(Updates);
477 <<
"optimized " <<
NV(
"Memop", MO.getName(TLI)) <<
" with count " 478 <<
NV(
"Count", SumForOpt) <<
" out of " <<
NV(
"Total", TotalCount)
479 <<
" for " <<
NV(
"Versions",
Version) <<
" versions";
492 if (
F.hasFnAttribute(Attribute::OptimizeForSize))
494 MemOPSizeOpt MemOPSizeOpt(
F,
BFI, ORE, DT, TLI);
495 MemOPSizeOpt.perform();
496 return MemOPSizeOpt.isChanged();
501 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
502 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
503 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
506 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
Legacy wrapper pass to provide the GlobalsAAResult object.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
Base class for instruction visitors.
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
This is the interface for a simple mod/ref and alias analysis over globals.
FunctionPass * createPGOMemOPSizeOptLegacyPass()
void push_back(const T &Elt)
This class represents a function call, abstracting a target machine's calling convention.
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", "Optimize memory intrinsic using its size value profile", false, false) INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass
Value * getArgOperand(unsigned i) const
AnalysisUsage & addRequired()
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static StringRef getName(Value *V)
Legacy analysis pass which computes BlockFrequencyInfo.
void setName(const Twine &Name)
Change the name of the value.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isVoidTy() const
Return true if this is 'void'.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC)
Extract the value profile data from Inst which is annotated with value profile meta data.
A set of analyses that are preserved following a run of a transformation pass.
static constexpr UpdateKind Insert
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed.
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const InstListType & getInstList() const
Return the underlying instruction list container.
Analysis pass which computes BlockFrequencyInfo.
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
void push_back(pointer val)
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
pgo instr Read PGO instrumentation profile
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry &)
Analysis pass providing the TargetLibraryInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM Value Representation.
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)