60#define DEBUG_TYPE "hotcoldsplit"
62STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
63STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
72 cl::desc(
"Base penalty for splitting cold code (as a "
73 "multiple of TCC_Basic)"));
77 cl::desc(
"Enable placement of extracted cold functions"
78 " into a separate section after hot-cold splitting."));
83 cl::desc(
"Name for the section containing cold functions "
84 "extracted by hot-cold splitting."));
88 cl::desc(
"Maximum number of parameters for a split function"));
92 cl::desc(
"Divisor of cold branch probability."
93 "BranchProbability = 1/ColdBranchProbDenom"));
108 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
123 auto SumWt = TrueWt + FalseWt;
130 if (TrueProb <= ColdProbThresh)
133 if (FalseProb <= ColdProbThresh)
145 if (
auto *CB = dyn_cast<CallBase>(&
I))
146 if (CB->hasFnAttr(Attribute::Cold) &&
147 !CB->getMetadata(LLVMContext::MD_nosanitize))
154 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
155 if (CI->hasFnAttr(Attribute::NoReturn))
164static bool mayExtractBlock(
const BasicBlock &BB) {
175 if (isa<InvokeInst>(Term) || isa<ResumeInst>(Term))
184 BB, [](
const Instruction &
I) {
return I.getType()->isTokenTy(); })) {
195static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
196 assert(!
F.hasOptNone() &&
"Can't mark this cold");
197 bool Changed =
false;
198 if (!
F.hasFnAttribute(Attribute::Cold)) {
199 F.addFnAttr(Attribute::Cold);
202 if (!
F.hasFnAttribute(Attribute::MinSize)) {
203 F.addFnAttr(Attribute::MinSize);
206 if (UpdateEntryCount) {
219bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
220 if (
F.hasFnAttribute(Attribute::Cold))
232bool HotColdSplitting::isBasicBlockCold(
241 analyzeProfMetadata(BB, ColdProbThresh, AnnotatedColdBlocks);
245 if (AnnotatedColdBlocks.
count(BB))
257bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
258 if (
F.hasFnAttribute(Attribute::AlwaysInline))
261 if (
F.hasFnAttribute(Attribute::NoInline))
266 if (
F.hasFnAttribute(Attribute::NoReturn))
269 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
270 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
271 F.hasFnAttribute(Attribute::SanitizeThread) ||
272 F.hasFnAttribute(Attribute::SanitizeMemory))
276 if (
F.hasPersonalityFn())
300 unsigned NumInputs,
unsigned NumOutputs) {
302 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
311 bool NoBlocksReturn =
true;
323 NoBlocksReturn =
false;
324 SuccsOutsideRegion.
insert(SuccBB);
334 unsigned NumSplitExitPhis = 0;
335 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
336 for (
PHINode &PN : ExitBB->phis()) {
338 int NumIncomingVals = 0;
339 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
342 if (NumIncomingVals > 1) {
352 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
353 int NumParams = NumInputs + NumOutputsAndSplitPhis;
355 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
356 <<
" outputs exceeds parameter limit ("
358 return std::numeric_limits<int>::max();
361 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
362 Penalty += CostForArgMaterialization * NumParams;
366 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
367 <<
" outputs/split phis\n");
369 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
372 if (NoBlocksReturn) {
374 <<
" non-returning terminators\n");
380 if (SuccsOutsideRegion.
size() > 1) {
382 <<
" non-region successors\n");
390bool HotColdSplitting::isSplittingBeneficial(
CodeExtractor &CE,
398 CE.findInputsOutputs(Inputs, Outputs, Sinks);
400 int OutliningPenalty =
402 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
403 <<
", penalty = " << OutliningPenalty <<
"\n");
404 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
412Function *HotColdSplitting::extractColdRegion(
417 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
418 User *
U = *OutF->user_begin();
420 NumColdRegionsOutlined++;
434 markFunctionCold(*OutF, BFI !=
nullptr);
439 &*EntryPoint.
begin())
440 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
448 &*EntryPoint.
begin())
449 <<
"Failed to extract region at block "
450 <<
ore::NV(
"Block", &EntryPoint);
456using BlockTy = std::pair<BasicBlock *, unsigned>;
463class OutliningRegion {
475 bool EntireFunctionCold =
false;
478 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
479 return mayExtractBlock(BB) ? Score : 0;
484 static constexpr unsigned ScoreForSuccBlock = 1;
485 static constexpr unsigned ScoreForSinkBlock = 1;
487 OutliningRegion(
const OutliningRegion &) =
delete;
488 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
491 OutliningRegion() =
default;
492 OutliningRegion(OutliningRegion &&) =
default;
493 OutliningRegion &operator=(OutliningRegion &&) =
default;
495 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
498 std::vector<OutliningRegion> Regions;
501 Regions.emplace_back();
502 OutliningRegion *ColdRegion = &Regions.back();
504 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
506 ColdRegion->Blocks.emplace_back(BB, Score);
510 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
511 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
512 unsigned BestScore = SinkScore;
516 auto PredEnd =
idf_end(&SinkBB);
517 while (PredIt != PredEnd) {
519 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
524 ColdRegion->EntireFunctionCold =
true;
530 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
531 PredIt.skipChildren();
538 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
539 if (PredScore > BestScore) {
540 ColdRegion->SuggestedEntryPoint = &PredBB;
541 BestScore = PredScore;
544 addBlockToRegion(&PredBB, PredScore);
554 if (mayExtractBlock(SinkBB)) {
555 addBlockToRegion(&SinkBB, SinkScore);
557 ColdRegion->EntireFunctionCold =
true;
561 Regions.emplace_back();
562 ColdRegion = &Regions.back();
568 auto SuccEnd =
df_end(&SinkBB);
569 while (SuccIt != SuccEnd) {
571 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
574 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
578 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
579 SuccIt.skipChildren();
583 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
584 if (SuccScore > BestScore) {
585 ColdRegion->SuggestedEntryPoint = &SuccBB;
586 BestScore = SuccScore;
589 addBlockToRegion(&SuccBB, SuccScore);
597 bool empty()
const {
return !SuggestedEntryPoint; }
603 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
607 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
614 unsigned NextScore = 0;
615 auto RegionEndIt =
Blocks.end();
618 unsigned Score =
Block.second;
620 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
621 if (!InSubRegion && Score > NextScore) {
625 if (InSubRegion && BB != SuggestedEntryPoint)
629 Blocks.erase(RegionStartIt, RegionEndIt);
632 SuggestedEntryPoint = NextEntryPoint;
639bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
659 std::unique_ptr<DominatorTree> DT;
660 std::unique_ptr<PostDominatorTree> PDT;
666 if (HasProfileSummary)
677 unsigned OutlinedFunctionID = 1;
681 if (ColdBlocks.
count(BB))
685 if (CannotBeOutlinedColdBlocks.
count(BB))
688 if (!isBasicBlockCold(BB, ColdProbThresh, AnnotatedColdBlocks, BFI))
692 dbgs() <<
"Found a cold block:\n";
697 DT = std::make_unique<DominatorTree>(
F);
699 PDT = std::make_unique<PostDominatorTree>(
F);
701 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
702 for (OutliningRegion &
Region : Regions) {
706 if (
Region.isEntireFunctionCold()) {
708 return markFunctionCold(
F);
714 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
721 SubRegion, &*DT,
false,
nullptr,
724 "cold." + std::to_string(OutlinedFunctionID));
726 if (
CE.isEligible() && isSplittingBeneficial(CE, SubRegion,
TTI) &&
736 ColdBlocks.
insert(SubRegion.begin(), SubRegion.end());
739 for (
auto *
Block : SubRegion)
740 dbgs() <<
" contains cold block:" <<
Block->getName() <<
"\n";
744 std::make_pair(SubRegion[0], std::move(CE)));
745 ++OutlinedFunctionID;
748 for (
auto *
Block : SubRegion)
754 }
while (!
Region.empty());
756 ++NumColdRegionsFound;
760 if (OutliningWorklist.
empty())
766 for (
auto &BCE : OutliningWorklist) {
768 extractColdRegion(*BCE.first, BCE.second, CEAC, BFI,
TTI, ORE);
769 assert(Outlined &&
"Should be outlined");
777 bool Changed =
false;
778 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
781 if (
F.isDeclaration())
789 if (isFunctionCold(
F)) {
790 Changed |= markFunctionCold(
F);
794 if (!shouldOutlineFrom(
F)) {
800 Changed |= outlineColdRegions(
F, HasProfileSummary);
822 std::unique_ptr<OptimizationRemarkEmitter> ORE;
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
DenseMap< Block *, BlockRelaxAux > Blocks
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > ColdBranchProbDenom("hotcoldsplit-cold-probability-denom", cl::init(100), cl::Hidden, cl::desc("Divisor of cold branch probability." "BranchProbability = 1/ColdBranchProbDenom"))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
FunctionAnalysisManager FAM
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Function * getParent() const
Return the enclosing method, or null if none.
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)