61#define DEBUG_TYPE "hotcoldsplit"
63STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
64STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
73 cl::desc(
"Base penalty for splitting cold code (as a "
74 "multiple of TCC_Basic)"));
78 cl::desc(
"Enable placement of extracted cold functions"
79 " into a separate section after hot-cold splitting."));
84 cl::desc(
"Name for the section containing cold functions "
85 "extracted by hot-cold splitting."));
89 cl::desc(
"Maximum number of parameters for a split function"));
93 cl::desc(
"Divisor of cold branch probability."
94 "BranchProbability = 1/ColdBranchProbDenom"));
109 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
124 auto SumWt = TrueWt + FalseWt;
131 if (TrueProb <= ColdProbThresh)
134 if (FalseProb <= ColdProbThresh)
146 if (
auto *CB = dyn_cast<CallBase>(&
I))
147 if (CB->hasFnAttr(Attribute::Cold) &&
148 !CB->getMetadata(LLVMContext::MD_nosanitize))
155 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
156 if (CI->hasFnAttr(Attribute::NoReturn))
165static bool mayExtractBlock(
const BasicBlock &BB) {
176 if (isa<InvokeInst>(Term) || isa<ResumeInst>(Term))
185 BB, [](
const Instruction &
I) {
return I.getType()->isTokenTy(); })) {
196static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
197 assert(!
F.hasOptNone() &&
"Can't mark this cold");
198 bool Changed =
false;
199 if (!
F.hasFnAttribute(Attribute::Cold)) {
200 F.addFnAttr(Attribute::Cold);
203 if (!
F.hasFnAttribute(Attribute::MinSize)) {
204 F.addFnAttr(Attribute::MinSize);
207 if (UpdateEntryCount) {
220bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
221 if (
F.hasFnAttribute(Attribute::Cold))
233bool HotColdSplitting::isBasicBlockCold(
242 analyzeProfMetadata(BB, ColdProbThresh, AnnotatedColdBlocks);
246 if (AnnotatedColdBlocks.
count(BB))
258bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
259 if (
F.hasFnAttribute(Attribute::AlwaysInline))
262 if (
F.hasFnAttribute(Attribute::NoInline))
267 if (
F.hasFnAttribute(Attribute::NoReturn))
270 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
271 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
272 F.hasFnAttribute(Attribute::SanitizeThread) ||
273 F.hasFnAttribute(Attribute::SanitizeMemory))
277 if (
F.hasPersonalityFn())
301 unsigned NumInputs,
unsigned NumOutputs) {
303 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
312 bool NoBlocksReturn =
true;
324 NoBlocksReturn =
false;
325 SuccsOutsideRegion.
insert(SuccBB);
335 unsigned NumSplitExitPhis = 0;
336 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
337 for (
PHINode &PN : ExitBB->phis()) {
339 int NumIncomingVals = 0;
340 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
343 if (NumIncomingVals > 1) {
353 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
354 int NumParams = NumInputs + NumOutputsAndSplitPhis;
356 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
357 <<
" outputs exceeds parameter limit ("
359 return std::numeric_limits<int>::max();
362 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
363 Penalty += CostForArgMaterialization * NumParams;
367 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
368 <<
" outputs/split phis\n");
370 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
373 if (NoBlocksReturn) {
375 <<
" non-returning terminators\n");
381 if (SuccsOutsideRegion.
size() > 1) {
383 <<
" non-region successors\n");
391bool HotColdSplitting::isSplittingBeneficial(
CodeExtractor &CE,
399 CE.findInputsOutputs(Inputs, Outputs, Sinks);
401 int OutliningPenalty =
403 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
404 <<
", penalty = " << OutliningPenalty <<
"\n");
405 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
413Function *HotColdSplitting::extractColdRegion(
418 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
419 User *
U = *OutF->user_begin();
421 NumColdRegionsOutlined++;
435 markFunctionCold(*OutF, BFI !=
nullptr);
440 &*EntryPoint.
begin())
441 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
449 &*EntryPoint.
begin())
450 <<
"Failed to extract region at block "
451 <<
ore::NV(
"Block", &EntryPoint);
457using BlockTy = std::pair<BasicBlock *, unsigned>;
464class OutliningRegion {
476 bool EntireFunctionCold =
false;
479 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
480 return mayExtractBlock(BB) ? Score : 0;
485 static constexpr unsigned ScoreForSuccBlock = 1;
486 static constexpr unsigned ScoreForSinkBlock = 1;
488 OutliningRegion(
const OutliningRegion &) =
delete;
489 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
492 OutliningRegion() =
default;
493 OutliningRegion(OutliningRegion &&) =
default;
494 OutliningRegion &operator=(OutliningRegion &&) =
default;
496 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
499 std::vector<OutliningRegion> Regions;
502 Regions.emplace_back();
503 OutliningRegion *ColdRegion = &Regions.back();
505 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
507 ColdRegion->Blocks.emplace_back(BB, Score);
511 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
512 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
513 unsigned BestScore = SinkScore;
517 auto PredEnd =
idf_end(&SinkBB);
518 while (PredIt != PredEnd) {
520 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
525 ColdRegion->EntireFunctionCold =
true;
531 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
532 PredIt.skipChildren();
539 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
540 if (PredScore > BestScore) {
541 ColdRegion->SuggestedEntryPoint = &PredBB;
542 BestScore = PredScore;
545 addBlockToRegion(&PredBB, PredScore);
555 if (mayExtractBlock(SinkBB)) {
556 addBlockToRegion(&SinkBB, SinkScore);
558 ColdRegion->EntireFunctionCold =
true;
562 Regions.emplace_back();
563 ColdRegion = &Regions.back();
569 auto SuccEnd =
df_end(&SinkBB);
570 while (SuccIt != SuccEnd) {
572 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
575 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
579 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
580 SuccIt.skipChildren();
584 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
585 if (SuccScore > BestScore) {
586 ColdRegion->SuggestedEntryPoint = &SuccBB;
587 BestScore = SuccScore;
590 addBlockToRegion(&SuccBB, SuccScore);
598 bool empty()
const {
return !SuggestedEntryPoint; }
604 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
608 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
615 unsigned NextScore = 0;
616 auto RegionEndIt =
Blocks.end();
619 unsigned Score =
Block.second;
621 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
622 if (!InSubRegion && Score > NextScore) {
626 if (InSubRegion && BB != SuggestedEntryPoint)
630 Blocks.erase(RegionStartIt, RegionEndIt);
633 SuggestedEntryPoint = NextEntryPoint;
640bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
660 std::unique_ptr<DominatorTree> DT;
661 std::unique_ptr<PostDominatorTree> PDT;
667 if (HasProfileSummary)
678 unsigned OutlinedFunctionID = 1;
682 if (ColdBlocks.
count(BB))
686 if (CannotBeOutlinedColdBlocks.
count(BB))
689 if (!isBasicBlockCold(BB, ColdProbThresh, AnnotatedColdBlocks, BFI))
693 dbgs() <<
"Found a cold block:\n";
698 DT = std::make_unique<DominatorTree>(
F);
700 PDT = std::make_unique<PostDominatorTree>(
F);
702 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
703 for (OutliningRegion &
Region : Regions) {
707 if (
Region.isEntireFunctionCold()) {
709 return markFunctionCold(
F);
715 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
722 SubRegion, &*DT,
false,
nullptr,
725 "cold." + std::to_string(OutlinedFunctionID));
727 if (
CE.isEligible() && isSplittingBeneficial(CE, SubRegion,
TTI) &&
737 ColdBlocks.
insert(SubRegion.begin(), SubRegion.end());
740 for (
auto *
Block : SubRegion)
741 dbgs() <<
" contains cold block:" <<
Block->getName() <<
"\n";
745 std::make_pair(SubRegion[0], std::move(CE)));
746 ++OutlinedFunctionID;
749 for (
auto *
Block : SubRegion)
750 if ((DT->dominates(BB,
Block) && PDT->dominates(
Block, BB)) ||
751 (PDT->dominates(BB,
Block) && DT->dominates(
Block, BB)))
755 }
while (!
Region.empty());
757 ++NumColdRegionsFound;
761 if (OutliningWorklist.
empty())
767 for (
auto &BCE : OutliningWorklist) {
769 extractColdRegion(*BCE.first, BCE.second, CEAC, BFI,
TTI, ORE);
770 assert(Outlined &&
"Should be outlined");
778 bool Changed =
false;
779 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
782 if (
F.isDeclaration())
790 if (isFunctionCold(
F)) {
791 Changed |= markFunctionCold(
F);
795 if (!shouldOutlineFrom(
F)) {
801 Changed |= outlineColdRegions(
F, HasProfileSummary);
823 std::unique_ptr<OptimizationRemarkEmitter> ORE;
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
DenseMap< Block *, BlockRelaxAux > Blocks
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > ColdBranchProbDenom("hotcoldsplit-cold-probability-denom", cl::init(100), cl::Hidden, cl::desc("Divisor of cold branch probability." "BranchProbability = 1/ColdBranchProbDenom"))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Function * getParent() const
Return the enclosing method, or null if none.
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)