60#define DEBUG_TYPE "hotcoldsplit"
62STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
63STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
72 cl::desc(
"Base penalty for splitting cold code (as a "
73 "multiple of TCC_Basic)"));
77 cl::desc(
"Enable placement of extracted cold functions"
78 " into a separate section after hot-cold splitting."));
83 cl::desc(
"Name for the section containing cold functions "
84 "extracted by hot-cold splitting."));
88 cl::desc(
"Maximum number of parameters for a split function"));
92 cl::desc(
"Divisor of cold branch probability."
93 "BranchProbability = 1/ColdBranchProbDenom"));
108 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
123 auto SumWt = TrueWt + FalseWt;
130 if (TrueProb <= ColdProbThresh)
133 if (FalseProb <= ColdProbThresh)
145 if (
auto *CB = dyn_cast<CallBase>(&
I))
146 if (CB->hasFnAttr(Attribute::Cold) &&
147 !CB->getMetadata(LLVMContext::MD_nosanitize))
154 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
155 if (CI->hasFnAttr(Attribute::NoReturn))
164static bool mayExtractBlock(
const BasicBlock &BB) {
174 return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
181static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
182 assert(!
F.hasOptNone() &&
"Can't mark this cold");
183 bool Changed =
false;
184 if (!
F.hasFnAttribute(Attribute::Cold)) {
185 F.addFnAttr(Attribute::Cold);
188 if (!
F.hasFnAttribute(Attribute::MinSize)) {
189 F.addFnAttr(Attribute::MinSize);
192 if (UpdateEntryCount) {
205bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
206 if (
F.hasFnAttribute(Attribute::Cold))
218bool HotColdSplitting::isBasicBlockCold(
BasicBlock *BB,
224 if (ColdBlocks.
count(BB))
232 analyzeProfMetadata(BB, ColdProbThresh, AnnotatedColdBlocks);
236 if (AnnotatedColdBlocks.
count(BB))
248bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
249 if (
F.hasFnAttribute(Attribute::AlwaysInline))
252 if (
F.hasFnAttribute(Attribute::NoInline))
257 if (
F.hasFnAttribute(Attribute::NoReturn))
260 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
261 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
262 F.hasFnAttribute(Attribute::SanitizeThread) ||
263 F.hasFnAttribute(Attribute::SanitizeMemory))
286 unsigned NumInputs,
unsigned NumOutputs) {
288 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
297 bool NoBlocksReturn =
true;
309 NoBlocksReturn =
false;
310 SuccsOutsideRegion.
insert(SuccBB);
320 unsigned NumSplitExitPhis = 0;
321 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
322 for (
PHINode &PN : ExitBB->phis()) {
324 int NumIncomingVals = 0;
325 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
328 if (NumIncomingVals > 1) {
338 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
339 int NumParams = NumInputs + NumOutputsAndSplitPhis;
341 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
342 <<
" outputs exceeds parameter limit ("
344 return std::numeric_limits<int>::max();
347 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
348 Penalty += CostForArgMaterialization * NumParams;
352 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
353 <<
" outputs/split phis\n");
355 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
358 if (NoBlocksReturn) {
360 <<
" non-returning terminators\n");
366 if (SuccsOutsideRegion.
size() > 1) {
368 <<
" non-region successors\n");
375Function *HotColdSplitting::extractColdRegion(
385 "cold." + std::to_string(Count));
390 CE.findInputsOutputs(Inputs, Outputs, Sinks);
392 int OutliningPenalty =
394 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
395 <<
", penalty = " << OutliningPenalty <<
"\n");
396 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
400 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
401 User *
U = *OutF->user_begin();
403 NumColdRegionsOutlined++;
417 markFunctionCold(*OutF, BFI !=
nullptr);
423 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
432 <<
"Failed to extract region at block "
439using BlockTy = std::pair<BasicBlock *, unsigned>;
446class OutliningRegion {
458 bool EntireFunctionCold =
false;
461 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
462 return mayExtractBlock(BB) ? Score : 0;
467 static constexpr unsigned ScoreForSuccBlock = 1;
468 static constexpr unsigned ScoreForSinkBlock = 1;
470 OutliningRegion(
const OutliningRegion &) =
delete;
471 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
474 OutliningRegion() =
default;
475 OutliningRegion(OutliningRegion &&) =
default;
476 OutliningRegion &operator=(OutliningRegion &&) =
default;
478 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
481 std::vector<OutliningRegion> Regions;
484 Regions.emplace_back();
485 OutliningRegion *ColdRegion = &Regions.back();
487 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
489 ColdRegion->Blocks.emplace_back(BB, Score);
493 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
494 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
495 unsigned BestScore = SinkScore;
499 auto PredEnd =
idf_end(&SinkBB);
500 while (PredIt != PredEnd) {
502 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
507 ColdRegion->EntireFunctionCold =
true;
513 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
514 PredIt.skipChildren();
521 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
522 if (PredScore > BestScore) {
523 ColdRegion->SuggestedEntryPoint = &PredBB;
524 BestScore = PredScore;
527 addBlockToRegion(&PredBB, PredScore);
537 if (mayExtractBlock(SinkBB)) {
538 addBlockToRegion(&SinkBB, SinkScore);
540 ColdRegion->EntireFunctionCold =
true;
544 Regions.emplace_back();
545 ColdRegion = &Regions.back();
551 auto SuccEnd =
df_end(&SinkBB);
552 while (SuccIt != SuccEnd) {
554 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
557 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
561 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
562 SuccIt.skipChildren();
566 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
567 if (SuccScore > BestScore) {
568 ColdRegion->SuggestedEntryPoint = &SuccBB;
569 BestScore = SuccScore;
572 addBlockToRegion(&SuccBB, SuccScore);
580 bool empty()
const {
return !SuggestedEntryPoint; }
586 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
590 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
597 unsigned NextScore = 0;
598 auto RegionEndIt =
Blocks.end();
601 unsigned Score =
Block.second;
603 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
604 if (!InSubRegion && Score > NextScore) {
608 if (InSubRegion && BB != SuggestedEntryPoint)
612 Blocks.erase(RegionStartIt, RegionEndIt);
615 SuggestedEntryPoint = NextEntryPoint;
622bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
623 bool Changed =
false;
640 std::unique_ptr<DominatorTree> DT;
641 std::unique_ptr<PostDominatorTree> PDT;
647 if (HasProfileSummary)
660 if (!isBasicBlockCold(BB, ColdProbThresh, ColdBlocks, AnnotatedColdBlocks,
665 dbgs() <<
"Found a cold block:\n";
670 DT = std::make_unique<DominatorTree>(
F);
672 PDT = std::make_unique<PostDominatorTree>(
F);
674 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
675 for (OutliningRegion &
Region : Regions) {
679 if (
Region.isEntireFunctionCold()) {
681 return markFunctionCold(
F);
690 return !ColdBlocks.insert(Block.first).second;
696 ++NumColdRegionsFound;
700 if (OutliningWorklist.
empty())
704 unsigned OutlinedFunctionID = 1;
709 assert(!
Region.empty() &&
"Empty outlining region in worklist");
713 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
718 Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI,
TTI,
719 ORE, AC, OutlinedFunctionID);
721 ++OutlinedFunctionID;
724 }
while (!
Region.empty());
725 }
while (!OutliningWorklist.
empty());
731 bool Changed =
false;
732 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
735 if (
F.isDeclaration())
743 if (isFunctionCold(
F)) {
744 Changed |= markFunctionCold(
F);
748 if (!shouldOutlineFrom(
F)) {
754 Changed |= outlineColdRegions(
F, HasProfileSummary);
776 std::unique_ptr<OptimizationRemarkEmitter> ORE;
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
DenseMap< Block *, BlockRelaxAux > Blocks
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > ColdBranchProbDenom("hotcoldsplit-cold-probability-denom", cl::init(100), cl::Hidden, cl::desc("Divisor of cold branch probability." "BranchProbability = 1/ColdBranchProbDenom"))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
block_range blocks()
Returns a range view of the basic blocks in the region.
RegionT * getParent() const
Get the parent of the Region.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)