59#define DEBUG_TYPE "hotcoldsplit"
61STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
62STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
71 cl::desc(
"Base penalty for splitting cold code (as a "
72 "multiple of TCC_Basic)"));
76 cl::desc(
"Enable placement of extracted cold functions"
77 " into a separate section after hot-cold splitting."));
82 cl::desc(
"Name for the section containing cold functions "
83 "extracted by hot-cold splitting."));
87 cl::desc(
"Maximum number of parameters for a split function"));
102 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
113 if (
auto *CB = dyn_cast<CallBase>(&
I))
114 if (CB->hasFnAttr(Attribute::Cold) &&
115 !CB->getMetadata(LLVMContext::MD_nosanitize))
122 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
123 if (CI->hasFnAttr(Attribute::NoReturn))
132static bool mayExtractBlock(
const BasicBlock &BB) {
142 return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
149static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
150 assert(!
F.hasOptNone() &&
"Can't mark this cold");
151 bool Changed =
false;
152 if (!
F.hasFnAttribute(Attribute::Cold)) {
153 F.addFnAttr(Attribute::Cold);
156 if (!
F.hasFnAttribute(Attribute::MinSize)) {
157 F.addFnAttr(Attribute::MinSize);
160 if (UpdateEntryCount) {
173bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
174 if (
F.hasFnAttribute(Attribute::Cold))
188bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
189 if (
F.hasFnAttribute(Attribute::AlwaysInline))
192 if (
F.hasFnAttribute(Attribute::NoInline))
197 if (
F.hasFnAttribute(Attribute::NoReturn))
200 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
201 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
202 F.hasFnAttribute(Attribute::SanitizeThread) ||
203 F.hasFnAttribute(Attribute::SanitizeMemory))
226 unsigned NumInputs,
unsigned NumOutputs) {
228 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
237 bool NoBlocksReturn =
true;
249 NoBlocksReturn =
false;
250 SuccsOutsideRegion.
insert(SuccBB);
260 unsigned NumSplitExitPhis = 0;
261 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
262 for (
PHINode &PN : ExitBB->phis()) {
264 int NumIncomingVals = 0;
265 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
268 if (NumIncomingVals > 1) {
278 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
279 int NumParams = NumInputs + NumOutputsAndSplitPhis;
281 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
282 <<
" outputs exceeds parameter limit ("
284 return std::numeric_limits<int>::max();
287 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
288 Penalty += CostForArgMaterialization * NumParams;
292 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
293 <<
" outputs/split phis\n");
295 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
298 if (NoBlocksReturn) {
300 <<
" non-returning terminators\n");
306 if (SuccsOutsideRegion.
size() > 1) {
308 <<
" non-region successors\n");
315Function *HotColdSplitting::extractColdRegion(
325 "cold." + std::to_string(Count));
330 CE.findInputsOutputs(Inputs, Outputs, Sinks);
332 int OutliningPenalty =
334 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
335 <<
", penalty = " << OutliningPenalty <<
"\n");
336 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
340 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
341 User *
U = *OutF->user_begin();
343 NumColdRegionsOutlined++;
357 markFunctionCold(*OutF, BFI !=
nullptr);
363 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
372 <<
"Failed to extract region at block "
379using BlockTy = std::pair<BasicBlock *, unsigned>;
386class OutliningRegion {
398 bool EntireFunctionCold =
false;
401 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
402 return mayExtractBlock(BB) ? Score : 0;
407 static constexpr unsigned ScoreForSuccBlock = 1;
408 static constexpr unsigned ScoreForSinkBlock = 1;
410 OutliningRegion(
const OutliningRegion &) =
delete;
411 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
414 OutliningRegion() =
default;
415 OutliningRegion(OutliningRegion &&) =
default;
416 OutliningRegion &operator=(OutliningRegion &&) =
default;
418 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
421 std::vector<OutliningRegion> Regions;
424 Regions.emplace_back();
425 OutliningRegion *ColdRegion = &Regions.back();
427 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
429 ColdRegion->Blocks.emplace_back(BB, Score);
433 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
434 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
435 unsigned BestScore = SinkScore;
439 auto PredEnd =
idf_end(&SinkBB);
440 while (PredIt != PredEnd) {
442 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
447 ColdRegion->EntireFunctionCold =
true;
453 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
454 PredIt.skipChildren();
461 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
462 if (PredScore > BestScore) {
463 ColdRegion->SuggestedEntryPoint = &PredBB;
464 BestScore = PredScore;
467 addBlockToRegion(&PredBB, PredScore);
477 if (mayExtractBlock(SinkBB)) {
478 addBlockToRegion(&SinkBB, SinkScore);
480 ColdRegion->EntireFunctionCold =
true;
484 Regions.emplace_back();
485 ColdRegion = &Regions.back();
491 auto SuccEnd =
df_end(&SinkBB);
492 while (SuccIt != SuccEnd) {
494 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
497 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
501 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
502 SuccIt.skipChildren();
506 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
507 if (SuccScore > BestScore) {
508 ColdRegion->SuggestedEntryPoint = &SuccBB;
509 BestScore = SuccScore;
512 addBlockToRegion(&SuccBB, SuccScore);
520 bool empty()
const {
return !SuggestedEntryPoint; }
526 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
530 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
537 unsigned NextScore = 0;
538 auto RegionEndIt =
Blocks.end();
541 unsigned Score = Block.second;
543 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
544 if (!InSubRegion && Score > NextScore) {
548 if (InSubRegion && BB != SuggestedEntryPoint)
552 Blocks.erase(RegionStartIt, RegionEndIt);
555 SuggestedEntryPoint = NextEntryPoint;
562bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
563 bool Changed =
false;
577 std::unique_ptr<DominatorTree> DT;
578 std::unique_ptr<PostDominatorTree> PDT;
584 if (HasProfileSummary)
594 if (ColdBlocks.
count(BB))
603 dbgs() <<
"Found a cold block:\n";
608 DT = std::make_unique<DominatorTree>(
F);
610 PDT = std::make_unique<PostDominatorTree>(
F);
612 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
613 for (OutliningRegion &
Region : Regions) {
617 if (
Region.isEntireFunctionCold()) {
619 return markFunctionCold(
F);
628 return !ColdBlocks.insert(Block.first).second;
634 ++NumColdRegionsFound;
638 if (OutliningWorklist.
empty())
642 unsigned OutlinedFunctionID = 1;
647 assert(!
Region.empty() &&
"Empty outlining region in worklist");
651 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
656 Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI,
TTI,
657 ORE, AC, OutlinedFunctionID);
659 ++OutlinedFunctionID;
662 }
while (!
Region.empty());
663 }
while (!OutliningWorklist.
empty());
669 bool Changed =
false;
670 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
673 if (
F.isDeclaration())
681 if (isFunctionCold(
F)) {
682 Changed |= markFunctionCold(
F);
686 if (!shouldOutlineFrom(
F)) {
692 Changed |= outlineColdRegions(
F, HasProfileSummary);
714 std::unique_ptr<OptimizationRemarkEmitter> ORE;
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
DenseMap< Block *, BlockRelaxAux > Blocks
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
block_range blocks()
Returns a range view of the basic blocks in the region.
RegionT * getParent() const
Get the parent of the Region.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)