61#define DEBUG_TYPE "hotcoldsplit"
63STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
64STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
73 cl::desc(
"Base penalty for splitting cold code (as a "
74 "multiple of TCC_Basic)"));
78 cl::desc(
"Enable placement of extracted cold functions"
79 " into a separate section after hot-cold splitting."));
84 cl::desc(
"Name for the section containing cold functions "
85 "extracted by hot-cold splitting."));
89 cl::desc(
"Maximum number of parameters for a split function"));
104 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
115 if (
auto *CB = dyn_cast<CallBase>(&
I))
116 if (CB->hasFnAttr(Attribute::Cold) &&
117 !CB->getMetadata(LLVMContext::MD_nosanitize))
124 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
125 if (CI->hasFnAttr(Attribute::NoReturn))
134static bool mayExtractBlock(
const BasicBlock &BB) {
144 return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
151static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
152 assert(!
F.hasOptNone() &&
"Can't mark this cold");
153 bool Changed =
false;
154 if (!
F.hasFnAttribute(Attribute::Cold)) {
155 F.addFnAttr(Attribute::Cold);
158 if (!
F.hasFnAttribute(Attribute::MinSize)) {
159 F.addFnAttr(Attribute::MinSize);
162 if (UpdateEntryCount) {
172class HotColdSplittingLegacyPass :
public ModulePass {
192bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
193 if (
F.hasFnAttribute(Attribute::Cold))
207bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
208 if (
F.hasFnAttribute(Attribute::AlwaysInline))
211 if (
F.hasFnAttribute(Attribute::NoInline))
216 if (
F.hasFnAttribute(Attribute::NoReturn))
219 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
220 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
221 F.hasFnAttribute(Attribute::SanitizeThread) ||
222 F.hasFnAttribute(Attribute::SanitizeMemory))
245 unsigned NumInputs,
unsigned NumOutputs) {
247 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
256 bool NoBlocksReturn =
true;
268 NoBlocksReturn =
false;
269 SuccsOutsideRegion.
insert(SuccBB);
279 unsigned NumSplitExitPhis = 0;
280 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
281 for (
PHINode &PN : ExitBB->phis()) {
283 int NumIncomingVals = 0;
284 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
287 if (NumIncomingVals > 1) {
297 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
298 int NumParams = NumInputs + NumOutputsAndSplitPhis;
300 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
301 <<
" outputs exceeds parameter limit ("
303 return std::numeric_limits<int>::max();
306 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
307 Penalty += CostForArgMaterialization * NumParams;
311 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
312 <<
" outputs/split phis\n");
314 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
317 if (NoBlocksReturn) {
319 <<
" non-returning terminators\n");
325 if (SuccsOutsideRegion.
size() > 1) {
327 <<
" non-region successors\n");
334Function *HotColdSplitting::extractColdRegion(
344 "cold." + std::to_string(Count));
349 CE.findInputsOutputs(Inputs, Outputs, Sinks);
351 int OutliningPenalty =
353 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
354 <<
", penalty = " << OutliningPenalty <<
"\n");
355 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
359 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
362 NumColdRegionsOutlined++;
376 markFunctionCold(*OutF, BFI !=
nullptr);
382 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
391 <<
"Failed to extract region at block "
398using BlockTy = std::pair<BasicBlock *, unsigned>;
405class OutliningRegion {
417 bool EntireFunctionCold =
false;
420 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
421 return mayExtractBlock(BB) ? Score : 0;
426 static constexpr unsigned ScoreForSuccBlock = 1;
427 static constexpr unsigned ScoreForSinkBlock = 1;
429 OutliningRegion(
const OutliningRegion &) =
delete;
430 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
433 OutliningRegion() =
default;
434 OutliningRegion(OutliningRegion &&) =
default;
435 OutliningRegion &operator=(OutliningRegion &&) =
default;
437 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
440 std::vector<OutliningRegion> Regions;
443 Regions.emplace_back();
444 OutliningRegion *ColdRegion = &Regions.back();
446 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
448 ColdRegion->Blocks.emplace_back(BB, Score);
452 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
453 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
454 unsigned BestScore = SinkScore;
458 auto PredEnd =
idf_end(&SinkBB);
459 while (PredIt != PredEnd) {
461 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
466 ColdRegion->EntireFunctionCold =
true;
472 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
473 PredIt.skipChildren();
480 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
481 if (PredScore > BestScore) {
482 ColdRegion->SuggestedEntryPoint = &PredBB;
483 BestScore = PredScore;
486 addBlockToRegion(&PredBB, PredScore);
496 if (mayExtractBlock(SinkBB)) {
497 addBlockToRegion(&SinkBB, SinkScore);
499 ColdRegion->EntireFunctionCold =
true;
503 Regions.emplace_back();
504 ColdRegion = &Regions.back();
510 auto SuccEnd =
df_end(&SinkBB);
511 while (SuccIt != SuccEnd) {
513 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
516 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
520 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
521 SuccIt.skipChildren();
525 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
526 if (SuccScore > BestScore) {
527 ColdRegion->SuggestedEntryPoint = &SuccBB;
528 BestScore = SuccScore;
531 addBlockToRegion(&SuccBB, SuccScore);
539 bool empty()
const {
return !SuggestedEntryPoint; }
545 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
549 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
556 unsigned NextScore = 0;
557 auto RegionEndIt = Blocks.
end();
560 unsigned Score = Block.second;
562 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
563 if (!InSubRegion && Score > NextScore) {
567 if (InSubRegion && BB != SuggestedEntryPoint)
571 Blocks.
erase(RegionStartIt, RegionEndIt);
574 SuggestedEntryPoint = NextEntryPoint;
581bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
582 bool Changed =
false;
596 std::unique_ptr<DominatorTree> DT;
597 std::unique_ptr<PostDominatorTree> PDT;
603 if (HasProfileSummary)
613 if (ColdBlocks.
count(BB))
622 dbgs() <<
"Found a cold block:\n";
627 DT = std::make_unique<DominatorTree>(
F);
629 PDT = std::make_unique<PostDominatorTree>(
F);
631 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
632 for (OutliningRegion &
Region : Regions) {
636 if (
Region.isEntireFunctionCold()) {
638 return markFunctionCold(
F);
647 return !ColdBlocks.insert(Block.first).second;
653 ++NumColdRegionsFound;
657 if (OutliningWorklist.
empty())
661 unsigned OutlinedFunctionID = 1;
666 assert(!
Region.empty() &&
"Empty outlining region in worklist");
670 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
675 Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI,
TTI,
676 ORE, AC, OutlinedFunctionID);
678 ++OutlinedFunctionID;
681 }
while (!
Region.empty());
682 }
while (!OutliningWorklist.
empty());
688 bool Changed =
false;
689 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
692 if (
F.isDeclaration())
700 if (isFunctionCold(
F)) {
701 Changed |= markFunctionCold(
F);
705 if (!shouldOutlineFrom(
F)) {
711 Changed |= outlineColdRegions(
F, HasProfileSummary);
716bool HotColdSplittingLegacyPass::runOnModule(
Module &M) {
720 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
722 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
725 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(
F).getBFI();
727 std::unique_ptr<OptimizationRemarkEmitter> ORE;
734 if (
auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
735 return ACT->lookupAssumptionCache(
F);
759 std::unique_ptr<OptimizationRemarkEmitter> ORE;
773char HotColdSplittingLegacyPass::ID = 0;
775 "Hot Cold Splitting",
false,
false)
782 return new HotColdSplittingLegacyPass();
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
block_range blocks()
Returns a range view of the basic blocks in the region.
RegionT * getParent() const
Get the parent of the Region.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
user_iterator user_begin()
void dump() const
Support for debugging, callable in GDB: V->dump()
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void initializeHotColdSplittingLegacyPassPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ModulePass * createHotColdSplittingPass()
createHotColdSplittingPass - This pass outlines cold blocks into a separate function(s).
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)