LLVM 23.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
37#include "llvm/CodeGen/Passes.h"
39#include "llvm/IR/Function.h"
42#include <optional>
43
44using namespace llvm;
45
46// FIXME: This cutoff value is CPU dependent and should be moved to
47// TargetTransformInfo once we consider enabling this on other platforms.
48// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50// The default was empirically determined to be optimal when considering cutoff
51// values between 99%-ile and 100%-ile with respect to iTLB and icache metrics
52// on Intel CPUs.
54 PercentileCutoff("mfs-psi-cutoff",
55 cl::desc("Percentile profile summary cutoff used to "
56 "determine cold blocks. Unused if set to zero."),
57 cl::init(999950), cl::Hidden);
58
60 "mfs-count-threshold",
62 "Minimum number of times a block must be executed to be retained."),
64
66 "mfs-split-ehcode",
67 cl::desc("Splits all EH code and it's descendants by default."),
68 cl::init(false), cl::Hidden);
69
70namespace {
71
/// Machine function pass that decides which machine basic blocks of a function
/// are placed in the cold section (see runOnMachineFunction for the policy).
72class MachineFunctionSplitter : public MachineFunctionPass {
73public:
    // Pass identifier; handed to the MachineFunctionPass base class by the
    // constructor below.
74 static char ID;
75 MachineFunctionSplitter() : MachineFunctionPass(ID) {}
76
    // Human-readable name of this pass.
77 StringRef getPassName() const override {
78 return "Machine Function Splitter Transformation";
79 }
80
    // Declares the analyses this pass requires or optionally uses.
81 void getAnalysisUsage(AnalysisUsage &AU) const override;
82
    // Processes one machine function. Every `return false` in the definition
    // happens before the function is modified; once blocks have been
    // renumbered the definition only returns true. (The out-of-line
    // definition names this parameter MF rather than F.)
83 bool runOnMachineFunction(MachineFunction &F) override;
84};
85} // end anonymous namespace
86
87/// setDescendantEHBlocksCold - Marks all EH pads, and all blocks reachable
88/// only through EH pads, as cold. This lets EH code be treated as statically
89/// cold instead of relying on profile data.
92 computeEHOnlyBlocks(MF, EHBlocks);
93 for (auto Block : EHBlocks) {
95 }
96}
97
99 auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
100 return X.getSectionID().Type < Y.getSectionID().Type;
101 };
104}
105
107 const MachineBlockFrequencyInfo *MBFI,
108 ProfileSummaryInfo *PSI) {
    // Profile count that MBFI reports for MBB; the optional is empty when no
    // count is available for the block.
109 std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
110 // For instrumentation profiles and sample profiles, we use different ways
111 // to judge whether a block is cold and should be split.
113 // If using an instrumentation profile, which is deemed "accurate", no count
114 // means cold.
115 if (!Count)
116 return true;
    // A cutoff of zero disables the percentile test (see the -mfs-psi-cutoff
    // description), in which case control falls through to the count
    // threshold at the end.
    // NOTE(review): the statement guarded by this check is absent from this
    // listing; presumably it returns PSI->isColdCountNthPercentile(...) --
    // confirm against the real source.
117 if (PercentileCutoff > 0)
119 // Fallthrough to end of function.
120 } else if (PSI->hasSampleProfile()) {
121 // For sample profile, no count means "do not judge coldness".
122 if (!Count)
123 return false;
124 }
125
    // Final test: a block is cold when its count is below the
    // -mfs-count-threshold value.
    // NOTE(review): Count is only checked for emptiness inside the two profile
    // branches above. If neither branch is taken, *Count is dereferenced here
    // without a check -- confirm that this path can never see an empty Count.
126 return (*Count < ColdCountThreshold);
127}
128
/// Decides, for one machine function, which blocks are assigned to the cold
/// section.
///
/// Returns false, leaving the function untouched, when the function is
/// skipped, when -basic-block-sections=all is in effect, when there is neither
/// profile data nor -mfs-split-ehcode, when the target reports the function
/// unsafe to split, or when an available BasicBlockSections profile reader
/// reports the function as hot. Every path after the blocks have been
/// renumbered returns true.
129bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
130 if (skipFunction(MF.getFunction()))
131 return false;
132
133 // Do not split functions when -basic-block-sections=all is specified.
135 return false;
136 // We target functions with profile data. Static information in the form
137 // of exception handling code may be split to cold if user passes the
138 // mfs-split-ehcode flag.
139 bool UseProfileData = MF.getFunction().hasProfileData();
140 if (!UseProfileData && !SplitAllEHCode)
141 return false;
142
    // The target gets a veto over splitting this function at all.
143 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
144 if (!TII.isFunctionSafeToSplit(MF))
145 return false;
146
147 // Do not split functions with BasicBlockSections profiles as they will
148 // be split by the BasicBlockSections pass.
    // The reader is optional (getAnalysisIfAvailable), hence the null check.
149 auto BBSectionsProfile =
150 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
151 if (BBSectionsProfile != nullptr &&
152 BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName()))
153 return false;
154
155 // Renumbering blocks here preserves the order of the blocks as
156 // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
157 // blocks. Preserving the order of blocks is essential to retaining decisions
158 // made by prior passes such as MachineBlockPlacement.
159 MF.RenumberBlocks();
160 MF.setBBSectionsType(BasicBlockSection::Preset);
161
    // Both analyses stay null when the function has no profile data. They are
    // only passed to isColdBlock on paths where UseProfileData is true.
162 MachineBlockFrequencyInfo *MBFI = nullptr;
163 ProfileSummaryInfo *PSI = nullptr;
164 if (UseProfileData) {
165 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
166 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
167 // If we don't have a good profile (sample profile is not deemed
168 // as a "good profile") and the function is not hot, then early
169 // return. (Because we can only trust hot functions when profile
170 // quality is not good.)
171 if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
172 // Split all EH code and its descendants statically by default.
173 if (SplitAllEHCode)
176 return true;
177 }
178 }
179
    // Walk every block except the entry block, which is never considered for
    // splitting. EH pads are only collected here and are decided on as a group
    // further down.
181 for (auto &MBB : MF) {
182 if (MBB.isEntryBlock())
183 continue;
184
185 if (MBB.isEHPad())
186 LandingPads.push_back(&MBB);
    // Non-EH blocks are handled individually, and only when profile data is
    // in use, the block is cold, the target allows moving it, and
    // -mfs-split-ehcode is off.
187 else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
188 TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
190 }
191
192 // Split all EH code and its descendants statically by default.
193 if (SplitAllEHCode)
195 // We only split out eh pads if all of them are cold.
196 else {
197 // Here we have UseProfileData == true.
    // Despite its name, HasHotLandingPads is also set when a landing pad is
    // cold but the target says it cannot be moved to the cold section.
198 bool HasHotLandingPads = false;
199 for (const MachineBasicBlock *LP : LandingPads) {
200 if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
201 HasHotLandingPads = true;
202 }
    // All-or-nothing: landing pads move to the cold section only when every
    // one of them passed the checks above.
203 if (!HasHotLandingPads) {
204 for (MachineBasicBlock *LP : LandingPads)
205 LP->setSectionID(MBBSectionID::ColdSectionID);
206 }
207 }
208
210 return true;
211}
212
/// Declares the analyses this pass depends on.
///
/// Machine module info, machine block frequency info and profile summary info
/// are required. The BasicBlockSections profile reader is only used when it
/// happens to be available, which is why runOnMachineFunction fetches it with
/// getAnalysisIfAvailable and null-checks the result.
213void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
214 AU.addRequired<MachineModuleInfoWrapperPass>();
215 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
216 AU.addRequired<ProfileSummaryInfoWrapperPass>();
217 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
218}
219
// Definition of the static pass identifier declared in the class.
220char MachineFunctionSplitter::ID = 0;
// Pass registration: "machine-function-splitter" is the pass argument string
// and the following literal is its descriptive name. The two trailing `false`
// values fill the macro's `cfg` and `analysis` parameters.
221INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
222 "Split machine functions using profile information", false,
223 false)
224
226 return new MachineFunctionSplitter();
227}
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition MD5.cpp:54
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF)
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static void setDescendantEHBlocksCold(MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition Function.h:334
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void setBBSectionsType(BasicBlockSection V)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Analysis providing profile information.
bool hasCSInstrumentationProfile() const
Returns true if module M has context sensitive instrumentation profile.
bool hasInstrumentationProfile() const
Returns true if module M has instrumentation profile.
bool hasSampleProfile() const
Returns true if module M has sample profile.
LLVM_ABI bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
void push_back(const T &Elt)
llvm::BasicBlockSection getBBSectionsType() const
If basic blocks should be emitted into their own section, corresponding to -fbasic-block-sections.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
static void computeEHOnlyBlocks(FunctionT &F, DenseSet< BlockT * > &EHBlocks)
Compute a list of blocks that are only reachable via EH paths.
Definition EHUtils.h:18
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
void avoidZeroOffsetLandingPad(MachineFunction &MF)
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
LLVM_ABI static const MBBSectionID ColdSectionID