LLVM 20.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
37#include "llvm/CodeGen/Passes.h"
39#include "llvm/IR/Function.h"
42#include <optional>
43
44using namespace llvm;
45
46// FIXME: This cutoff value is CPU dependent and should be moved to
47// TargetTransformInfo once we consider enabling this on other platforms.
48// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50// The default was empirically determined to be optimal when considering cutoff
51// values between 99%-ile and 100%-ile with respect to iTLB and icache metrics on
52// Intel CPUs.
54 PercentileCutoff("mfs-psi-cutoff",
55 cl::desc("Percentile profile summary cutoff used to "
56 "determine cold blocks. Unused if set to zero."),
57 cl::init(999950), cl::Hidden);
58
60 "mfs-count-threshold",
62 "Minimum number of times a block must be executed to be retained."),
64
66 "mfs-split-ehcode",
67 cl::desc("Splits all EH code and it's descendants by default."),
68 cl::init(false), cl::Hidden);
69
70namespace {
71
/// A MachineFunctionPass that splits machine functions using profile
/// information.
/// NOTE(review): this listing omits some source lines (the embedded numbering
/// skips 76 and 85), so the constructor body and at least one member
/// declaration are not visible here.
72class MachineFunctionSplitter : public MachineFunctionPass {
73public:
 // Pass identifier; handed to the MachineFunctionPass base-class constructor.
74 static char ID;
75 MachineFunctionSplitter() : MachineFunctionPass(ID) {
77 }
78
 // Human-readable name of this pass.
79 StringRef getPassName() const override {
80 return "Machine Function Splitter Transformation";
81 }
82
 // Declared here; defined out of line further down in the file.
83 void getAnalysisUsage(AnalysisUsage &AU) const override;
84
86};
87} // end anonymous namespace
88
89/// setDescendantEHBlocksCold - This marks all EH pads and blocks reachable
90/// only via EH pads as cold. This will help mark EH pads statically cold
91/// instead of relying on profile data.
94 computeEHOnlyBlocks(MF, EHBlocks);
95 for (auto Block : EHBlocks) {
97 }
98}
99
101 auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
102 return X.getSectionID().Type < Y.getSectionID().Type;
103 };
106}
107
109 const MachineBlockFrequencyInfo *MBFI,
110 ProfileSummaryInfo *PSI) {
 // Decides whether a block should be treated as cold. The block's profile
 // count is queried from MBFI and may be absent; how an absent count is
 // interpreted depends on the kind of profile PSI reports.
 // NOTE(review): this listing omits the first signature line and the `if`
 // that opens the instrumentation-profile branch (embedded numbering skips
 // 108 and 114).
111 std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
112 // For instrumentation profiles and sample profiles, we use different ways
113 // to judge whether a block is cold and should be split.
115 // If using instrument profile, which is deemed "accurate", no count means
116 // cold.
117 if (!Count)
118 return true;
 // A cutoff of zero disables the percentile test (see the mfs-psi-cutoff
 // option description), leaving the decision to the threshold check below.
119 if (PercentileCutoff > 0)
120 return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
121 // Fallthrough to end of function.
122 } else if (PSI->hasSampleProfile()) {
123 // For sample profile, no count means "do not judge coldness".
124 if (!Count)
125 return false;
126 }
127
 // Reached when no branch above returned: compare the raw count against the
 // mfs-count-threshold option.
 // NOTE(review): Count is dereferenced without a check here. The visible
 // branches return early on an empty Count, but if PSI reports neither of the
 // handled profile kinds an empty Count could reach this line -- confirm.
128 return (*Count < ColdCountThreshold);
129}
130
/// Pass entry point: decides which blocks of MF go into the cold section.
///
/// Returns false without touching MF when the function has no profile data
/// and mfs-split-ehcode is off, when the target reports the function as unsafe
/// to split, or when a BasicBlockSections profile marks the function as hot.
/// Otherwise the blocks are renumbered, the function's BB-sections type is set
/// to Preset, and the function returns true.
///
/// NOTE(review): this listing omits several source lines (the embedded
/// numbering skips 133, 142, 173-174, 179, 188, 193 and 208), so some
/// statements -- e.g. the declarations of TII and LandingPads and the bodies
/// of a few `if`s -- are not visible here.
131bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
132 // Do not split functions when -basic-block-sections=all is specified.
134 return false;
135 // We target functions with profile data. Static information in the form
136 // of exception handling code may be split to cold if user passes the
137 // mfs-split-ehcode flag.
138 bool UseProfileData = MF.getFunction().hasProfileData();
139 if (!UseProfileData && !SplitAllEHCode)
140 return false;
141
143 if (!TII.isFunctionSafeToSplit(MF))
144 return false;
145
146 // Do not split functions with BasicBlockSections profiles as they will
147 // be split by the BasicBlockSections pass.
148 auto BBSectionsProfile =
149 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
150 if (BBSectionsProfile != nullptr &&
151 BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName()))
152 return false;
153
154 // Renumbering blocks here preserves the order of the blocks as
155 // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
156 // blocks. Preserving the order of blocks is essential to retaining decisions
157 // made by prior passes such as MachineBlockPlacement.
158 MF.RenumberBlocks();
159 MF.setBBSectionsType(BasicBlockSection::Preset);
160
 // The profile analyses are fetched only when the function has profile data;
 // both pointers stay null on the EH-only (mfs-split-ehcode) path.
161 MachineBlockFrequencyInfo *MBFI = nullptr;
162 ProfileSummaryInfo *PSI = nullptr;
163 if (UseProfileData) {
164 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
165 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
166 // If we don't have a good profile (sample profile is not deemed
167 // as a "good profile") and the function is not hot, then early
168 // return. (Because we can only trust hot functions when profile
169 // quality is not good.)
170 if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
171 // Split all EH code and its descendants statically by default.
172 if (SplitAllEHCode)
175 return true;
176 }
177 }
178
 // Classify every non-entry block: EH pads are collected for the all-or-nothing
 // decision below; other blocks are tested for coldness using profile data.
180 for (auto &MBB : MF) {
181 if (MBB.isEntryBlock())
182 continue;
183
184 if (MBB.isEHPad())
185 LandingPads.push_back(&MBB);
186 else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
187 TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
189 }
190
191 // Split all EH code and its descendants statically by default.
192 if (SplitAllEHCode)
194 // We only split out eh pads if all of them are cold.
195 else {
196 // Here we have UseProfileData == true.
197 bool HasHotLandingPads = false;
 // A landing pad counts as "hot" here if it is either not cold or not safe
 // to move to the cold section.
198 for (const MachineBasicBlock *LP : LandingPads) {
199 if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
200 HasHotLandingPads = true;
201 }
202 if (!HasHotLandingPads) {
203 for (MachineBasicBlock *LP : LandingPads)
204 LP->setSectionID(MBBSectionID::ColdSectionID);
205 }
206 }
207
209 return true;
210}
211
212void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
217}
218
219char MachineFunctionSplitter::ID = 0;
220INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
221 "Split machine functions using profile information", false,
222 false)
223
225 return new MachineFunctionSplitter();
226}
MachineBasicBlock & MBB
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF)
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static void setDescendantEHBlocksCold(MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:329
bool isEHPad() const
Returns true if the block is a landing pad.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
std::optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void setBBSectionsType(BasicBlockSection V)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasCSInstrumentationProfile() const
Returns true if module M has context sensitive instrumentation profile.
bool hasInstrumentationProfile() const
Returns true if module M has instrumentation profile.
bool hasSampleProfile() const
Returns true if module M has sample profile.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
llvm::BasicBlockSection getBBSectionsType() const
If basic blocks should be emitted into their own section, corresponding to -fbasic-block-sections.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMachineFunctionSplitterPass(PassRegistry &)
static void computeEHOnlyBlocks(FunctionT &F, DenseSet< BlockT * > &EHBlocks)
Compute a list of blocks that are only reachable via EH paths.
Definition: EHUtils.h:18
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
void avoidZeroOffsetLandingPad(MachineFunction &MF)
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
static const MBBSectionID ColdSectionID