LLVM 20.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
37#include "llvm/CodeGen/Passes.h"
39#include "llvm/IR/Function.h"
42#include <optional>
43
44using namespace llvm;
45
46// FIXME: This cutoff value is CPU dependent and should be moved to
47// TargetTransformInfo once we consider enabling this on other platforms.
48// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50// The default was empirically determined to be optimal when considering cutoff
51// values between the 99%-ile and 100%-ile with respect to iTLB and icache metrics on
52// Intel CPUs.
54 PercentileCutoff("mfs-psi-cutoff",
55 cl::desc("Percentile profile summary cutoff used to "
56 "determine cold blocks. Unused if set to zero."),
57 cl::init(999950), cl::Hidden);
58
60 "mfs-count-threshold",
62 "Minimum number of times a block must be executed to be retained."),
64
66 "mfs-split-ehcode",
67 cl::desc("Splits all EH code and it's descendants by default."),
68 cl::init(false), cl::Hidden);
69
70namespace {
71
72class MachineFunctionSplitter : public MachineFunctionPass {
73public:
74 static char ID;
75 MachineFunctionSplitter() : MachineFunctionPass(ID) {
77 }
78
79 StringRef getPassName() const override {
80 return "Machine Function Splitter Transformation";
81 }
82
83 void getAnalysisUsage(AnalysisUsage &AU) const override;
84
86};
87} // end anonymous namespace
88
89/// setDescendantEHBlocksCold - This marks all EH pads and blocks reachable
90/// only through EH pads as cold. This helps mark EH pads statically cold
91/// instead of relying on profile data.
94 computeEHOnlyBlocks(MF, EHBlocks);
95 for (auto Block : EHBlocks) {
97 }
98}
99
101 auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
102 return X.getSectionID().Type < Y.getSectionID().Type;
103 };
106}
107
109 const MachineBlockFrequencyInfo *MBFI,
110 ProfileSummaryInfo *PSI) {
111 std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
112 // For instrumentation profiles and sample profiles, we use different ways
113 // to judge whether a block is cold and should be split.
115 // If using an instrumentation profile, which is deemed "accurate", no count
116 // means cold.
117 if (!Count)
118 return true;
119 if (PercentileCutoff > 0)
120 return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
121 // Fallthrough to end of function.
122 } else if (PSI->hasSampleProfile()) {
123 // For sample profile, no count means "do not judge coldness".
124 if (!Count)
125 return false;
126 }
127
128 return (*Count < ColdCountThreshold);
129}
130
131bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
132 // We target functions with profile data. Static information in the form
133 // of exception handling code may be split to cold if user passes the
134 // mfs-split-ehcode flag.
135 bool UseProfileData = MF.getFunction().hasProfileData();
136 if (!UseProfileData && !SplitAllEHCode)
137 return false;
138
140 if (!TII.isFunctionSafeToSplit(MF))
141 return false;
142
143 // Renumbering blocks here preserves the order of the blocks as
144 // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
145 // blocks. Preserving the order of blocks is essential to retaining decisions
146 // made by prior passes such as MachineBlockPlacement.
147 MF.RenumberBlocks();
148 MF.setBBSectionsType(BasicBlockSection::Preset);
149
150 MachineBlockFrequencyInfo *MBFI = nullptr;
151 ProfileSummaryInfo *PSI = nullptr;
152 if (UseProfileData) {
153 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
154 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
155 // If we don't have a good profile (sample profile is not deemed
156 // as a "good profile") and the function is not hot, then early
157 // return. (Because we can only trust hot functions when profile
158 // quality is not good.)
159 if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
160 // Split all EH code and its descendants statically by default.
161 if (SplitAllEHCode)
164 return true;
165 }
166 }
167
169 for (auto &MBB : MF) {
170 if (MBB.isEntryBlock())
171 continue;
172
173 if (MBB.isEHPad())
174 LandingPads.push_back(&MBB);
175 else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
176 TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
178 }
179
180 // Split all EH code and its descendants statically by default.
181 if (SplitAllEHCode)
183 // We only split out eh pads if all of them are cold.
184 else {
185 // Here we have UseProfileData == true.
186 bool HasHotLandingPads = false;
187 for (const MachineBasicBlock *LP : LandingPads) {
188 if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
189 HasHotLandingPads = true;
190 }
191 if (!HasHotLandingPads) {
192 for (MachineBasicBlock *LP : LandingPads)
193 LP->setSectionID(MBBSectionID::ColdSectionID);
194 }
195 }
196
198 return true;
199}
200
201void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
205}
206
207char MachineFunctionSplitter::ID = 0;
208INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
209 "Split machine functions using profile information", false,
210 false)
211
213 return new MachineFunctionSplitter();
214}
MachineBasicBlock & MBB
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition: MD5.cpp:55
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF)
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static void setDescendantEHBlocksCold(MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:333
bool isEHPad() const
Returns true if the block is a landing pad.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
std::optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void setBBSectionsType(BasicBlockSection V)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasCSInstrumentationProfile() const
Returns true if module M has context sensitive instrumentation profile.
bool hasInstrumentationProfile() const
Returns true if module M has instrumentation profile.
bool hasSampleProfile() const
Returns true if module M has sample profile.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMachineFunctionSplitterPass(PassRegistry &)
static void computeEHOnlyBlocks(FunctionT &F, DenseSet< BlockT * > &EHBlocks)
Compute a list of blocks that are only reachable via EH paths.
Definition: EHUtils.h:18
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
void avoidZeroOffsetLandingPad(MachineFunction &MF)
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
static const MBBSectionID ColdSectionID