LLVM  12.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1 //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Uses profile information to split out cold blocks.
11 //
12 // This pass splits out cold machine basic blocks from the parent function. This
13 // implementation leverages the basic block section framework. Blocks marked
14 // cold by this pass are grouped together in a separate section prefixed with
15 // ".text.unlikely.*". The linker can then group these together as a cold
16 // section. The split part of the function is a contiguous region identified by
17 // the symbol "foo.cold". Grouping all cold blocks across functions together
18 // decreases fragmentation and improves icache and itlb utilization. Note that
19 // the overall changes to the binary size are negligible; only a small number of
20 // additional jump instructions may be introduced.
21 //
22 // For the original RFC of this pass please see
23 // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/InitializePasses.h"
39 
40 using namespace llvm;
41 
42 // FIXME: This cutoff value is CPU dependent and should be moved to
43 // TargetTransformInfo once we consider enabling this on other platforms.
44 // The value is a ProfileSummaryInfo integer percentile cutoff, i.e. the
45 // percentile scaled by 10^4: the default of 999950 means that all blocks
46 // colder than the 99.995th percentile are split. The default was determined
47 // empirically to be optimal among cutoff values between the 99th and 100th
48 // percentiles with respect to iTLB and icache metrics on Intel CPUs.
// Setting the option to zero disables the percentile check; isColdBlock then
// compares the block's profile count against ColdCountThreshold instead.
49 static cl::opt<unsigned>
50  PercentileCutoff("mfs-psi-cutoff",
51  cl::desc("Percentile profile summary cutoff used to "
52  "determine cold blocks. Unused if set to zero."),
53  cl::init(999950), cl::Hidden);
54 
56  "mfs-count-threshold",
57  cl::desc(
58  "Minimum number of times a block must be executed to be retained."),
59  cl::init(1), cl::Hidden);
60 
61 namespace {
62 
// Machine function pass implementing the cold-block splitting described in the
// file header. The class only overrides the pass name, the analysis-usage hook
// and runOnMachineFunction; it declares no state other than the static pass ID.
63 class MachineFunctionSplitter : public MachineFunctionPass {
64 public:
  // Pass identifier handed to the MachineFunctionPass base constructor.
65  static char ID;
66  MachineFunctionSplitter() : MachineFunctionPass(ID) {
  // NOTE(review): listing line 67 is absent here; it is presumably the
  // initializeMachineFunctionSplitterPass(...) registration call. Confirm
  // against the original file.
68  }
69 
  // Human-readable name of this pass.
70  StringRef getPassName() const override {
71  return "Machine Function Splitter Transformation";
72  }
73 
  // Hook for declaring the analyses this pass uses; defined out of line.
74  void getAnalysisUsage(AnalysisUsage &AU) const override;
75 
  // Per-function entry point; defined out of line. The definition returns
  // false on each of its early exits and true after processing the blocks.
76  bool runOnMachineFunction(MachineFunction &F) override;
77 };
78 } // end anonymous namespace
79 
81  const MachineBlockFrequencyInfo *MBFI,
82  ProfileSummaryInfo *PSI) {
  // Returns true when the block should be treated as cold.
  // NOTE(review): the first line of the signature (listing line 80) and the
  // definition of `Count` (listing line 83) are missing from this listing.
  // `Count` is presumably the optional profile count of the block obtained
  // from MBFI; confirm against the original file.

  // A block without a profile count is considered cold.
84  if (!Count.hasValue())
85  return true;
86 
  // With a non-zero percentile cutoff, defer to the profile summary's
  // percentile-based coldness query.
87  if (PercentileCutoff > 0) {
88  return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
89  }
  // Cutoff disabled (zero): fall back to the absolute count threshold.
90  return (*Count < ColdCountThreshold);
91 }
92 
// Runs the splitter on one machine function.
//
// Returns false without touching the function when it has no profile data,
// when it carries an explicit section attribute, or when its section prefix is
// "unlikely" or "unknown". Otherwise the blocks are renumbered, every block
// that is neither predecessor-less nor an EH pad is tested with isColdBlock,
// and the function returns true.
93 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
94  // TODO: We only target functions with profile data. Static information may
95  // also be considered but we don't see performance improvements yet.
96  if (!MF.getFunction().hasProfileData())
97  return false;
98 
99  // TODO: We don't split functions where a section attribute has been set
100  // since the split part may not be placed in a contiguous region. It may also
101  // be more beneficial to augment the linker to ensure contiguous layout of
102  // split functions within the same section as specified by the attribute.
103  if (!MF.getFunction().getSection().empty())
104  return false;
105 
106  // We don't want to proceed further for cold functions
107  // or functions of unknown hotness. Lukewarm functions have no prefix.
108  Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
109  if (SectionPrefix.hasValue() &&
110  (SectionPrefix.getValue().equals("unlikely") ||
111  SectionPrefix.getValue().equals("unknown"))) {
112  return false;
113  }
114 
115  // Renumbering blocks here preserves the order of the blocks as
116  // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
117  // blocks. Preserving the order of blocks is essential to retaining decisions
118  // made by prior passes such as MachineBlockPlacement.
119  MF.RenumberBlocks();
  // NOTE(review): listing line 120 is missing here; presumably it selects the
  // basic-block-sections mode via MF.setBBSectionsType(...). Confirm against
  // the original file.
121  auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
122  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
123 
124  for (auto &MBB : MF) {
125  // FIXME: We retain the entry block and conservatively keep all landing pad
126  // blocks as part of the original function. Once D73739 is submitted, we can
127  // improve the handling of ehpads.
128  if ((MBB.pred_empty() || MBB.isEHPad()))
129  continue;
130  if (isColdBlock(MBB, MBFI, PSI))
  // NOTE(review): the statement controlled by this `if` (listing line 131) is
  // missing; presumably it assigns the cold section ID to MBB. Confirm
  // against the original file.
132  }
133 
  // Orders blocks by the Type field of their section ID.
134  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
135  return X.getSectionID().Type < Y.getSectionID().Type;
136  };
  // NOTE(review): listing line 137 is missing; presumably it is the call to
  // sortBasicBlocksAndUpdateBranches(MF, Comparator), since Comparator is
  // otherwise unused. Confirm against the original file.
138 
139  return true;
140 }
141 
// Out-of-line definition of the analysis-usage hook.
// NOTE(review): the body (listing lines 143-145) is not visible in this
// listing. runOnMachineFunction calls getAnalysis<MachineBlockFrequencyInfo>()
// and getAnalysis<ProfileSummaryInfoWrapperPass>(), so both are presumably
// registered on AU here. Confirm against the original file.
142 void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
146 }
147 
// Registers MachineFunctionSplitter under the argument name
// "machine-function-splitter" with the description string below.
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags. Confirm against the INITIALIZE_PASS
// definition.
149 INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
150  "Split machine functions using profile information", false,
151  false)
152 
154  return new MachineFunctionSplitter();
155 }
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:114
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
void initializeMachineFunctionSplitterPass(PassRegistry &)
Analysis providing profile information.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
F(f)
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineBasicBlock & MBB
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
AnalysisUsage & addRequired()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
Optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
void setBBSectionsType(BasicBlockSection V)
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", "Split machine functions using profile information", false, false) MachineFunctionPass *llvm
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
static const MBBSectionID ColdSectionID
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
Represent the analysis usage information of a pass.
Optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:1760
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:330
static bool isColdBlock(MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
Module.h This file contains the declarations for the Module class.
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:257
constexpr bool hasValue() const
Definition: Optional.h:263
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:190
bool isEHPad() const
Returns true if the block is a landing pad.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57