LLVM  15.0.0git
X86PadShortFunction.cpp
Go to the documentation of this file.
1 //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass which will pad short functions to prevent
10 // a stall if a function returns before the return address is ready. This
11 // is needed for some Intel Atom processors.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "X86.h"
17 #include "X86InstrInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/Statistic.h"
25 #include "llvm/CodeGen/Passes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Support/Debug.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "x86-pad-short-functions"
34 
35 STATISTIC(NumBBsPadded, "Number of basic blocks padded");
36 
37 namespace {
/// VisitedBBInfo - Summary recorded for a basic block once it has been
/// scanned, so that the scan does not have to be repeated.
struct VisitedBBInfo {
  /// True when the basic block contains a return instruction.
  bool HasReturn = false;

  /// When HasReturn is true: number of cycles from the start of the block
  /// until the return. Otherwise: number of cycles until the end of the
  /// block.
  unsigned int Cycles = 0;

  /// Builds the "no return, zero cycles" record.
  VisitedBBInfo() = default;

  /// Builds a record from an explicit return flag and cycle count.
  VisitedBBInfo(bool Returns, unsigned int NumCycles)
      : HasReturn{Returns}, Cycles{NumCycles} {}
};
50 
51  struct PadShortFunc : public MachineFunctionPass {
52  static char ID;
53  PadShortFunc() : MachineFunctionPass(ID) {}
54 
55  bool runOnMachineFunction(MachineFunction &MF) override;
56 
57  void getAnalysisUsage(AnalysisUsage &AU) const override {
62  }
63 
64  MachineFunctionProperties getRequiredProperties() const override {
67  }
68 
69  StringRef getPassName() const override {
70  return "X86 Atom pad short functions";
71  }
72 
73  private:
74  void findReturns(MachineBasicBlock *MBB,
75  unsigned int Cycles = 0);
76 
77  bool cyclesUntilReturn(MachineBasicBlock *MBB,
78  unsigned int &Cycles);
79 
82  unsigned int NOOPsToAdd);
83 
84  const unsigned int Threshold = 4;
85 
86  // ReturnBBs - Maps basic blocks that return to the minimum number of
87  // cycles until the return, starting from the entry block.
89 
90  // VisitedBBs - Cache of previously visited BBs.
92 
93  TargetSchedModel TSM;
94  };
95 
96  char PadShortFunc::ID = 0;
97 }
98 
100  return new PadShortFunc();
101 }
102 
103 /// runOnMachineFunction - Loop over all of the basic blocks, inserting
104 /// NOOP instructions before early exits.
105 bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
106  if (skipFunction(MF.getFunction()))
107  return false;
108 
109  if (MF.getFunction().hasOptSize())
110  return false;
111 
112  if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
113  return false;
114 
115  TSM.init(&MF.getSubtarget());
116 
117  auto *PSI =
118  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
119  auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
120  &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
121  nullptr;
122 
123  // Search through basic blocks and mark the ones that have early returns
124  ReturnBBs.clear();
125  VisitedBBs.clear();
126  findReturns(&MF.front());
127 
128  bool MadeChange = false;
129 
130  // Pad the identified basic blocks with NOOPs
131  for (const auto &ReturnBB : ReturnBBs) {
132  MachineBasicBlock *MBB = ReturnBB.first;
133  unsigned Cycles = ReturnBB.second;
134 
135  // Function::hasOptSize is already checked above.
136  bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
137  if (OptForSize)
138  continue;
139 
140  if (Cycles < Threshold) {
141  // BB ends in a return. Skip over any DBG_VALUE instructions
142  // trailing the terminator.
143  assert(MBB->size() > 0 &&
144  "Basic block should contain at least a RET but is empty");
145  MachineBasicBlock::iterator ReturnLoc = --MBB->end();
146 
147  while (ReturnLoc->isDebugInstr())
148  --ReturnLoc;
149  assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
150  "Basic block does not end with RET");
151 
152  addPadding(MBB, ReturnLoc, Threshold - Cycles);
153  NumBBsPadded++;
154  MadeChange = true;
155  }
156  }
157 
158  return MadeChange;
159 }
160 
161 /// findReturn - Starting at MBB, follow control flow and add all
162 /// basic blocks that contain a return to ReturnBBs.
163 void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
164  // If this BB has a return, note how many cycles it takes to get there.
165  bool hasReturn = cyclesUntilReturn(MBB, Cycles);
166  if (Cycles >= Threshold)
167  return;
168 
169  if (hasReturn) {
170  ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
171  return;
172  }
173 
174  // Follow branches in BB and look for returns
175  for (MachineBasicBlock *Succ : MBB->successors())
176  if (Succ != MBB)
177  findReturns(Succ, Cycles);
178 }
179 
180 /// cyclesUntilReturn - return true if the MBB has a return instruction,
181 /// and return false otherwise.
182 /// Cycles will be incremented by the number of cycles taken to reach the
183 /// return or the end of the BB, whichever occurs first.
184 bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
185  unsigned int &Cycles) {
186  // Return cached result if BB was previously visited
188  = VisitedBBs.find(MBB);
189  if (it != VisitedBBs.end()) {
190  VisitedBBInfo BBInfo = it->second;
191  Cycles += BBInfo.Cycles;
192  return BBInfo.HasReturn;
193  }
194 
195  unsigned int CyclesToEnd = 0;
196 
197  for (MachineInstr &MI : *MBB) {
198  // Mark basic blocks with a return instruction. Calls to other
199  // functions do not count because the called function will be padded,
200  // if necessary.
201  if (MI.isReturn() && !MI.isCall()) {
202  VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
203  Cycles += CyclesToEnd;
204  return true;
205  }
206 
207  CyclesToEnd += TSM.computeInstrLatency(&MI);
208  }
209 
210  VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
211  Cycles += CyclesToEnd;
212  return false;
213 }
214 
215 /// addPadding - Add the given number of NOOP instructions to the function
216 /// just prior to the return at MBBI
219  unsigned int NOOPsToAdd) {
220  const DebugLoc &DL = MBBI->getDebugLoc();
221  unsigned IssueWidth = TSM.getIssueWidth();
222 
223  for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
224  BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
225 }
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm::createX86PadShortFunctions
FunctionPass * createX86PadShortFunctions()
Return a pass that pads short functions with NOOPs.
Definition: X86PadShortFunction.cpp:99
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
X86Subtarget.h
Statistic.h
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineSizeOpts.h
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:127
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:865
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
X86.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:196
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
Passes.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
LazyMachineBlockFrequencyInfo.h
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*--===//
TargetSchedule.h
addPadding
static void addPadding(BinaryStreamWriter &Writer)
Definition: ContinuationRecordBuilder.cpp:21
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
ProfileSummaryInfo.h
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:248
llvm::LazyMachineBlockFrequencyInfoPass
This is an alternative analysis pass to MachineBlockFrequencyInfo.
Definition: LazyMachineBlockFrequencyInfo.h:37
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:365
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:664
hasReturn
static bool hasReturn(const MachineBasicBlock &MBB)
Returns true if MBB contains an instruction that returns.
Definition: HexagonFrameLowering.cpp:349
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
Function.h
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
X86InstrInfo.h
llvm::MachineInstrBundleIterator< MachineInstr >
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:280
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38