LLVM  14.0.0git
X86InsertPrefetch.cpp
Go to the documentation of this file.
1 //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass applies cache prefetch instructions based on a profile. The pass
10 // assumes DiscriminateMemOps ran immediately before, to ensure debug info
11 // matches the one used at profile generation time. The profile is encoded in
12 // afdo format (text or binary). It contains prefetch hints recommendations.
13 // Each recommendation is made in terms of debug info locations, a type (i.e.
14 // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
15 // memory operand (see X86DiscriminateMemOps). The prefetch will be made for
16 // a location at that memory operand + the delta specified in the
17 // recommendation.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "X86.h"
22 #include "X86InstrBuilder.h"
23 #include "X86InstrInfo.h"
24 #include "X86MachineFunctionInfo.h"
25 #include "X86Subtarget.h"
31 using namespace llvm;
32 using namespace sampleprof;
33 
35  PrefetchHintsFile("prefetch-hints-file",
36  cl::desc("Path to the prefetch hints profile. See also "
37  "-x86-discriminate-memops"),
38  cl::Hidden);
39 namespace {
40 
41 class X86InsertPrefetch : public MachineFunctionPass {
42  void getAnalysisUsage(AnalysisUsage &AU) const override;
43  bool doInitialization(Module &) override;
44 
45  bool runOnMachineFunction(MachineFunction &MF) override;
46  struct PrefetchInfo {
47  unsigned InstructionID;
48  int64_t Delta;
49  };
50  typedef SmallVectorImpl<PrefetchInfo> Prefetches;
51  bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
52  Prefetches &prefetches) const;
53 
54 public:
55  static char ID;
56  X86InsertPrefetch(const std::string &PrefetchHintsFilename);
57  StringRef getPassName() const override {
58  return "X86 Insert Cache Prefetches";
59  }
60 
61 private:
62  std::string Filename;
63  std::unique_ptr<SampleProfileReader> Reader;
64 };
65 
66 using PrefetchHints = SampleRecord::CallTargetMap;
67 
68 // Return any prefetching hints for the specified MachineInstruction. The hints
69 // are returned as pairs (name, delta).
70 ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
71  const MachineInstr &MI) {
72  if (const auto &Loc = MI.getDebugLoc())
73  if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
74  return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
75  Loc->getBaseDiscriminator());
76  return std::error_code();
77 }
78 
79 // The prefetch instruction can't take memory operands involving vector
80 // registers.
81 bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
82  Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
83  Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
84  return (BaseReg == 0 ||
85  X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
86  X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
87  (IndexReg == 0 ||
88  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
89  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
90 }
91 
92 } // end anonymous namespace
93 
94 //===----------------------------------------------------------------------===//
95 // Implementation
96 //===----------------------------------------------------------------------===//
97 
98 char X86InsertPrefetch::ID = 0;
99 
100 X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
101  : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
102 
103 /// Return true if the provided MachineInstruction has cache prefetch hints. In
104 /// that case, the prefetch hints are stored, in order, in the Prefetches
105 /// vector.
106 bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
107  const MachineInstr &MI,
108  Prefetches &Prefetches) const {
109  assert(Prefetches.empty() &&
110  "Expected caller passed empty PrefetchInfo vector.");
111  static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
112  {"_nta_", X86::PREFETCHNTA},
113  {"_t0_", X86::PREFETCHT0},
114  {"_t1_", X86::PREFETCHT1},
115  {"_t2_", X86::PREFETCHT2},
116  };
117  static const char *SerializedPrefetchPrefix = "__prefetch";
118 
119  const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
120  if (!T)
121  return false;
122  int16_t max_index = -1;
123  // Convert serialized prefetch hints into PrefetchInfo objects, and populate
124  // the Prefetches vector.
125  for (const auto &S_V : *T) {
126  StringRef Name = S_V.getKey();
127  if (Name.consume_front(SerializedPrefetchPrefix)) {
128  int64_t D = static_cast<int64_t>(S_V.second);
129  unsigned IID = 0;
130  for (const auto &HintType : HintTypes) {
131  if (Name.startswith(HintType.first)) {
132  Name = Name.drop_front(HintType.first.size());
133  IID = HintType.second;
134  break;
135  }
136  }
137  if (IID == 0)
138  return false;
139  uint8_t index = 0;
140  Name.consumeInteger(10, index);
141 
142  if (index >= Prefetches.size())
143  Prefetches.resize(index + 1);
144  Prefetches[index] = {IID, D};
145  max_index = std::max(max_index, static_cast<int16_t>(index));
146  }
147  }
148  assert(max_index + 1 >= 0 &&
149  "Possible overflow: max_index + 1 should be positive.");
150  assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
151  "The number of prefetch hints received should match the number of "
152  "PrefetchInfo objects returned");
153  return !Prefetches.empty();
154 }
155 
156 bool X86InsertPrefetch::doInitialization(Module &M) {
157  if (Filename.empty())
158  return false;
159 
160  LLVMContext &Ctx = M.getContext();
162  SampleProfileReader::create(Filename, Ctx);
163  if (std::error_code EC = ReaderOrErr.getError()) {
164  std::string Msg = "Could not open profile: " + EC.message();
165  Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
167  return false;
168  }
169  Reader = std::move(ReaderOrErr.get());
170  Reader->read();
171  return true;
172 }
173 
174 void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
175  AU.setPreservesAll();
176  MachineFunctionPass::getAnalysisUsage(AU);
177 }
178 
179 bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
180  if (!Reader)
181  return false;
182  const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
183  if (!Samples)
184  return false;
185 
186  bool Changed = false;
187 
189  SmallVector<PrefetchInfo, 4> Prefetches;
190  for (auto &MBB : MF) {
191  for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
192  auto Current = MI;
193  ++MI;
194 
195  int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
196  if (Offset < 0)
197  continue;
198  unsigned Bias = X86II::getOperandBias(Current->getDesc());
199  int MemOpOffset = Offset + Bias;
200  // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
201  if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
202  continue;
203  Prefetches.clear();
204  if (!findPrefetchInfo(Samples, *Current, Prefetches))
205  continue;
206  assert(!Prefetches.empty() &&
207  "The Prefetches vector should contain at least a value if "
208  "findPrefetchInfo returned true.");
209  for (auto &PrefInfo : Prefetches) {
210  unsigned PFetchInstrID = PrefInfo.InstructionID;
211  int64_t Delta = PrefInfo.Delta;
212  const MCInstrDesc &Desc = TII->get(PFetchInstrID);
213  MachineInstr *PFetch =
214  MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
215  MachineInstrBuilder MIB(MF, PFetch);
216 
217  static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
218  X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
219  X86::AddrSegmentReg == 4,
220  "Unexpected change in X86 operand offset order.");
221 
222  // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc.
223  // FIXME(mtrofin): consider adding a:
224  // MachineInstrBuilder::set(unsigned offset, op).
225  MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
226  .addImm(
227  Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
228  .addReg(
229  Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
230  .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
231  Delta)
232  .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
233  .getReg());
234 
235  if (!Current->memoperands_empty()) {
236  MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
237  MIB.addMemOperand(MF.getMachineMemOperand(
238  CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
239  }
240 
241  // Insert before Current. This is because Current may clobber some of
242  // the registers used to describe the input memory operand.
243  MBB.insert(Current, PFetch);
244  Changed = true;
245  }
246  }
247  }
248  return Changed;
249 }
250 
252  return new X86InsertPrefetch(PrefetchHintsFile);
253 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::X86II::getMemoryOperandNo
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1090
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
X86Subtarget.h
DebugInfoMetadata.h
T
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
X86InstrBuilder.h
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:227
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::X86::AddrDisp
@ AddrDisp
Definition: X86BaseInfo.h:35
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::X86::AddrSegmentReg
@ AddrSegmentReg
AddrSegmentReg - The operand # of the segment in the memory operand.
Definition: X86BaseInfo.h:38
X86MachineFunctionInfo.h
X86.h
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:47
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ErrorOr::getError
std::error_code getError() const
Definition: ErrorOr.h:153
SampleProf.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition: X86BaseInfo.h:32
SampleProfile.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::X86::AddrScaleAmt
@ AddrScaleAmt
Definition: X86BaseInfo.h:33
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::X86II::getOperandBias
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1050
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:684
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::createX86InsertPrefetchPass
FunctionPass * createX86InsertPrefetchPass()
This pass applies profiling information to insert cache prefetches.
Definition: X86InsertPrefetch.cpp:251
SampleProfReader.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::codeview::CompileSym2Flags::EC
@ EC
MachineModuleInfo.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:252
llvm::sampleprof::FunctionSamples::getOffset
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:286
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::sampleprof::SampleRecord::CallTargetMap
StringMap< uint64_t > CallTargetMap
Definition: SampleProf.h:328
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:228
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1317
llvm::MachineMemOperand::getSize
uint64_t getSize() const
Return the size in bytes of the memory reference.
Definition: MachineMemOperand.h:236
PrefetchHintsFile
static cl::opt< std::string > PrefetchHintsFile("prefetch-hints-file", cl::desc("Path to the prefetch hints profile. See also " "-x86-discriminate-memops"), cl::Hidden)
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::cl::desc
Definition: CommandLine.h:412
X86InstrInfo.h
llvm::sampleprof::FunctionSamples::findFunctionSamples
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
Definition: SampleProf.cpp:235
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37