LLVM  16.0.0git
X86InsertPrefetch.cpp
Go to the documentation of this file.
1 //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass applies cache prefetch instructions based on a profile. The pass
10 // assumes DiscriminateMemOps ran immediately before, to ensure debug info
11 // matches the one used at profile generation time. The profile is encoded in
12 // afdo format (text or binary). It contains prefetch hint recommendations.
13 // Each recommendation is made in terms of debug info locations, a type (i.e.
14 // nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
15 // memory operand (see X86DiscriminateMemOps). The prefetch will be made for
16 // a location at that memory operand + the delta specified in the
17 // recommendation.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "X86.h"
22 #include "X86InstrBuilder.h"
23 #include "X86InstrInfo.h"
24 #include "X86MachineFunctionInfo.h"
25 #include "X86Subtarget.h"
32 using namespace llvm;
33 using namespace sampleprof;
34 
36  PrefetchHintsFile("prefetch-hints-file",
37  cl::desc("Path to the prefetch hints profile. See also "
38  "-x86-discriminate-memops"),
39  cl::Hidden);
40 namespace {
41 
42 class X86InsertPrefetch : public MachineFunctionPass {
43  void getAnalysisUsage(AnalysisUsage &AU) const override;
44  bool doInitialization(Module &) override;
45 
46  bool runOnMachineFunction(MachineFunction &MF) override;
47  struct PrefetchInfo {
48  unsigned InstructionID;
49  int64_t Delta;
50  };
51  typedef SmallVectorImpl<PrefetchInfo> Prefetches;
52  bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
53  Prefetches &prefetches) const;
54 
55 public:
56  static char ID;
57  X86InsertPrefetch(const std::string &PrefetchHintsFilename);
58  StringRef getPassName() const override {
59  return "X86 Insert Cache Prefetches";
60  }
61 
62 private:
63  std::string Filename;
64  std::unique_ptr<SampleProfileReader> Reader;
65 };
66 
67 using PrefetchHints = SampleRecord::CallTargetMap;
68 
69 // Return any prefetching hints for the specified MachineInstruction. The hints
70 // are returned as pairs (name, delta).
71 ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
72  const MachineInstr &MI) {
73  if (const auto &Loc = MI.getDebugLoc())
74  if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
75  return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
76  Loc->getBaseDiscriminator());
77  return std::error_code();
78 }
79 
80 // The prefetch instruction can't take memory operands involving vector
81 // registers.
82 bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
83  Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
84  Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
85  return (BaseReg == 0 ||
86  X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
87  X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
88  (IndexReg == 0 ||
89  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
90  X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
91 }
92 
93 } // end anonymous namespace
94 
95 //===----------------------------------------------------------------------===//
96 // Implementation
97 //===----------------------------------------------------------------------===//
98 
99 char X86InsertPrefetch::ID = 0;
100 
101 X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
102  : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
103 
104 /// Return true if the provided MachineInstruction has cache prefetch hints. In
105 /// that case, the prefetch hints are stored, in order, in the Prefetches
106 /// vector.
107 bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
108  const MachineInstr &MI,
109  Prefetches &Prefetches) const {
110  assert(Prefetches.empty() &&
111  "Expected caller passed empty PrefetchInfo vector.");
112  static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
113  {"_nta_", X86::PREFETCHNTA},
114  {"_t0_", X86::PREFETCHT0},
115  {"_t1_", X86::PREFETCHT1},
116  {"_t2_", X86::PREFETCHT2},
117  };
118  static const char *SerializedPrefetchPrefix = "__prefetch";
119 
120  const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
121  if (!T)
122  return false;
123  int16_t max_index = -1;
124  // Convert serialized prefetch hints into PrefetchInfo objects, and populate
125  // the Prefetches vector.
126  for (const auto &S_V : *T) {
127  StringRef Name = S_V.getKey();
128  if (Name.consume_front(SerializedPrefetchPrefix)) {
129  int64_t D = static_cast<int64_t>(S_V.second);
130  unsigned IID = 0;
131  for (const auto &HintType : HintTypes) {
132  if (Name.startswith(HintType.first)) {
133  Name = Name.drop_front(HintType.first.size());
134  IID = HintType.second;
135  break;
136  }
137  }
138  if (IID == 0)
139  return false;
140  uint8_t index = 0;
141  Name.consumeInteger(10, index);
142 
143  if (index >= Prefetches.size())
144  Prefetches.resize(index + 1);
145  Prefetches[index] = {IID, D};
146  max_index = std::max(max_index, static_cast<int16_t>(index));
147  }
148  }
149  assert(max_index + 1 >= 0 &&
150  "Possible overflow: max_index + 1 should be positive.");
151  assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
152  "The number of prefetch hints received should match the number of "
153  "PrefetchInfo objects returned");
154  return !Prefetches.empty();
155 }
156 
157 bool X86InsertPrefetch::doInitialization(Module &M) {
158  if (Filename.empty())
159  return false;
160 
161  LLVMContext &Ctx = M.getContext();
163  SampleProfileReader::create(Filename, Ctx);
164  if (std::error_code EC = ReaderOrErr.getError()) {
165  std::string Msg = "Could not open profile: " + EC.message();
168  return false;
169  }
170  Reader = std::move(ReaderOrErr.get());
171  Reader->read();
172  return true;
173 }
174 
175 void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
176  AU.setPreservesAll();
177  MachineFunctionPass::getAnalysisUsage(AU);
178 }
179 
180 bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
181  if (!Reader)
182  return false;
183  const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
184  if (!Samples)
185  return false;
186 
187  bool Changed = false;
188 
190  SmallVector<PrefetchInfo, 4> Prefetches;
191  for (auto &MBB : MF) {
192  for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
193  auto Current = MI;
194  ++MI;
195 
196  int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
197  if (Offset < 0)
198  continue;
199  unsigned Bias = X86II::getOperandBias(Current->getDesc());
200  int MemOpOffset = Offset + Bias;
201  // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
202  if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
203  continue;
204  Prefetches.clear();
205  if (!findPrefetchInfo(Samples, *Current, Prefetches))
206  continue;
207  assert(!Prefetches.empty() &&
208  "The Prefetches vector should contain at least a value if "
209  "findPrefetchInfo returned true.");
210  for (auto &PrefInfo : Prefetches) {
211  unsigned PFetchInstrID = PrefInfo.InstructionID;
212  int64_t Delta = PrefInfo.Delta;
213  const MCInstrDesc &Desc = TII->get(PFetchInstrID);
214  MachineInstr *PFetch =
215  MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
216  MachineInstrBuilder MIB(MF, PFetch);
217 
218  static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
219  X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
220  X86::AddrSegmentReg == 4,
221  "Unexpected change in X86 operand offset order.");
222 
223  // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc.
224  // FIXME(mtrofin): consider adding a:
225  // MachineInstrBuilder::set(unsigned offset, op).
226  MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
227  .addImm(
228  Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
229  .addReg(
230  Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
231  .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
232  Delta)
233  .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
234  .getReg());
235 
236  if (!Current->memoperands_empty()) {
237  MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
238  MIB.addMemOperand(MF.getMachineMemOperand(
239  CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
240  }
241 
242  // Insert before Current. This is because Current may clobber some of
243  // the registers used to describe the input memory operand.
244  MBB.insert(Current, PFetch);
245  Changed = true;
246  }
247  }
248  }
249  return Changed;
250 }
251 
253  return new X86InsertPrefetch(PrefetchHintsFile);
254 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::X86II::getMemoryOperandNo
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1100
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
X86Subtarget.h
DebugInfoMetadata.h
T
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:95
X86InstrBuilder.h
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:226
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::logicalview::LVAttributeKind::Filename
@ Filename
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
X86MachineFunctionInfo.h
X86.h
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
llvm::DS_Warning
@ DS_Warning
Definition: DiagnosticInfo.h:51
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition: X86BaseInfo.h:32
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ErrorOr::getError
std::error_code getError() const
Definition: ErrorOr.h:153
SampleProf.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
SampleProfile.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::X86II::getOperandBias
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1060
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::sampleprof::FunctionSamples
Representation of the samples collected for a function.
Definition: SampleProf.h:718
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::createX86InsertPrefetchPass
FunctionPass * createX86InsertPrefetchPass()
This pass applies profiling information to insert cache prefetches.
Definition: X86InsertPrefetch.cpp:252
SampleProfReader.h
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::codeview::CompileSym2Flags::EC
@ EC
MachineModuleInfo.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:289
llvm::sampleprof::FunctionSamples::getOffset
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:291
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::DiagnosticInfoSampleProfile
Diagnostic information for the sample profiler.
Definition: DiagnosticInfo.h:235
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::X86::AddrDisp
@ AddrDisp
Definition: X86BaseInfo.h:35
llvm::sampleprof::SampleRecord::CallTargetMap
StringMap< uint64_t > CallTargetMap
Definition: SampleProf.h:338
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AMDGPU::SendMsg::Msg
const CustomOperand< const MCSubtargetInfo & > Msg[]
Definition: AMDGPUAsmUtils.cpp:39
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::LLVMContext::diagnose
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition: LLVMContext.cpp:248
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1327
llvm::MachineMemOperand::getSize
uint64_t getSize() const
Return the size in bytes of the memory reference.
Definition: MachineMemOperand.h:235
PrefetchHintsFile
static cl::opt< std::string > PrefetchHintsFile("prefetch-hints-file", cl::desc("Path to the prefetch hints profile. See also " "-x86-discriminate-memops"), cl::Hidden)
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::X86::AddrSegmentReg
@ AddrSegmentReg
AddrSegmentReg - The operand # of the segment in the memory operand.
Definition: X86BaseInfo.h:38
llvm::X86::AddrScaleAmt
@ AddrScaleAmt
Definition: X86BaseInfo.h:33
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::cl::desc
Definition: CommandLine.h:413
X86InstrInfo.h
llvm::sampleprof::FunctionSamples::findFunctionSamples
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
Definition: SampleProf.cpp:246