LLVM 18.0.0git
X86InsertPrefetch.cpp
Go to the documentation of this file.
1//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass applies cache prefetch instructions based on a profile. The pass
10// assumes DiscriminateMemOps ran immediately before, to ensure debug info
11// matches the one used at profile generation time. The profile is encoded in
12// afdo format (text or binary). It contains prefetch hints recommendations.
13// Each recommendation is made in terms of debug info locations, a type (i.e.
14// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
15// memory operand (see X86DiscriminateMemOps). The prefetch will be made for
16// a location at that memory operand + the delta specified in the
17// recommendation.
18//
19//===----------------------------------------------------------------------===//
20
21#include "X86.h"
22#include "X86InstrBuilder.h"
23#include "X86InstrInfo.h"
25#include "X86Subtarget.h"
33using namespace llvm;
34using namespace sampleprof;
35
37 PrefetchHintsFile("prefetch-hints-file",
38 cl::desc("Path to the prefetch hints profile. See also "
39 "-x86-discriminate-memops"),
41namespace {
42
43class X86InsertPrefetch : public MachineFunctionPass {
44 void getAnalysisUsage(AnalysisUsage &AU) const override;
45 bool doInitialization(Module &) override;
46
47 bool runOnMachineFunction(MachineFunction &MF) override;
48 struct PrefetchInfo {
49 unsigned InstructionID;
50 int64_t Delta;
51 };
52 typedef SmallVectorImpl<PrefetchInfo> Prefetches;
53 bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
54 Prefetches &prefetches) const;
55
56public:
57 static char ID;
58 X86InsertPrefetch(const std::string &PrefetchHintsFilename);
59 StringRef getPassName() const override {
60 return "X86 Insert Cache Prefetches";
61 }
62
63private:
64 std::string Filename;
65 std::unique_ptr<SampleProfileReader> Reader;
66};
67
68using PrefetchHints = SampleRecord::CallTargetMap;
69
70// Return any prefetching hints for the specified MachineInstruction. The hints
71// are returned as pairs (name, delta).
72ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
73 const MachineInstr &MI) {
74 if (const auto &Loc = MI.getDebugLoc())
75 if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
76 return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
77 Loc->getBaseDiscriminator());
78 return std::error_code();
79}
80
81// The prefetch instruction can't take memory operands involving vector
82// registers.
83bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
84 Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
85 Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
86 return (BaseReg == 0 ||
87 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
88 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
89 (IndexReg == 0 ||
90 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
91 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
92}
93
94} // end anonymous namespace
95
96//===----------------------------------------------------------------------===//
97// Implementation
98//===----------------------------------------------------------------------===//
99
100char X86InsertPrefetch::ID = 0;
101
102X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
103 : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
104
105/// Return true if the provided MachineInstruction has cache prefetch hints. In
106/// that case, the prefetch hints are stored, in order, in the Prefetches
107/// vector.
108bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
109 const MachineInstr &MI,
110 Prefetches &Prefetches) const {
111 assert(Prefetches.empty() &&
112 "Expected caller passed empty PrefetchInfo vector.");
113
114 // There is no point to match prefetch hints if the profile is using MD5.
116 return false;
117
118 static constexpr std::pair<StringLiteral, unsigned> HintTypes[] = {
119 {"_nta_", X86::PREFETCHNTA},
120 {"_t0_", X86::PREFETCHT0},
121 {"_t1_", X86::PREFETCHT1},
122 {"_t2_", X86::PREFETCHT2},
123 };
124 static const char *SerializedPrefetchPrefix = "__prefetch";
125
126 const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
127 if (!T)
128 return false;
129 int16_t max_index = -1;
130 // Convert serialized prefetch hints into PrefetchInfo objects, and populate
131 // the Prefetches vector.
132 for (const auto &S_V : *T) {
133 StringRef Name = S_V.first.stringRef();
134 if (Name.consume_front(SerializedPrefetchPrefix)) {
135 int64_t D = static_cast<int64_t>(S_V.second);
136 unsigned IID = 0;
137 for (const auto &HintType : HintTypes) {
138 if (Name.startswith(HintType.first)) {
139 Name = Name.drop_front(HintType.first.size());
140 IID = HintType.second;
141 break;
142 }
143 }
144 if (IID == 0)
145 return false;
146 uint8_t index = 0;
147 Name.consumeInteger(10, index);
148
149 if (index >= Prefetches.size())
150 Prefetches.resize(index + 1);
151 Prefetches[index] = {IID, D};
152 max_index = std::max(max_index, static_cast<int16_t>(index));
153 }
154 }
155 assert(max_index + 1 >= 0 &&
156 "Possible overflow: max_index + 1 should be positive.");
157 assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
158 "The number of prefetch hints received should match the number of "
159 "PrefetchInfo objects returned");
160 return !Prefetches.empty();
161}
162
163bool X86InsertPrefetch::doInitialization(Module &M) {
164 if (Filename.empty())
165 return false;
166
167 LLVMContext &Ctx = M.getContext();
168 // TODO: Propagate virtual file system into LLVM targets.
169 auto FS = vfs::getRealFileSystem();
171 SampleProfileReader::create(Filename, Ctx, *FS);
172 if (std::error_code EC = ReaderOrErr.getError()) {
173 std::string Msg = "Could not open profile: " + EC.message();
174 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
175 DiagnosticSeverity::DS_Warning));
176 return false;
177 }
178 Reader = std::move(ReaderOrErr.get());
179 Reader->read();
180 return true;
181}
182
183void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
184 AU.setPreservesAll();
186}
187
188bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
189 if (!Reader)
190 return false;
191 const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
192 if (!Samples)
193 return false;
194
195 bool Changed = false;
196
199 for (auto &MBB : MF) {
200 for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
201 auto Current = MI;
202 ++MI;
203
204 int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
205 if (Offset < 0)
206 continue;
207 unsigned Bias = X86II::getOperandBias(Current->getDesc());
208 int MemOpOffset = Offset + Bias;
209 // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
210 if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
211 continue;
212 Prefetches.clear();
213 if (!findPrefetchInfo(Samples, *Current, Prefetches))
214 continue;
215 assert(!Prefetches.empty() &&
216 "The Prefetches vector should contain at least a value if "
217 "findPrefetchInfo returned true.");
218 for (auto &PrefInfo : Prefetches) {
219 unsigned PFetchInstrID = PrefInfo.InstructionID;
220 int64_t Delta = PrefInfo.Delta;
221 const MCInstrDesc &Desc = TII->get(PFetchInstrID);
222 MachineInstr *PFetch =
223 MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
224 MachineInstrBuilder MIB(MF, PFetch);
225
226 static_assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
227 X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
229 "Unexpected change in X86 operand offset order.");
230
231 // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc.
232 // FIXME(mtrofin): consider adding a:
233 // MachineInstrBuilder::set(unsigned offset, op).
234 MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
235 .addImm(
236 Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
237 .addReg(
238 Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
239 .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
240 Delta)
241 .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
242 .getReg());
243
244 if (!Current->memoperands_empty()) {
245 MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
246 MIB.addMemOperand(MF.getMachineMemOperand(
247 CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
248 }
249
250 // Insert before Current. This is because Current may clobber some of
251 // the registers used to describe the input memory operand.
252 MBB.insert(Current, PFetch);
253 Changed = true;
254 }
255 }
256 }
257 return Changed;
258}
259
261 return new X86InsertPrefetch(PrefetchHintsFile);
262}
MachineBasicBlock & MBB
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
std::string Name
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for the sampled PGO loader pass.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< std::string > PrefetchHintsFile("prefetch-hints-file", cl::desc("Path to the prefetch hints profile. See also " "-x86-discriminate-memops"), cl::Hidden)
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an Operation in the Expression.
Diagnostic information for the sample profiler.
Represents either an error or a value T.
Definition: ErrorOr.h:56
reference get()
Definition: ErrorOr.h:149
std::error_code getError() const
Definition: ErrorOr.h:152
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
instr_iterator instr_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
bool doInitialization(Module &) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:68
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
Representation of the samples collected for a function.
Definition: SampleProf.h:744
static unsigned getOffset(const DILocation *DIL)
Returns the line offset to the start line of the subprogram.
Definition: SampleProf.cpp:216
const FunctionSamples * findFunctionSamples(const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper=nullptr) const
Get the FunctionSamples of the inline instance where DIL originates from.
Definition: SampleProf.cpp:238
static bool UseMD5
Whether the profile uses MD5 to represent string.
Definition: SampleProf.h:1190
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, const std::string RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
Definition: SampleProf.h:338
const CustomOperand< const MCSubtargetInfo & > Msg[]
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ FS
Definition: X86.h:206
int getMemoryOperandNo(uint64_t TSFlags)
Definition: X86BaseInfo.h:1000
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:963
@ AddrScaleAmt
Definition: X86BaseInfo.h:30
@ AddrSegmentReg
Definition: X86BaseInfo.h:34
@ AddrIndexReg
Definition: X86BaseInfo.h:31
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
FunctionPass * createX86InsertPrefetchPass()
This pass applies profiling information to insert cache prefetches.
Description of the encoding of one expression Op.