LLVM 23.0.0git
AMDGPUCustomBehaviour.cpp
Go to the documentation of this file.
1//===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file implements methods from the AMDGPUCustomBehaviour class.
11///
12//===----------------------------------------------------------------------===//
13
22
23namespace llvm::mca {
24
26 const MCInst &MCI) {
27 switch (MCI.getOpcode()) {
28 case AMDGPU::S_WAITCNT:
29 case AMDGPU::S_WAITCNT_soft:
30 case AMDGPU::S_WAITCNT_EXPCNT:
31 case AMDGPU::S_WAITCNT_LGKMCNT:
32 case AMDGPU::S_WAITCNT_VMCNT:
33 case AMDGPU::S_WAITCNT_VSCNT:
34 case AMDGPU::S_WAITCNT_VSCNT_soft:
35 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
36 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
37 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
38 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
39 case AMDGPU::S_WAITCNT_gfx10:
40 case AMDGPU::S_WAITCNT_gfx6_gfx7:
41 case AMDGPU::S_WAITCNT_vi:
42 return processWaitCnt(Inst, MCI);
43 }
44}
45
46// s_waitcnt instructions encode important information as immediate operands
47// which are lost during the MCInst -> mca::Instruction lowering.
48void AMDGPUInstrPostProcess::processWaitCnt(Instruction &Inst,
49 const MCInst &MCI) {
50 for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
52 const MCOperand &MCOp = MCI.getOperand(Idx);
53 if (MCOp.isReg()) {
55 } else if (MCOp.isImm()) {
57 }
58 Op.setIndex(Idx);
59 Inst.addOperand(Op);
60 }
61}
62
69
71 const InstRef &IR) {
72 const Instruction &Inst = *IR.getInstruction();
73 unsigned Opcode = Inst.getOpcode();
74
75 // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
76 // pseudo instructions here. However, there are plans for the future to make
77 // it possible to use mca within backend passes. As such, I have left the
78 // pseudo version of s_waitcnt within this switch statement.
79 switch (Opcode) {
80 default:
81 return 0;
82 case AMDGPU::S_WAITCNT: // This instruction
83 case AMDGPU::S_WAITCNT_soft:
84 case AMDGPU::S_WAITCNT_EXPCNT:
85 case AMDGPU::S_WAITCNT_LGKMCNT:
86 case AMDGPU::S_WAITCNT_VMCNT:
87 case AMDGPU::S_WAITCNT_VSCNT:
88 case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
89 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
90 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
91 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
92 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
93 case AMDGPU::S_WAITCNT_gfx10:
94 case AMDGPU::S_WAITCNT_gfx6_gfx7:
95 case AMDGPU::S_WAITCNT_vi:
96 // s_endpgm also behaves as if there is an implicit
97 // s_waitcnt 0, but I'm not sure if it would be appropriate
98 // to model this in llvm-mca based on how the iterations work
99 // while simulating the pipeline over and over.
100 return handleWaitCnt(IssuedInst, IR);
101 }
102
103 return 0;
104}
105
106unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
107 const InstRef &IR) {
108 // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
109 // I do not know how that instruction works so I did not attempt to model it.
110 // set the max values to begin
111 unsigned Vmcnt = 63;
112 unsigned Expcnt = 7;
113 unsigned Lgkmcnt = 31;
114 unsigned Vscnt = 63;
115 unsigned CurrVmcnt = 0;
116 unsigned CurrExpcnt = 0;
117 unsigned CurrLgkmcnt = 0;
118 unsigned CurrVscnt = 0;
119 unsigned CyclesToWaitVm = ~0U;
120 unsigned CyclesToWaitExp = ~0U;
121 unsigned CyclesToWaitLgkm = ~0U;
122 unsigned CyclesToWaitVs = ~0U;
123
124 computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
125
126 // We will now look at each of the currently executing instructions
127 // to find out if this wait instruction still needs to wait.
128 for (const InstRef &PrevIR : IssuedInst) {
129 const Instruction &PrevInst = *PrevIR.getInstruction();
130 const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
131 const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
132 const int CyclesLeft = PrevInst.getCyclesLeft();
133 assert(CyclesLeft != UNKNOWN_CYCLES &&
134 "We should know how many cycles are left for this instruction");
135 if (PrevInstWaitInfo.VmCnt) {
136 CurrVmcnt++;
137 if ((unsigned)CyclesLeft < CyclesToWaitVm)
138 CyclesToWaitVm = CyclesLeft;
139 }
140 if (PrevInstWaitInfo.ExpCnt) {
141 CurrExpcnt++;
142 if ((unsigned)CyclesLeft < CyclesToWaitExp)
143 CyclesToWaitExp = CyclesLeft;
144 }
145 if (PrevInstWaitInfo.LgkmCnt) {
146 CurrLgkmcnt++;
147 if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
148 CyclesToWaitLgkm = CyclesLeft;
149 }
150 if (PrevInstWaitInfo.VsCnt) {
151 CurrVscnt++;
152 if ((unsigned)CyclesLeft < CyclesToWaitVs)
153 CyclesToWaitVs = CyclesLeft;
154 }
155 }
156
157 unsigned CyclesToWait = ~0U;
158 if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
159 CyclesToWait = CyclesToWaitVm;
160 if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
161 CyclesToWait = CyclesToWaitExp;
162 if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
163 CyclesToWait = CyclesToWaitLgkm;
164 if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
165 CyclesToWait = CyclesToWaitVs;
166
167 // We may underestimate how many cycles we need to wait, but this
168 // isn't a big deal. Our return value is just how many cycles until
169 // this function gets run again. So as long as we don't overestimate
170 // the wait time, we'll still end up stalling at this instruction
171 // for the correct number of cycles.
172
173 if (CyclesToWait == ~0U)
174 return 0;
175 return CyclesToWait;
176}
177
178void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
179 unsigned &Expcnt, unsigned &Lgkmcnt,
180 unsigned &Vscnt) {
181 AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
182 const Instruction &Inst = *IR.getInstruction();
183 unsigned Opcode = Inst.getOpcode();
184
185 switch (Opcode) {
186 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
187 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
188 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
189 case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
190 // Should probably be checking for nullptr
191 // here, but I'm not sure how I should handle the case
192 // where we see a nullptr.
193 const MCAOperand *OpReg = Inst.getOperand(0);
194 const MCAOperand *OpImm = Inst.getOperand(1);
195 assert(OpReg && OpReg->isReg() && "First operand should be a register.");
196 assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
197 if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
198 // Instruction is using a real register.
199 // Since we can't know what value this register will have,
200 // we can't compute what the value of this wait should be.
201 WithColor::warning() << "The register component of "
202 << MCII.getName(Opcode) << " will be completely "
203 << "ignored. So the wait may not be accurate.\n";
204 }
205 switch (Opcode) {
206 // Redundant switch so I don't have to repeat the code above
207 // for each case. There are more clever ways to avoid this
208 // extra switch and anyone can feel free to implement one of them.
209 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
210 Expcnt = OpImm->getImm();
211 break;
212 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
213 Lgkmcnt = OpImm->getImm();
214 break;
215 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
216 Vmcnt = OpImm->getImm();
217 break;
218 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
219 Vscnt = OpImm->getImm();
220 break;
221 }
222 return;
223 }
224 case AMDGPU::S_WAITCNT_gfx10:
225 case AMDGPU::S_WAITCNT_gfx6_gfx7:
226 case AMDGPU::S_WAITCNT_vi:
227 unsigned WaitCnt = Inst.getOperand(0)->getImm();
228 AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
229 return;
230 }
231}
232
233void AMDGPUCustomBehaviour::generateWaitCntInfo() {
234 // The core logic from this function is taken from
235 // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions
236 // that are being looked at are in the MachineInstr format, whereas we have
237 // access to the MCInst format. The side effects of this are that we can't use
238 // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
239 // functions. Therefore, we conservatively assume that these functions will
240 // return true. This may cause a few instructions to be incorrectly tagged
241 // with an extra CNT. However, these are instructions that do interact with at
242 // least one CNT so giving them an extra CNT shouldn't cause issues in most
243 // scenarios.
244 AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
245 InstrWaitCntInfo.resize(SrcMgr.size());
246
247 for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
248 const std::unique_ptr<Instruction> &Inst = EN.value();
249 unsigned Index = EN.index();
250 unsigned Opcode = Inst->getOpcode();
251 const MCInstrDesc &MCID = MCII.get(Opcode);
252 if ((MCID.TSFlags & SIInstrFlags::DS) &&
254 InstrWaitCntInfo[Index].LgkmCnt = true;
255 if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
256 InstrWaitCntInfo[Index].ExpCnt = true;
257 } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
258 // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
259 // and mayAccessLDSThroughFlat(Inst) would both return true for this
260 // instruction. We have to do this because those functions use
261 // information about the memory operands that we don't have access to.
262 InstrWaitCntInfo[Index].LgkmCnt = true;
263 if (!STI.hasFeature(AMDGPU::FeatureVscnt))
264 InstrWaitCntInfo[Index].VmCnt = true;
265 else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
266 InstrWaitCntInfo[Index].VmCnt = true;
267 else
268 InstrWaitCntInfo[Index].VsCnt = true;
269 } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
270 if (!STI.hasFeature(AMDGPU::FeatureVscnt))
271 InstrWaitCntInfo[Index].VmCnt = true;
272 else if ((MCID.mayLoad() &&
274 ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
275 !MCID.mayStore()))
276 InstrWaitCntInfo[Index].VmCnt = true;
277 else if (MCID.mayStore())
278 InstrWaitCntInfo[Index].VsCnt = true;
279
280 // (IV.Major < 7) is meant to represent
281 // GCNTarget.vmemWriteNeedsExpWaitcnt()
282 // which is defined as
283 // { return getGeneration() < SEA_ISLANDS; }
284 if (IV.Major < 7 &&
285 (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
286 InstrWaitCntInfo[Index].ExpCnt = true;
287 } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
288 InstrWaitCntInfo[Index].LgkmCnt = true;
289 } else if (MCID.TSFlags & SIInstrFlags::EXP) {
290 InstrWaitCntInfo[Index].ExpCnt = true;
291 } else {
292 switch (Opcode) {
293 case AMDGPU::S_SENDMSG:
294 case AMDGPU::S_SENDMSGHALT:
295 case AMDGPU::S_MEMTIME:
296 case AMDGPU::S_MEMREALTIME:
297 InstrWaitCntInfo[Index].LgkmCnt = true;
298 break;
299 }
300 }
301 }
302}
303
304// taken from SIInstrInfo::isVMEM()
305bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
306 return MCID.TSFlags & SIInstrFlags::MUBUF ||
309}
310
311// taken from SIInstrInfo::hasModifiersSet()
312bool AMDGPUCustomBehaviour::hasModifiersSet(
313 const std::unique_ptr<Instruction> &Inst, AMDGPU::OpName OpName) const {
314 int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
315 if (Idx == -1)
316 return false;
317
318 const MCAOperand *Op = Inst->getOperand(Idx);
319 if (Op == nullptr || !Op->isImm() || !Op->getImm())
320 return false;
321
322 return true;
323}
324
325// taken from SIInstrInfo::isGWS()
326bool AMDGPUCustomBehaviour::isGWS(uint32_t Opcode) const {
327 const MCInstrDesc &MCID = MCII.get(Opcode);
328 return MCID.TSFlags & SIInstrFlags::GWS;
329}
330
331// taken from SIInstrInfo::isAlwaysGDS()
332bool AMDGPUCustomBehaviour::isAlwaysGDS(uint32_t Opcode) const {
333 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
334 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
335 Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
336}
337
338} // namespace llvm::mca
339
340using namespace llvm;
341using namespace mca;
342
343static CustomBehaviour *
345 const mca::SourceMgr &SrcMgr,
346 const MCInstrInfo &MCII) {
347 return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
348}
349
350static InstrPostProcess *
352 const MCInstrInfo &MCII) {
353 return new AMDGPUInstrPostProcess(STI, MCII);
354}
355
356/// Extern function to initialize the targets for the AMDGPU backend
357
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static CustomBehaviour * createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA()
Extern function to initialize the targets for the AMDGPU backend.
static InstrPostProcess * createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
This file defines the AMDGPUCustomBehaviour class which inherits from CustomBehaviour.
Provides AMDGPU specific target descriptions.
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
Legalize a function's Machine IR
Definition Legalizer.cpp:81
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
int64_t getImm() const
Definition MCInst.h:84
bool isImm() const
Definition MCInst.h:66
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
Generic base class for all target subtargets.
Value * getOperand(unsigned i) const
Definition User.h:207
static LLVM_ABI raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition WithColor.cpp:85
unsigned checkCustomHazard(ArrayRef< InstRef > IssuedInst, const InstRef &IR) override
This method is used to determine if an instruction should be allowed to be dispatched.
AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override
This method can be overridden by targets to modify the mca::Instruction object after it has been lower...
const MCInstrInfo & MCII
const mca::SourceMgr & SrcMgr
const MCSubtargetInfo & STI
CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
An InstRef contains both a SourceMgr index and Instruction pair.
unsigned getOpcode() const
void addOperand(const MCAOperand Op)
An instruction propagated through the simulated instruction pipeline.
int getCyclesLeft() const
A representation of an mca::Instruction operand for use in mca::CustomBehaviour.
Definition Instruction.h:37
unsigned getReg() const
Returns the register number.
Definition Instruction.h:73
static MCAOperand createImm(int64_t Val)
static MCAOperand createReg(unsigned Reg)
Definition Instruction.h:97
int64_t getImm() const
Definition Instruction.h:78
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool getMUBUFIsBufferInv(unsigned Opc)
constexpr int UNKNOWN_CYCLES
Definition Instruction.h:33
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Target & getTheR600Target()
The target for R600 GPUs.
SourceMgr SrcMgr
Definition Error.cpp:24
Target & getTheGCNTarget()
The target for GCN GPUs.
DWARFExpression::Operation Op
#define N
static void RegisterInstrPostProcess(Target &T, Target::InstrPostProcessCtorTy Fn)
RegisterInstrPostProcess - Register an InstrPostProcess implementation for the given target.
static void RegisterCustomBehaviour(Target &T, Target::CustomBehaviourCtorTy Fn)
RegisterCustomBehaviour - Register a CustomBehaviour implementation for the given target.
Abstracting the input code sequence (a sequence of MCInst) and assigning unique identifiers to every ...
Definition SourceMgr.h:29