28 case AMDGPU::S_WAITCNT:
29 case AMDGPU::S_WAITCNT_soft:
30 case AMDGPU::S_WAITCNT_EXPCNT:
31 case AMDGPU::S_WAITCNT_LGKMCNT:
32 case AMDGPU::S_WAITCNT_VMCNT:
33 case AMDGPU::S_WAITCNT_VSCNT:
34 case AMDGPU::S_WAITCNT_VSCNT_soft:
35 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
36 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
37 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
38 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
39 case AMDGPU::S_WAITCNT_gfx10:
40 case AMDGPU::S_WAITCNT_gfx6_gfx7:
41 case AMDGPU::S_WAITCNT_vi:
42 return processWaitCnt(Inst, MCI);
48void AMDGPUInstrPostProcess::processWaitCnt(
Instruction &Inst,
50 for (
int Idx = 0,
N = MCI.
size(); Idx <
N; Idx++) {
55 }
else if (MCOp.
isImm()) {
67 generateWaitCntInfo();
82 case AMDGPU::S_WAITCNT:
83 case AMDGPU::S_WAITCNT_soft:
84 case AMDGPU::S_WAITCNT_EXPCNT:
85 case AMDGPU::S_WAITCNT_LGKMCNT:
86 case AMDGPU::S_WAITCNT_VMCNT:
87 case AMDGPU::S_WAITCNT_VSCNT:
88 case AMDGPU::S_WAITCNT_VSCNT_soft:
89 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
90 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
91 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
92 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
93 case AMDGPU::S_WAITCNT_gfx10:
94 case AMDGPU::S_WAITCNT_gfx6_gfx7:
95 case AMDGPU::S_WAITCNT_vi:
100 return handleWaitCnt(IssuedInst,
IR);
113 unsigned Lgkmcnt = 31;
115 unsigned CurrVmcnt = 0;
116 unsigned CurrExpcnt = 0;
117 unsigned CurrLgkmcnt = 0;
118 unsigned CurrVscnt = 0;
119 unsigned CyclesToWaitVm = ~0U;
120 unsigned CyclesToWaitExp = ~0U;
121 unsigned CyclesToWaitLgkm = ~0U;
122 unsigned CyclesToWaitVs = ~0U;
124 computeWaitCnt(
IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
128 for (
const InstRef &PrevIR : IssuedInst) {
129 const Instruction &PrevInst = *PrevIR.getInstruction();
130 const unsigned PrevInstIndex = PrevIR.getSourceIndex() %
SrcMgr.size();
131 const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
134 "We should know how many cycles are left for this instruction");
135 if (PrevInstWaitInfo.
VmCnt) {
137 if ((
unsigned)CyclesLeft < CyclesToWaitVm)
138 CyclesToWaitVm = CyclesLeft;
140 if (PrevInstWaitInfo.
ExpCnt) {
142 if ((
unsigned)CyclesLeft < CyclesToWaitExp)
143 CyclesToWaitExp = CyclesLeft;
145 if (PrevInstWaitInfo.
LgkmCnt) {
147 if ((
unsigned)CyclesLeft < CyclesToWaitLgkm)
148 CyclesToWaitLgkm = CyclesLeft;
150 if (PrevInstWaitInfo.
VsCnt) {
152 if ((
unsigned)CyclesLeft < CyclesToWaitVs)
153 CyclesToWaitVs = CyclesLeft;
157 unsigned CyclesToWait = ~0
U;
158 if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
159 CyclesToWait = CyclesToWaitVm;
160 if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
161 CyclesToWait = CyclesToWaitExp;
162 if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
163 CyclesToWait = CyclesToWaitLgkm;
164 if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
165 CyclesToWait = CyclesToWaitVs;
173 if (CyclesToWait == ~0U)
178void AMDGPUCustomBehaviour::computeWaitCnt(
const InstRef &
IR,
unsigned &Vmcnt,
179 unsigned &Expcnt,
unsigned &Lgkmcnt,
186 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
187 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
188 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
189 case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
195 assert(OpReg && OpReg->
isReg() &&
"First operand should be a register.");
196 assert(OpImm && OpImm->
isImm() &&
"Second operand should be an immediate.");
197 if (OpReg->
getReg() != AMDGPU::SGPR_NULL) {
202 <<
MCII.getName(Opcode) <<
" will be completely "
203 <<
"ignored. So the wait may not be accurate.\n";
209 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
212 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
213 Lgkmcnt = OpImm->
getImm();
215 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
218 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
224 case AMDGPU::S_WAITCNT_gfx10:
225 case AMDGPU::S_WAITCNT_gfx6_gfx7:
226 case AMDGPU::S_WAITCNT_vi:
227 unsigned WaitCnt = Inst.
getOperand(0)->getImm();
233void AMDGPUCustomBehaviour::generateWaitCntInfo() {
245 InstrWaitCntInfo.resize(
SrcMgr.size());
248 const std::unique_ptr<Instruction> &Inst = EN.value();
249 unsigned Index = EN.index();
250 unsigned Opcode = Inst->getOpcode();
251 const MCInstrDesc &MCID =
MCII.get(Opcode);
254 InstrWaitCntInfo[
Index].LgkmCnt =
true;
255 if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
256 InstrWaitCntInfo[
Index].ExpCnt =
true;
262 InstrWaitCntInfo[
Index].LgkmCnt =
true;
263 if (!
STI.hasFeature(AMDGPU::FeatureVscnt))
264 InstrWaitCntInfo[
Index].VmCnt =
true;
266 InstrWaitCntInfo[
Index].VmCnt =
true;
268 InstrWaitCntInfo[
Index].VsCnt =
true;
270 if (!
STI.hasFeature(AMDGPU::FeatureVscnt))
271 InstrWaitCntInfo[
Index].VmCnt =
true;
276 InstrWaitCntInfo[
Index].VmCnt =
true;
278 InstrWaitCntInfo[
Index].VsCnt =
true;
286 InstrWaitCntInfo[
Index].ExpCnt =
true;
288 InstrWaitCntInfo[
Index].LgkmCnt =
true;
290 InstrWaitCntInfo[
Index].ExpCnt =
true;
293 case AMDGPU::S_SENDMSG:
294 case AMDGPU::S_SENDMSGHALT:
295 case AMDGPU::S_MEMTIME:
296 case AMDGPU::S_MEMREALTIME:
297 InstrWaitCntInfo[
Index].LgkmCnt =
true;
305bool AMDGPUCustomBehaviour::isVMEM(
const MCInstrDesc &MCID) {
312bool AMDGPUCustomBehaviour::hasModifiersSet(
313 const std::unique_ptr<Instruction> &Inst, AMDGPU::OpName OpName)
const {
314 int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
318 const MCAOperand *
Op = Inst->getOperand(Idx);
319 if (
Op ==
nullptr || !
Op->isImm() || !
Op->getImm())
326bool AMDGPUCustomBehaviour::isGWS(uint32_t Opcode)
const {
327 const MCInstrDesc &MCID =
MCII.get(Opcode);
332bool AMDGPUCustomBehaviour::isAlwaysGDS(uint32_t Opcode)
const {
333 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
334 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
335 Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
343static CustomBehaviour *
350static InstrPostProcess *
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static CustomBehaviour * createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA()
Extern function to initialize the targets for the AMDGPU backend.
static InstrPostProcess * createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
This file defines the AMDGPUCustomBehaviour class which inherits from CustomBehaviour.
Provides AMDGPU specific target descriptions.
#define LLVM_EXTERNAL_VISIBILITY
Legalize the Machine IR a function's Machine IR
static const uint32_t IV[8]
Represent a constant reference to an array (0 or more elements consecutively in memory),...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
const MCOperand & getOperand(unsigned i) const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
Interface to description of machine instruction set.
Instances of this class represent operands of the MCInst class.
MCRegister getReg() const
Returns the register number.
Generic base class for all target subtargets.
Value * getOperand(unsigned i) const
static LLVM_ABI raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
unsigned checkCustomHazard(ArrayRef< InstRef > IssuedInst, const InstRef &IR) override
This method is used to determine if an instruction should be allowed to be dispatched.
AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override
This method can be overridden by targets to modify the mca::Instruction object after it has been lower...
const mca::SourceMgr & SrcMgr
const MCSubtargetInfo & STI
CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
An InstRef contains both a SourceMgr index and Instruction pair.
unsigned getOpcode() const
void addOperand(const MCAOperand Op)
An instruction propagated through the simulated instruction pipeline.
int getCyclesLeft() const
A representation of an mca::Instruction operand for use in mca::CustomBehaviour.
unsigned getReg() const
Returns the register number.
static MCAOperand createImm(int64_t Val)
static MCAOperand createReg(unsigned Reg)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool getMUBUFIsBufferInv(unsigned Opc)
constexpr int UNKNOWN_CYCLES
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Target & getTheR600Target()
The target for R600 GPUs.
Target & getTheGCNTarget()
The target for GCN GPUs.
DWARFExpression::Operation Op
static void RegisterInstrPostProcess(Target &T, Target::InstrPostProcessCtorTy Fn)
RegisterInstrPostProcess - Register an InstrPostProcess implementation for the given target.
static void RegisterCustomBehaviour(Target &T, Target::CustomBehaviourCtorTy Fn)
RegisterCustomBehaviour - Register a CustomBehaviour implementation for the given target.
Abstracting the input code sequence (a sequence of MCInst) and assigning unique identifiers to every ...