25 std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) {
27 case AMDGPU::S_WAITCNT:
28 case AMDGPU::S_WAITCNT_EXPCNT:
29 case AMDGPU::S_WAITCNT_LGKMCNT:
30 case AMDGPU::S_WAITCNT_VMCNT:
31 case AMDGPU::S_WAITCNT_VSCNT:
32 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
33 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
34 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
35 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
36 case AMDGPU::S_WAITCNT_gfx10:
37 case AMDGPU::S_WAITCNT_gfx6_gfx7:
38 case AMDGPU::S_WAITCNT_vi:
39 return processWaitCnt(Inst, MCI);
// NOTE(review): fragmentary extraction — the original file's line numbers
// ("45 ", "47 ", "52 ") are embedded in the text and interior lines are
// missing, so only comments are added here; no code token is altered.
//
// Purpose (visible): post-process an s_waitcnt MCInst into the mca
// Instruction `Inst`. The loop walks every operand of the MCInst
// (Idx from 0 to MCI.size()) and branches on operand kind — an isImm()
// test on MCOp is visible. Presumably immediate operands are copied onto
// the mca instruction's operand list — TODO confirm against the full file.
45 void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
47 for (
int Idx = 0,
N = MCI.
size(); Idx <
N; Idx++) {
52 }
else if (MCOp.
isImm()) {
64 generateWaitCntInfo();
79 case AMDGPU::S_WAITCNT:
80 case AMDGPU::S_WAITCNT_EXPCNT:
81 case AMDGPU::S_WAITCNT_LGKMCNT:
82 case AMDGPU::S_WAITCNT_VMCNT:
83 case AMDGPU::S_WAITCNT_VSCNT:
84 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
85 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
86 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
87 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
88 case AMDGPU::S_WAITCNT_gfx10:
89 case AMDGPU::S_WAITCNT_gfx6_gfx7:
90 case AMDGPU::S_WAITCNT_vi:
95 return handleWaitCnt(IssuedInst,
IR);
108 unsigned Lgkmcnt = 31;
110 unsigned CurrVmcnt = 0;
111 unsigned CurrExpcnt = 0;
112 unsigned CurrLgkmcnt = 0;
113 unsigned CurrVscnt = 0;
114 unsigned CyclesToWaitVm = ~0U;
115 unsigned CyclesToWaitExp = ~0U;
116 unsigned CyclesToWaitLgkm = ~0U;
117 unsigned CyclesToWaitVs = ~0U;
119 computeWaitCnt(
IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
123 for (
const InstRef &PrevIR : IssuedInst) {
124 const Instruction &PrevInst = *PrevIR.getInstruction();
125 const unsigned PrevInstIndex = PrevIR.getSourceIndex() %
SrcMgr.
size();
126 const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
129 "We should know how many cycles are left for this instruction");
130 if (PrevInstWaitInfo.
VmCnt) {
132 if ((
unsigned)CyclesLeft < CyclesToWaitVm)
133 CyclesToWaitVm = CyclesLeft;
135 if (PrevInstWaitInfo.
ExpCnt) {
137 if ((
unsigned)CyclesLeft < CyclesToWaitExp)
138 CyclesToWaitExp = CyclesLeft;
140 if (PrevInstWaitInfo.
LgkmCnt) {
142 if ((
unsigned)CyclesLeft < CyclesToWaitLgkm)
143 CyclesToWaitLgkm = CyclesLeft;
145 if (PrevInstWaitInfo.
VsCnt) {
147 if ((
unsigned)CyclesLeft < CyclesToWaitVs)
148 CyclesToWaitVs = CyclesLeft;
152 unsigned CyclesToWait = ~0U;
153 if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
154 CyclesToWait = CyclesToWaitVm;
155 if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
156 CyclesToWait = CyclesToWaitExp;
157 if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
158 CyclesToWait = CyclesToWaitLgkm;
159 if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
160 CyclesToWait = CyclesToWaitVs;
168 if (CyclesToWait == ~0U)
// NOTE(review): fragmentary extraction — embedded original line numbers jump
// (174→181, 192→198, …), so interior statements are elided. Comments only.
//
// Purpose (visible): decode an s_waitcnt-family instruction's operands into
// the caller's counter references (Vmcnt, Expcnt, Lgkmcnt, and — per the
// VSCNT cases below — presumably Vscnt; the parameter list is partially
// elided, TODO confirm).
173 void AMDGPUCustomBehaviour::computeWaitCnt(
const InstRef &
IR,
unsigned &Vmcnt,
174 unsigned &Expcnt,
unsigned &Lgkmcnt,
// Split-counter gfx10 forms: s_waitcnt_<cnt>cnt takes a register operand
// followed by an immediate operand (asserted below).
181 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
182 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
183 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
184 case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
190 assert(OpReg && OpReg->
isReg() &&
"First operand should be a register.");
191 assert(OpImm && OpImm->
isImm() &&
"Second operand should be an immediate.");
// A non-null SGPR operand means the wait also depends on a register value;
// the visible diagnostic text says that case is "ignored. So the wait may
// not be accurate." — i.e. the model knowingly approximates here.
192 if (OpReg->
getReg() != AMDGPU::SGPR_NULL) {
198 <<
"ignored. So the wait may not be accurate.\n";
// Inner dispatch: store the immediate into the counter matching the opcode
// (only the LGKMCNT assignment is visible; the EXPCNT/VMCNT/VSCNT
// assignments are presumably analogous — elided from this chunk).
204 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
207 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
208 Lgkmcnt = OpImm->
getImm();
210 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
213 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
// Combined forms: a single packed immediate in operand 0 encodes all
// counters; decoding of WaitCnt into the individual fields is elided.
219 case AMDGPU::S_WAITCNT_gfx10:
220 case AMDGPU::S_WAITCNT_gfx6_gfx7:
221 case AMDGPU::S_WAITCNT_vi:
222 unsigned WaitCnt = Inst.
getOperand(0)->getImm();
// NOTE(review): fragmentary extraction — the classification conditions
// (between the embedded original line numbers 245→249, 251→257, …) are
// elided, so only the per-counter flag assignments are visible. Comments only.
//
// Purpose (visible): walk the instruction stream and, for each instruction
// at position `Index`, record in InstrWaitCntInfo[Index] which hardware wait
// counters (VmCnt / ExpCnt / LgkmCnt / VsCnt) that instruction will
// increment, so a later s_waitcnt model can tell what it must wait on.
228 void AMDGPUCustomBehaviour::generateWaitCntInfo() {
244 const std::unique_ptr<Instruction> &Inst = *
I;
245 unsigned Opcode = Inst->getOpcode();
// (condition elided) — this category counts against lgkmcnt …
249 InstrWaitCntInfo[
Index].LgkmCnt =
true;
// … and additionally against expcnt when the op is always-GDS or has its
// gds modifier bit set.
250 if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
251 InstrWaitCntInfo[
Index].ExpCnt =
true;
// Remaining categories (conditions elided): each sets the counter(s) the
// instruction class increments — lgkm, vm, and/or vs.
257 InstrWaitCntInfo[
Index].LgkmCnt =
true;
259 InstrWaitCntInfo[
Index].VmCnt =
true;
261 InstrWaitCntInfo[
Index].VmCnt =
true;
263 InstrWaitCntInfo[
Index].VsCnt =
true;
266 InstrWaitCntInfo[
Index].VmCnt =
true;
271 InstrWaitCntInfo[
Index].VmCnt =
true;
273 InstrWaitCntInfo[
Index].VsCnt =
true;
281 InstrWaitCntInfo[
Index].ExpCnt =
true;
283 InstrWaitCntInfo[
Index].LgkmCnt =
true;
285 InstrWaitCntInfo[
Index].ExpCnt =
true;
// Message/timer scalar ops count against lgkmcnt.
288 case AMDGPU::S_SENDMSG:
289 case AMDGPU::S_SENDMSGHALT:
290 case AMDGPU::S_MEMTIME:
291 case AMDGPU::S_MEMREALTIME:
292 InstrWaitCntInfo[
Index].LgkmCnt =
true;
// NOTE(review): only the signature survives in this extraction; the body
// (original lines ~301-306) is elided. Presumably classifies an opcode
// description as a VMEM (vector-memory) instruction by its MCInstrDesc
// flags — TODO confirm against the complete file. Comments only; no code
// token altered.
300 bool AMDGPUCustomBehaviour::isVMEM(
const MCInstrDesc &MCID) {
// NOTE(review): fragmentary extraction (embedded original line numbers jump
// 308→314); comments only, no code token altered.
//
// Purpose (visible): test whether the operand identified by `OpName` on
// `Inst` is present as a non-zero immediate — the visible guard rejects a
// null operand, a non-immediate operand, and a zero immediate. Presumably
// the elided lines return false on that guard and true otherwise — TODO
// confirm (how `Op` is looked up from OpName is not visible here).
307 bool AMDGPUCustomBehaviour::hasModifiersSet(
308 const std::unique_ptr<Instruction> &Inst,
unsigned OpName)
const {
314 if (
Op ==
nullptr || !
Op->isImm() || !
Op->getImm())
// NOTE(review): fragmentary extraction — the `return` keyword that should
// precede the first disjunct (original line 322) and the closing brace are
// elided from this chunk. Comments only; no code token altered.
//
// Purpose (visible): report whether `Opcode` is one of the DS_GWS_* /
// DS_GWS_BARRIER operations, i.e. instructions that always target GDS
// regardless of any gds modifier bit (complements hasModifiersSet in
// generateWaitCntInfo's ExpCnt classification).
321 bool AMDGPUCustomBehaviour::isAlwaysGDS(
uint16_t Opcode)
const {
323 Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
324 Opcode == AMDGPU::DS_GWS_SEMA_P ||
325 Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
326 Opcode == AMDGPU::DS_GWS_BARRIER;
332 using namespace llvm;