LLVM 23.0.0git
AMDGPUHWEvents.cpp
Go to the documentation of this file.
1//===- AMDGPUHWEvents.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUHWEvents.h"
10#include "GCNSubtarget.h"
11#include "SIInstrInfo.h"
13#include "llvm/Support/Debug.h"
15
16namespace llvm {
17namespace AMDGPU {
18void HWEventSet::print(raw_ostream &OS) const {
19 ListSeparator LS(", ");
20 for (HWEvent Event : hw_events()) {
21 if (contains(Event))
22 OS << LS << toString(Event);
23 }
24}
25
26void HWEventSet::dump() const {
27 print(dbgs());
28 dbgs() << "\n";
29}
30
32 const SIInstrInfo &TII) {
33 if (TII.isVALU(Inst, /*AllowLDSDMA=*/true) && !SIInstrInfo::isLDSDMA(Inst)) {
34 // Core/Side-, DP-, XDL- and TRANS-MACC VALU instructions complete
35 // out-of-order with respect to each other, so each of these classes
36 // has its own event.
37
38 if (TII.isXDL(Inst))
39 return HWEvent::VGPR_XDL_WRITE;
40
41 if (TII.isTRANS(Inst))
42 return HWEvent::VGPR_TRANS_WRITE;
43
45 return HWEvent::VGPR_DPMACC_WRITE;
46
47 return HWEvent::VGPR_CSMACC_WRITE;
48 }
49
50 // FLAT and LDS instructions may read their VGPR sources out-of-order
51 // with respect to each other and all other VMEM instructions, so
52 // each of these also has a separate event.
53
54 if (TII.isFLAT(Inst))
55 return HWEvent::VGPR_FLAT_READ;
56
57 if (TII.isDS(Inst))
58 return HWEvent::VGPR_LDS_READ;
59
60 if (TII.isVMEM(Inst) || TII.isVIMAGE(Inst) || TII.isVSAMPLE(Inst))
61 return HWEvent::VGPR_VMEM_READ;
62
63 // Otherwise, no hazard.
64 return {};
65}
66
67static HWEvent getVmemHWEvent(const MachineInstr &Inst, const GCNSubtarget &ST,
68 const SIInstrInfo &TII) {
69 switch (Inst.getOpcode()) {
70 // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
71 case AMDGPU::GLOBAL_INV:
72 return HWEvent::GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't
73 // write VGPRs
74 case AMDGPU::GLOBAL_WB:
75 case AMDGPU::GLOBAL_WBINV:
76 return HWEvent::VMEM_WRITE_ACCESS; // tracked using storecnt
77 default:
78 break;
79 }
80
82 // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
83 // these should use VM_CNT.
84 if (!ST.hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
85 return HWEvent::VMEM_ACCESS;
86 if (Inst.mayStore() &&
87 (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
88 if (TII.mayAccessScratch(Inst))
89 return HWEvent::SCRATCH_WRITE_ACCESS;
90 return HWEvent::VMEM_WRITE_ACCESS;
91 }
92 if (!ST.hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst))
93 return HWEvent::VMEM_ACCESS;
94
95 if (SIInstrInfo::isImage(Inst)) {
97 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
98 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
99
100 if (BaseInfo->BVH)
101 return HWEvent::VMEM_BVH_READ_ACCESS;
102
103 // We have to make an additional check for isVSAMPLE here since some
104 // instructions don't have a sampler, but are still classified as sampler
105 // instructions for the purposes of e.g. waitcnt.
106 if (BaseInfo->Sampler || BaseInfo->MSAA || SIInstrInfo::isVSAMPLE(Inst))
107 return HWEvent::VMEM_SAMPLER_READ_ACCESS;
108 }
109
110 return HWEvent::VMEM_ACCESS;
111}
112
114 const GCNSubtarget &ST,
115 const SIInstrInfo &TII) {
116 if (TII.isDS(Inst) && TII.usesLGKM_CNT(Inst)) {
117 if (TII.isAlwaysGDS(Inst.getOpcode()) ||
118 TII.hasModifiersSet(Inst, AMDGPU::OpName::gds))
119 return {HWEvent::GDS_ACCESS, HWEvent::GDS_GPR_LOCK};
120
121 return HWEvent::LDS_ACCESS;
122 }
123
124 if (TII.isFLAT(Inst)) {
126 return getVmemHWEvent(Inst, ST, TII);
127
128 assert(Inst.mayLoadOrStore());
129 HWEventSet S;
130 if (TII.mayAccessVMEMThroughFlat(Inst)) {
131 if (ST.hasWaitXcnt())
132 S.insert(HWEvent::VMEM_GROUP);
133 S.insert(getVmemHWEvent(Inst, ST, TII));
134 }
135
136 if (TII.mayAccessLDSThroughFlat(Inst))
137 S.insert(HWEvent::LDS_ACCESS);
138 return S;
139 }
140
141 if (SIInstrInfo::isVMEM(Inst) &&
143 Inst.getOpcode() == AMDGPU::BUFFER_WBL2)) {
144 // BUFFER_WBL2 is included here because unlike invalidates, has to be
145 // followed "S_WAITCNT vmcnt(0)" is needed after to ensure the writeback has
146 // completed.
147 HWEventSet S = {getVmemHWEvent(Inst, ST, TII)};
148 if (ST.hasWaitXcnt())
149 S.insert(HWEvent::VMEM_GROUP);
150 if (ST.vmemWriteNeedsExpWaitcnt() &&
151 (Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst)))
152 S.insert(HWEvent::VMW_GPR_LOCK);
153
154 return S;
155 }
156
157 if (TII.isSMRD(Inst)) {
158 if (ST.hasWaitXcnt())
159 return {HWEvent::SMEM_GROUP, HWEvent::SMEM_ACCESS};
160 return HWEvent::SMEM_ACCESS;
161 }
162
163 if (SIInstrInfo::isLDSDIR(Inst)) {
164 return HWEvent::EXP_LDS_ACCESS;
165 }
166
167 if (SIInstrInfo::isEXP(Inst)) {
168 unsigned Imm = TII.getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
170 return HWEvent::EXP_PARAM_ACCESS;
172 return HWEvent::EXP_POS_ACCESS;
173 return HWEvent::EXP_GPR_LOCK;
174 }
175
177 return HWEvent::SCC_WRITE;
178 }
179
180 switch (Inst.getOpcode()) {
181 case AMDGPU::S_SENDMSG:
182 case AMDGPU::S_SENDMSG_RTN_B32:
183 case AMDGPU::S_SENDMSG_RTN_B64:
184 case AMDGPU::S_SENDMSGHALT:
185 return HWEvent::SQ_MESSAGE;
186 case AMDGPU::S_MEMTIME:
187 case AMDGPU::S_MEMREALTIME:
188 case AMDGPU::S_GET_BARRIER_STATE_M0:
189 case AMDGPU::S_GET_BARRIER_STATE_IMM:
190 return HWEvent::SMEM_ACCESS;
191 }
192
193 return {};
194}
195
197 bool IsExpertMode) {
198 const SIInstrInfo &TII = *ST.getInstrInfo();
199
200 if (IsExpertMode)
201 return getEventsForImpl(Inst, ST, TII) |
203 return getEventsForImpl(Inst, ST, TII);
204}
205} // namespace AMDGPU
206} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Interface definition for SIInstrInfo.
This file contains some functions that are useful when dealing with strings.
void print(raw_ostream &OS) const
bool contains(const HWEvent &Event) const
void insert(const HWEvent &Event)
A helper class to return the specified delimiter string after the first invocation of operator String...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
static bool isVMEM(const MachineInstr &MI)
static bool isEXP(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool isLDSDIR(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isGFX12CacheInvOrWBInst(unsigned Opc)
static bool isSBarrierSCCWrite(unsigned Opcode)
static bool isFLAT(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
static bool isAtomicNoRet(const MachineInstr &MI)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isDPMACCInstruction(unsigned Opc)
static HWEventSet getEventsForImpl(const MachineInstr &Inst, const GCNSubtarget &ST, const SIInstrInfo &TII)
static HWEventSet getExpertSchedulingEventType(const MachineInstr &Inst, const SIInstrInfo &TII)
HWEventSet getEventsFor(const MachineInstr &Inst, const GCNSubtarget &ST, bool IsExpertMode)
static HWEvent getVmemHWEvent(const MachineInstr &Inst, const GCNSubtarget &ST, const SIInstrInfo &TII)
static constexpr StringLiteral toString(HWEvent Event)
HWEvent
TODO: This should be a bitmask from the start instead of having this enum.
bool getMUBUFIsBufferInv(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
iota_range< HWEvent > hw_events(HWEvent MaxEvent=HWEvent::NUM_WAIT_EVENTS)
Return an iterator over all events between FIRST_WAIT_EVENT and MaxEvent (exclusive,...
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209