LLVM 23.0.0git
AMDGPUWaitcntUtils.h
Go to the documentation of this file.
1//===- AMDGPUWaitcntUtils.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
11
12#include "llvm/ADT/Sequence.h"
14#include "llvm/Support/Debug.h"
17
18namespace llvm {
19
20namespace AMDGPU {
21
23 LOAD_CNT = 0, // VMcnt prior to gfx12.
24 DS_CNT, // LGKMcnt prior to gfx12.
26 STORE_CNT, // VScnt in gfx10/gfx11.
29 BVH_CNT, // gfx12+ only.
30 KM_CNT, // gfx12+ only.
31 X_CNT, // gfx1250.
32 ASYNC_CNT, // gfx1250.
33 TENSOR_CNT, // gfx1250.
35 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
36 VM_VSRC, // gfx12+ expert mode only.
39};
40
42
43// Return an iterable range over all counters from LOAD_CNT (the first
44// counter) up to \c MaxCounter (exclusive; the default value yields an
45// enumeration over all counters).
48
49/// Represents the hardware counter limits for different wait count types.
///
/// Each field holds the maximum value for one counter kind.
/// NOTE(review): no field for TENSOR_CNT appears among the members below;
/// confirm whether that is intentional.
51 unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
52 unsigned ExpcntMax;
53 unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
54 unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
55 unsigned SamplecntMax; // gfx12+ only.
56 unsigned BvhcntMax; // gfx12+ only.
57 unsigned KmcntMax; // gfx12+ only.
58 unsigned XcntMax; // gfx1250.
59 unsigned AsyncMax; // gfx1250.
60 unsigned VaVdstMax; // gfx12+ expert mode only.
61 unsigned VmVsrcMax; // gfx12+ expert mode only.
62
 /// Defaulted constructor. None of the fields above has a default member
 /// initializer, so a default-initialized object holds indeterminate limits
 /// until they are assigned (value-initialization zeroes them).
63 HardwareLimits() = default;
64
65 /// Initializes hardware limits from ISA version.
67
 /// Looks up a limit by counter type \p T.
 /// NOTE(review): defined out of line; presumably returns the *Max field that
 /// corresponds to \p T. Confirm against the definition.
68 unsigned get(InstCounterType T) const;
69};
70
71} // namespace AMDGPU
72
// Marks AMDGPU::InstCounterType as iterable by specializing
// enum_iteration_traits with is_iterable = true.
// NOTE(review): presumably this is what allows the enum to be used with the
// range helpers from llvm/ADT/Sequence.h (e.g. inst_counter_types) — confirm.
73template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
74 static constexpr bool is_iterable = true;
75};
76
77namespace AMDGPU {
78
79/// Represents the counter values to wait for in an s_waitcnt instruction.
80///
81/// Large values (including the maximum possible integer) can be used to
82/// represent "don't care" waits.
///
/// Within this class the value ~0u is the sentinel the predicates test
/// against: a counter holding ~0u is treated as having no wait.
83class Waitcnt {
 // One count per counter, indexed by InstCounterType.
84 std::array<unsigned, NUM_INST_CNTS> Cnt;
85
86public:
 /// \returns the count stored for counter \p T.
87 unsigned get(InstCounterType T) const { return Cnt[T]; }
 /// Overwrites the count stored for counter \p T with \p Val.
88 void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
89
 /// Creates a Waitcnt in which every counter is ~0u (no wait on anything).
90 Waitcnt() { fill(Cnt, ~0u); }
91 // Pre-gfx12 constructor.
 // Delegates to the default constructor first, so every counter not named
 // by a parameter stays at ~0u.
92 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
93 : Waitcnt() {
94 Cnt[LOAD_CNT] = VmCnt;
95 Cnt[EXP_CNT] = ExpCnt;
96 Cnt[DS_CNT] = LgkmCnt;
97 Cnt[STORE_CNT] = VsCnt;
98 }
99
100 // gfx12+ constructor.
 // Also starts from the all-~0u state, then stores one value per parameter.
101 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
102 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
103 unsigned AsyncCnt, unsigned TensorCnt, unsigned VaVdst,
104 unsigned VmVsrc)
105 : Waitcnt() {
106 Cnt[LOAD_CNT] = LoadCnt;
107 Cnt[DS_CNT] = DsCnt;
108 Cnt[EXP_CNT] = ExpCnt;
109 Cnt[STORE_CNT] = StoreCnt;
110 Cnt[SAMPLE_CNT] = SampleCnt;
111 Cnt[BVH_CNT] = BvhCnt;
112 Cnt[KM_CNT] = KmCnt;
113 Cnt[X_CNT] = XCnt;
114 Cnt[ASYNC_CNT] = AsyncCnt;
115 Cnt[TENSOR_CNT] = TensorCnt;
116 Cnt[VA_VDST] = VaVdst;
117 Cnt[VM_VSRC] = VmVsrc;
118 }
119
 /// \returns true if at least one counter holds a value other than ~0u.
120 bool hasWait() const {
121 return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
122 }
123
 // STORE_CNT is skipped: only a non-~0u value in some other counter makes
 // the result true.
126 if (T == STORE_CNT)
127 continue;
128 if (Cnt[T] != ~0u)
129 return true;
130 }
131 return false;
132 }
133
 // Keep the smaller of the stored count and Count, so an existing stricter
 // wait is never relaxed.
135 set(T, std::min(get(T), Count));
136 }
137
139
 /// \returns true if STORE_CNT holds a value other than ~0u.
140 bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
141
 /// \returns true if VA_VDST or VM_VSRC holds a value other than ~0u.
142 bool hasWaitDepctr() const {
143 return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
144 }
145
147 // Does the right thing provided self and Other are either both pre-gfx12
148 // or both gfx12+.
 // Per counter, the result takes the smaller of the two counts.
151 Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
152 return Wait;
153 }
154
 /// Writes counters to \p OS as "name: value" entries joined by a
 /// ListSeparator, writes "none" if no entry was emitted, and ends the
 /// output with a newline.
155 void print(raw_ostream &OS) const {
156 ListSeparator LS;
158 OS << LS << getInstCounterName(T) << ": " << Cnt[T];
159 if (LS.unused())
160 OS << "none";
161 OS << '\n';
162 }
163
 // dump() is declared only when NDEBUG is not defined or LLVM_ENABLE_DUMP is
 // defined; its definition is not in this header.
164#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
165 LLVM_DUMP_METHOD void dump() const;
166#endif
167
 // Stream insertion forwards to print() and returns the stream.
169 Wait.print(OS);
170 return OS;
171 }
172};
173
/// Decodes the immediate \p Encoded into a Waitcnt for given isa \p Version.
/// NOTE(review): presumably the single combined s_waitcnt immediate, as
/// opposed to the split encodings handled by the functions declared further
/// down — confirm against the definition.
174Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
175
/// Encodes \p Decoded as an immediate for given isa \p Version.
/// NOTE(review): presumably the inverse of decodeWaitcnt — confirm against
/// the definition.
176unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
177
178// The following are only meaningful on targets that support
179// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
180
181/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
182/// isa \p Version.
183Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
184
185/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
186/// isa \p Version.
187Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
188
189/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
190/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
191/// \p Version.
192unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
193
194/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
195/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
196/// \p Version.
197unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
198
199/// Determine if \p Opcode is a gfx12+ single-counter S_WAIT_*CNT instruction,
200/// and if so, which counter it is waiting on.
201std::optional<AMDGPU::InstCounterType> counterTypeForInstr(unsigned Opcode);
202
203} // namespace AMDGPU
204
205} // namespace llvm
206
207#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
#define T
Provides some synthesis utilities to produce sequences of values.
This file contains some functions that are useful when dealing with strings.
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represents the counter values to wait for in an s_waitcnt instruction.
void print(raw_ostream &OS) const
void add(AMDGPU::InstCounterType T, unsigned Count)
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned AsyncCnt, unsigned TensorCnt, unsigned VaVdst, unsigned VmVsrc)
LLVM_DUMP_METHOD void dump() const
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void clear(AMDGPU::InstCounterType T)
void set(InstCounterType T, unsigned Val)
A helper class to return the specified delimiter string after the first invocation of operator String...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:888
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:54
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
std::optional< AMDGPU::InstCounterType > counterTypeForInstr(unsigned Opcode)
Determine if MI is a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is wa...
StringLiteral getInstCounterName(InstCounterType T)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
This is an optimization pass for GlobalISel generic memory operations.
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
@ Wait
Definition Threading.h:60
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
unsigned get(InstCounterType T) const
Instruction set architecture version.