LLVM 23.0.0git
SIProgramInfo.cpp
Go to the documentation of this file.
1//===-- SIProgramInfo.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12/// entry functions.
13//
14//===----------------------------------------------------------------------===//
15//
16
17#include "SIProgramInfo.h"
18#include "GCNSubtarget.h"
19#include "SIDefines.h"
21#include "llvm/MC/MCExpr.h"
22
23using namespace llvm;
24
26 MCContext &Ctx = MF.getContext();
27
28 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29
30 CodeSizeInBytes.reset();
31
32 VGPRBlocks = ZeroExpr;
33 SGPRBlocks = ZeroExpr;
34 Priority = 0;
35 FloatMode = 0;
36 Priv = 0;
37 DX10Clamp = 0;
38 DebugMode = 0;
39 IEEEMode = 0;
40 WgpMode = 0;
41 MemOrdered = 0;
42 FwdProgress = 0;
43 RrWgMode = 0;
44 ScratchSize = ZeroExpr;
45
46 LDSBlocks = 0;
47 ScratchBlocks = ZeroExpr;
48
49 ScratchEnable = ZeroExpr;
50 UserSGPR = 0;
52 TGIdXEnable = 0;
53 TGIdYEnable = 0;
54 TGIdZEnable = 0;
55 TGSizeEnable = 0;
57 EXCPEnMSB = 0;
58 LdsSize = 0;
59 EXCPEnable = 0;
60
61 ComputePGMRSrc3 = ZeroExpr;
62
63 NumVGPR = ZeroExpr;
64 NumArchVGPR = ZeroExpr;
65 NumAccVGPR = ZeroExpr;
66 AccumOffset = ZeroExpr;
67 TgSplit = 0;
68 NumSGPR = ZeroExpr;
69 SGPRSpill = 0;
70 VGPRSpill = 0;
71 LDSSize = 0;
72 FlatUsed = ZeroExpr;
73
74 NumSGPRsForWavesPerEU = ZeroExpr;
75 NumVGPRsForWavesPerEU = ZeroExpr;
76 NamedBarCnt = ZeroExpr;
77 Occupancy = ZeroExpr;
78 DynamicCallStack = ZeroExpr;
79 VCCUsed = ZeroExpr;
80}
81
83 const GCNSubtarget &ST) {
86 S_00B848_PRIV(ProgInfo.Priv) |
88 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
91
92 if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
94 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
95 }
96
97 if (ST.hasRrWGMode())
98 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
99
100 return Reg;
101}
102
/// Build the plain-integer portion of the PGM_RSRC1 register value for the
/// calling convention \p CC from the hardware flags held in \p ProgInfo,
/// gated on the features reported by \p ST. The caller wraps the returned
/// value in a constant expression and ORs in the VGPR/SGPR block counts.
///
/// NOTE(review): this listing omits several lines of the function (the
/// embedded numbering skips 105-106, 108, 119-120, 122-123, 125, 127, 129 and
/// 131). The initial value of Reg, the case labels of the switch and the
/// right-hand operands of the trailing '|' expressions are not visible here;
/// confirm against the real source before relying on these comments.
103static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
104 CallingConv::ID CC, const GCNSubtarget &ST) {
107 S_00B848_PRIV(ProgInfo.Priv) |
109
 // DX10_CLAMP and IEEE_MODE are only encoded when the subtarget reports
 // FeatureDX10ClampAndIEEEMode.
110 if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
111 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
112 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
113 }
114
 // RR_WG_MODE is only encoded when the subtarget reports hasRrWGMode().
115 if (ST.hasRrWGMode())
116 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
117
 // Per-calling-convention bits: the visible arms use the S_00B228_* and
 // S_00B428_* WGP_MODE encodings; any other convention adds nothing.
118 switch (CC) {
121 break;
124 break;
126 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
128 break;
130 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
132 break;
133 default:
134 break;
135 }
136 return Reg;
137}
138
140 const SIProgramInfo &ProgInfo) {
141 uint64_t MaxNumerUserSGRPs = AMDGPU::getMaxNumUserSGPRs(ST);
142 uint64_t Reg = 0;
143 if (MaxNumerUserSGRPs == 32) {
145 } else if (MaxNumerUserSGRPs == 16) {
146 Reg = (S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
148 } else {
149 llvm_unreachable("max Number of User SGPRs are either 32 or 16");
150 }
151
152 Reg |= S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
158 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
160
161 return Reg;
162}
163
164static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
165 MCContext &Ctx) {
166 if (Mask) {
167 const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
168 Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
169 }
170 if (Shift) {
171 const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
172 Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
173 }
174 return Val;
175}
176
178 MCContext &Ctx) const {
179 uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
180 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
181 const MCExpr *Res = MCBinaryExpr::createOr(
182 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
183 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
184 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
185}
186
188 const GCNSubtarget &ST,
189 MCContext &Ctx) const {
190 if (AMDGPU::isCompute(CC)) {
191 return getComputePGMRSrc1(ST, Ctx);
192 }
193
194 uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
195 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
196 const MCExpr *Res = MCBinaryExpr::createOr(
197 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
198 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
199 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
200}
201
203 MCContext &Ctx) const {
204 uint64_t Reg = getComputePGMRSrc2Reg(ST, *this);
205 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
206 return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
207}
208
210 const GCNSubtarget &ST,
211 MCContext &Ctx) const {
212 if (AMDGPU::isCompute(CC))
213 return getComputePGMRSrc2(ST, Ctx);
214
215 return MCConstantExpr::create(0, Ctx);
216}
217
219 bool IsLowerBound) {
220 if (!IsLowerBound && CodeSizeInBytes.has_value())
221 return *CodeSizeInBytes;
222
223 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
224 const SIInstrInfo *TII = STM.getInstrInfo();
225
226 uint64_t CodeSize = 0;
227
228 for (const MachineBasicBlock &MBB : MF) {
229 // The amount of padding needed to align code can be both underestimated
230 // and overestimated. When inline asm is used, getInstSizeInBytes() returns
231 // the maximum size of a single instruction, whereas the real size may
232 // differ. At this point CodeSize may already be off.
233 if (!IsLowerBound)
234 CodeSize = alignTo(CodeSize, MBB.getAlignment());
235
236 for (const MachineInstr &MI : MBB) {
237 // TODO: CodeSize should account for multiple functions.
238
239 if (MI.isMetaInstruction())
240 continue;
241
242 // We cannot properly estimate inline asm size. It can be as small as zero
243 // if that is just a comment.
244 if (IsLowerBound && MI.isInlineAsm())
245 continue;
246
247 CodeSize += TII->getInstSizeInBytes(MI);
248 }
249 }
250
252 return CodeSize;
253}
MachineBasicBlock & MBB
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register Reg
#define S_00B84C_EXCP_EN(x)
Definition SIDefines.h:1210
#define S_00B428_MEM_ORDERED(x)
Definition SIDefines.h:1171
#define S_00B028_MEM_ORDERED(x)
Definition SIDefines.h:1147
#define S_00B84C_TGID_Z_EN(x)
Definition SIDefines.h:1193
#define S_00B228_WGP_MODE(x)
Definition SIDefines.h:1159
#define S_00B84C_USER_SGPR_GFX1250(x)
Definition SIDefines.h:1214
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1252
#define S_00B228_MEM_ORDERED(x)
Definition SIDefines.h:1162
#define S_00B848_RR_WG_MODE(x)
Definition SIDefines.h:1240
#define S_00B84C_TGID_X_EN(x)
Definition SIDefines.h:1187
#define S_00B848_DEBUG_MODE(x)
Definition SIDefines.h:1243
#define S_00B428_WGP_MODE(x)
Definition SIDefines.h:1168
#define S_00B848_PRIV(x)
Definition SIDefines.h:1234
#define S_00B84C_TG_SIZE_EN(x)
Definition SIDefines.h:1196
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition SIDefines.h:1199
#define S_00B84C_LDS_SIZE(x)
Definition SIDefines.h:1207
#define S_00B84C_USER_SGPR(x)
Definition SIDefines.h:1181
#define S_00B84C_TRAP_HANDLER(x)
Definition SIDefines.h:1184
#define S_00B84C_TGID_Y_EN(x)
Definition SIDefines.h:1190
#define S_00B128_MEM_ORDERED(x)
Definition SIDefines.h:1154
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1249
#define S_00B84C_EXCP_EN_MSB(x)
Definition SIDefines.h:1203
#define S_00B848_DX10_CLAMP(x)
Definition SIDefines.h:1237
#define S_00B848_PRIORITY(x)
Definition SIDefines.h:1228
#define S_00B848_IEEE_MODE(x)
Definition SIDefines.h:1246
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1255
#define S_00B848_FLOAT_MODE(x)
Definition SIDefines.h:1231
static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo, CallingConv::ID CC, const GCNSubtarget &ST)
static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, const GCNSubtarget &ST)
static uint64_t getComputePGMRSrc2Reg(const GCNSubtarget &ST, const SIProgramInfo &ProgInfo)
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Defines struct to track resource usage and hardware flags for kernels and entry functions.
const SIInstrInfo * getInstrInfo() const override
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:413
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MCContext & getContext() const
Representation of each machine instruction.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
This is an optimization pass for GlobalISel generic memory operations.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
Track resource usage for kernels / entry functions.
const MCExpr * NumSGPR
const MCExpr * NumArchVGPR
uint64_t getFunctionCodeSize(const MachineFunction &MF, bool IsLowerBound=false)
const MCExpr * getPGMRSrc2(CallingConv::ID CC, const GCNSubtarget &ST, MCContext &Ctx) const
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
const MCExpr * VCCUsed
const MCExpr * FlatUsed
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
std::optional< uint64_t > CodeSizeInBytes
const MCExpr * NumVGPR
const MCExpr * getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST, MCContext &Ctx) const
const MCExpr * Occupancy
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
void reset(const MachineFunction &MF)