Line data Source code
1 : //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 : #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
12 :
13 : #include "AMDGPU.h"
14 : #include "AMDKernelCodeT.h"
15 : #include "SIDefines.h"
16 : #include "llvm/ADT/StringRef.h"
17 : #include "llvm/IR/CallingConv.h"
18 : #include "llvm/MC/MCInstrDesc.h"
19 : #include "llvm/Support/AMDHSAKernelDescriptor.h"
20 : #include "llvm/Support/Compiler.h"
21 : #include "llvm/Support/ErrorHandling.h"
22 : #include "llvm/Support/TargetParser.h"
23 : #include <cstdint>
24 : #include <string>
25 : #include <utility>
26 :
27 : namespace llvm {
28 :
29 : class Argument;
30 : class AMDGPUSubtarget;
31 : class FeatureBitset;
32 : class Function;
33 : class GCNSubtarget;
34 : class GlobalValue;
35 : class MCContext;
36 : class MCRegisterClass;
37 : class MCRegisterInfo;
38 : class MCSection;
39 : class MCSubtargetInfo;
40 : class MachineMemOperand;
41 : class Triple;
42 :
43 : namespace AMDGPU {
44 :
45 : #define GET_MIMGBaseOpcode_DECL
46 : #define GET_MIMGDim_DECL
47 : #define GET_MIMGEncoding_DECL
48 : #define GET_MIMGLZMapping_DECL
49 : #include "AMDGPUGenSearchableTables.inc"
50 :
51 : namespace IsaInfo {
52 :
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs reserved for the trap
  // handler — inferred from the name; confirm against the users of this
  // constant.
  TRAP_NUM_SGPRS = 16
};
59 :
60 : /// Streams isa version string for given subtarget \p STI into \p Stream.
61 : void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
62 :
63 : /// \returns True if given subtarget \p STI supports code object version 3,
64 : /// false otherwise.
65 : bool hasCodeObjectV3(const MCSubtargetInfo *STI);
66 :
67 : /// \returns Wavefront size for given subtarget \p STI.
68 : unsigned getWavefrontSize(const MCSubtargetInfo *STI);
69 :
70 : /// \returns Local memory size in bytes for given subtarget \p STI.
71 : unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
72 :
73 : /// \returns Number of execution units per compute unit for given subtarget \p
74 : /// STI.
75 : unsigned getEUsPerCU(const MCSubtargetInfo *STI);
76 :
77 : /// \returns Maximum number of work groups per compute unit for given subtarget
78 : /// \p STI and limited by given \p FlatWorkGroupSize.
79 : unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
80 : unsigned FlatWorkGroupSize);
81 :
82 : /// \returns Maximum number of waves per compute unit for given subtarget \p
83 : /// STI without any kind of limitation.
84 : unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
85 :
86 : /// \returns Maximum number of waves per compute unit for given subtarget \p
87 : /// STI and limited by given \p FlatWorkGroupSize.
88 : unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
89 : unsigned FlatWorkGroupSize);
90 :
91 : /// \returns Minimum number of waves per execution unit for given subtarget \p
92 : /// STI.
93 : unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
94 :
95 : /// \returns Maximum number of waves per execution unit for given subtarget \p
96 : /// STI without any kind of limitation.
97 : unsigned getMaxWavesPerEU();
98 :
99 : /// \returns Maximum number of waves per execution unit for given subtarget \p
100 : /// STI and limited by given \p FlatWorkGroupSize.
101 : unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
102 : unsigned FlatWorkGroupSize);
103 :
104 : /// \returns Minimum flat work group size for given subtarget \p STI.
105 : unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
106 :
107 : /// \returns Maximum flat work group size for given subtarget \p STI.
108 : unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
109 :
110 : /// \returns Number of waves per work group for given subtarget \p STI and
111 : /// limited by given \p FlatWorkGroupSize.
112 : unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
113 : unsigned FlatWorkGroupSize);
114 :
115 : /// \returns SGPR allocation granularity for given subtarget \p STI.
116 : unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
117 :
118 : /// \returns SGPR encoding granularity for given subtarget \p STI.
119 : unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
120 :
121 : /// \returns Total number of SGPRs for given subtarget \p STI.
122 : unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
123 :
124 : /// \returns Addressable number of SGPRs for given subtarget \p STI.
125 : unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
126 :
127 : /// \returns Minimum number of SGPRs that meets the given number of waves per
128 : /// execution unit requirement for given subtarget \p STI.
129 : unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
130 :
131 : /// \returns Maximum number of SGPRs that meets the given number of waves per
132 : /// execution unit requirement for given subtarget \p STI.
133 : unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
134 : bool Addressable);
135 :
136 : /// \returns Number of extra SGPRs implicitly required by given subtarget \p
137 : /// STI when the given special registers are used.
138 : unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
139 : bool FlatScrUsed, bool XNACKUsed);
140 :
141 : /// \returns Number of extra SGPRs implicitly required by given subtarget \p
142 : /// STI when the given special registers are used. XNACK is inferred from
143 : /// \p STI.
144 : unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
145 : bool FlatScrUsed);
146 :
147 : /// \returns Number of SGPR blocks needed for given subtarget \p STI when
148 : /// \p NumSGPRs are used. \p NumSGPRs should already include any special
149 : /// register counts.
150 : unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
151 :
152 : /// \returns VGPR allocation granularity for given subtarget \p STI.
153 : unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
154 :
155 : /// \returns VGPR encoding granularity for given subtarget \p STI.
156 : unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
157 :
158 : /// \returns Total number of VGPRs for given subtarget \p STI.
159 : unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
160 :
161 : /// \returns Addressable number of VGPRs for given subtarget \p STI.
162 : unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
163 :
164 : /// \returns Minimum number of VGPRs that meets given number of waves per
165 : /// execution unit requirement for given subtarget \p STI.
166 : unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
167 :
168 : /// \returns Maximum number of VGPRs that meets given number of waves per
169 : /// execution unit requirement for given subtarget \p STI.
170 : unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
171 :
172 : /// \returns Number of VGPR blocks needed for given subtarget \p STI when
173 : /// \p NumVGPRs are used.
174 : unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs);
175 :
176 : } // end namespace IsaInfo
177 :
178 : LLVM_READONLY
179 : int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
180 :
/// Per-base-opcode properties of a MIMG instruction, looked up via the
/// tablegen-generated searchable table (see GET_MIMGBaseOpcode_DECL above).
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode; ///< Base opcode this entry describes.
  bool Store;                ///< Store flag for this base opcode.
  bool Atomic;               ///< Atomic flag for this base opcode.
  bool AtomicX2;             ///< Set for the double-width atomic variants
                             ///< (inferred from name; values come from
                             ///< tablegen).
  bool Sampler;              ///< Sampler flag for this base opcode.

  uint8_t NumExtraArgs;      ///< Count of extra arguments beyond the
                             ///< coordinate operands.
  bool Gradients;            ///< Whether gradient operands are taken.
  bool Coordinates;          ///< Whether coordinate operands are taken.
  bool LodOrClampOrMip;      ///< Whether an LOD/clamp/mip operand is taken.
  bool HasD16;               ///< Whether a D16 variant exists.
};
194 :
195 : LLVM_READONLY
196 : const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
197 :
/// Per-dimension properties of a MIMG instruction, looked up via the
/// tablegen-generated searchable table (see GET_MIMGDim_DECL above).
struct MIMGDimInfo {
  MIMGDim Dim;           ///< Dimension enum value this entry describes.
  uint8_t NumCoords;     ///< Number of coordinate operands for this dim.
  uint8_t NumGradients;  ///< Number of gradient operands for this dim.
  bool DA;               ///< DA bit value for this dim (array/declare-array
                         ///< encoding bit; semantics defined by the ISA).
};
204 :
205 : LLVM_READONLY
206 : const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
207 :
/// Pairs an L-form MIMG base opcode with its LZ-form counterpart, looked up
/// via the tablegen-generated searchable table (see GET_MIMGLZMapping_DECL
/// above). NOTE(review): presumably used to rewrite sample-L opcodes to the
/// sample-LZ variant when the LOD is known zero — confirm against callers of
/// getMIMGLZMappingInfo().
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;  ///< The L-form base opcode (table key).
  MIMGBaseOpcode LZ; ///< The corresponding LZ-form base opcode.
};
212 :
213 : LLVM_READONLY
214 : const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
215 :
216 : LLVM_READONLY
217 : int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
218 : unsigned VDataDwords, unsigned VAddrDwords);
219 :
220 : LLVM_READONLY
221 : int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
222 :
223 : LLVM_READONLY
224 : int getMCOpcode(uint16_t Opcode, unsigned Gen);
225 :
226 : void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
227 : const MCSubtargetInfo *STI);
228 :
229 : amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
230 :
231 : bool isGroupSegment(const GlobalValue *GV);
232 : bool isGlobalSegment(const GlobalValue *GV);
233 : bool isReadOnlySegment(const GlobalValue *GV);
234 :
235 : /// \returns True if constants should be emitted to .text section for given
236 : /// target triple \p TT, false otherwise.
237 : bool shouldEmitConstantsToTextSection(const Triple &TT);
238 :
239 : /// \returns Integer value requested using \p F's \p Name attribute.
240 : ///
241 : /// \returns \p Default if attribute is not present.
242 : ///
243 : /// \returns \p Default and emits error if requested value cannot be converted
244 : /// to integer.
245 : int getIntegerAttribute(const Function &F, StringRef Name, int Default);
246 :
247 : /// \returns A pair of integer values requested using \p F's \p Name attribute
248 : /// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired
249 : /// is true).
250 : ///
251 : /// \returns \p Default if attribute is not present.
252 : ///
253 : /// \returns \p Default and emits error if one of the requested values cannot be
254 : /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
255 : /// not present.
256 : std::pair<int, int> getIntegerPairAttribute(const Function &F,
257 : StringRef Name,
258 : std::pair<int, int> Default,
259 : bool OnlyFirstRequired = false);
260 :
261 : /// \returns Vmcnt bit mask for given isa \p Version.
262 : unsigned getVmcntBitMask(const IsaVersion &Version);
263 :
264 : /// \returns Expcnt bit mask for given isa \p Version.
265 : unsigned getExpcntBitMask(const IsaVersion &Version);
266 :
267 : /// \returns Lgkmcnt bit mask for given isa \p Version.
268 : unsigned getLgkmcntBitMask(const IsaVersion &Version);
269 :
270 : /// \returns Waitcnt bit mask for given isa \p Version.
271 : unsigned getWaitcntBitMask(const IsaVersion &Version);
272 :
273 : /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
274 : unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
275 :
276 : /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
277 : unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
278 :
279 : /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
280 : unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
281 :
282 : /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
283 : /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
284 : /// \p Lgkmcnt respectively.
285 : ///
286 : /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
287 : /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
288 : /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
289 : /// \p Expcnt = \p Waitcnt[6:4]
290 : /// \p Lgkmcnt = \p Waitcnt[11:8]
291 : void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
292 : unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
293 :
294 : /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
295 : unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
296 : unsigned Vmcnt);
297 :
298 : /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
299 : unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
300 : unsigned Expcnt);
301 :
302 : /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
303 : unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
304 : unsigned Lgkmcnt);
305 :
306 : /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
307 : /// \p Version.
308 : ///
309 : /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
310 : /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
311 : /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
312 : /// Waitcnt[6:4] = \p Expcnt
313 : /// Waitcnt[11:8] = \p Lgkmcnt
314 : /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
315 : ///
316 : /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
317 : /// isa \p Version.
318 : unsigned encodeWaitcnt(const IsaVersion &Version,
319 : unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
320 :
321 : unsigned getInitialPSInputAddr(const Function &F);
322 :
323 : LLVM_READNONE
324 : bool isShader(CallingConv::ID CC);
325 :
326 : LLVM_READNONE
327 : bool isCompute(CallingConv::ID CC);
328 :
329 : LLVM_READNONE
330 : bool isEntryFunctionCC(CallingConv::ID CC);
331 :
332 : // FIXME: Remove this when calling conventions cleaned up
333 : LLVM_READNONE
334 : inline bool isKernel(CallingConv::ID CC) {
335 39346 : switch (CC) {
336 : case CallingConv::AMDGPU_KERNEL:
337 : case CallingConv::SPIR_KERNEL:
338 : return true;
339 3496 : default:
340 : return false;
341 : }
342 : }
343 :
344 : bool hasXNACK(const MCSubtargetInfo &STI);
345 : bool hasMIMG_R128(const MCSubtargetInfo &STI);
346 : bool hasPackedD16(const MCSubtargetInfo &STI);
347 :
348 : bool isSI(const MCSubtargetInfo &STI);
349 : bool isCI(const MCSubtargetInfo &STI);
350 : bool isVI(const MCSubtargetInfo &STI);
351 : bool isGFX9(const MCSubtargetInfo &STI);
352 :
353 : /// Is Reg - scalar register
354 : bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
355 :
356 : /// Is there any intersection between registers
357 : bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
358 :
359 : /// If \p Reg is a pseudo reg, return the correct hardware register given
360 : /// \p STI otherwise return \p Reg.
361 : unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
362 :
363 : /// Convert hardware register \p Reg to a pseudo register
364 : LLVM_READNONE
365 : unsigned mc2PseudoReg(unsigned Reg);
366 :
367 : /// Can this operand also contain immediate values?
368 : bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
369 :
370 : /// Is this floating-point operand?
371 : bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
372 :
373 : /// Does this operand support only inlinable literals?
374 : bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
375 :
376 : /// Get the size in bits of a register from the register class \p RC.
377 : unsigned getRegBitWidth(unsigned RCID);
378 :
379 : /// Get the size in bits of a register from the register class \p RC.
380 : unsigned getRegBitWidth(const MCRegisterClass &RC);
381 :
382 : /// Get size of register operand
383 : unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
384 : unsigned OpNo);
385 :
386 : LLVM_READNONE
387 : inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
388 23221 : switch (OpInfo.OperandType) {
389 : case AMDGPU::OPERAND_REG_IMM_INT32:
390 : case AMDGPU::OPERAND_REG_IMM_FP32:
391 : case AMDGPU::OPERAND_REG_INLINE_C_INT32:
392 : case AMDGPU::OPERAND_REG_INLINE_C_FP32:
393 : return 4;
394 :
395 : case AMDGPU::OPERAND_REG_IMM_INT64:
396 : case AMDGPU::OPERAND_REG_IMM_FP64:
397 : case AMDGPU::OPERAND_REG_INLINE_C_INT64:
398 : case AMDGPU::OPERAND_REG_INLINE_C_FP64:
399 : return 8;
400 :
401 : case AMDGPU::OPERAND_REG_IMM_INT16:
402 : case AMDGPU::OPERAND_REG_IMM_FP16:
403 : case AMDGPU::OPERAND_REG_INLINE_C_INT16:
404 : case AMDGPU::OPERAND_REG_INLINE_C_FP16:
405 : case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
406 : case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
407 : return 2;
408 :
409 0 : default:
410 0 : llvm_unreachable("unhandled operand type");
411 : }
412 : }
413 :
414 : LLVM_READNONE
415 : inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
416 23221 : return getOperandSize(Desc.OpInfo[OpNo]);
417 : }
418 :
419 : /// Is this literal inlinable
420 : LLVM_READNONE
421 : bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
422 :
423 : LLVM_READNONE
424 : bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
425 :
426 : LLVM_READNONE
427 : bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
428 :
429 : LLVM_READNONE
430 : bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
431 :
432 : bool isArgPassedInSGPR(const Argument *Arg);
433 :
434 : /// \returns The encoding that will be used for \p ByteOffset in the SMRD
435 : /// offset field.
436 : int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
437 :
438 : /// \returns true if this offset is small enough to fit in the SMRD
439 : /// offset field. \p ByteOffset should be the offset in bytes and
440 : /// not the encoded offset.
441 : bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
442 :
443 : bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
444 : const GCNSubtarget *Subtarget, uint32_t Align = 4);
445 :
446 : /// \returns true if the intrinsic is divergent
447 : bool isIntrinsicSourceOfDivergence(unsigned IntrID);
448 :
449 : } // end namespace AMDGPU
450 : } // end namespace llvm
451 :
452 : #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
|