//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

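// The getters below describe the s_waitcnt SIMM16 field layout used by the
// encode/decode helpers further down:
//   bits  3:0   vmcnt (lower bits)
//   bits  6:4   expcnt
//   bits 11:8   lgkmcnt
//   bits 15:14  vmcnt (upper bits, gfx9 and later only)
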
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

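// Returns the opcode of the MIMG variant that shares \p Opc's base opcode
// and encoding but writes \p NewChannels data dwords, e.g. an image load
// narrowed from 4 enabled channels to 2; returns -1 if no such variant
// exists.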
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

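// Streams a string of the form
// <arch>-<vendor>-<os>-<environment>-gfx<major><minor><stepping>[+xnack],
// e.g. "amdgcn-amd-amdhsa--gfx900+xnack" (the environment component of the
// triple may be empty, as in this example).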
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto Version = getIsaVersion(STI->getCPU());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << Version.Major
         << Version.Minor
         << Version.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";

  Stream.flush();
}

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}

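// GCN hardware supports at most 40 waves per CU; e.g. a 256-thread work
// group on a wave64 target occupies 4 waves, giving min(40 / 4, 16) = 10
// work groups per CU.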
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  if (!STI->getFeatureBits().test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
  return getMaxWavesPerEU() * getEUsPerCU(STI);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}

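// E.g. a 256-thread work group on a wave64 target occupies 4 waves per CU;
// distributed over 4 EUs that is alignTo(4, 4) / 4 = 1 wave per EU.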
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
                 getEUsPerCU(STI)) / getEUsPerCU(STI);
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
         getWavefrontSize(STI);
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

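// E.g. on gfx8 (800 physical SGPRs, allocation granule 16) at 8 waves per
// EU: alignDown(800 / 8, 16) = 96 SGPRs, which is then clamped against the
// addressable limit (102 on gfx8, or 112 when not Addressable).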
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

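// Note that the counts below are not additive: on gfx8+ a kernel that uses
// FLAT_SCRATCH reserves 6 extra SGPRs in total, which also covers the VCC
// and XNACK_MASK slots.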
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

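// E.g. 10 SGPRs in use: alignTo(10, 8) / 8 - 1 = 1, i.e. two 8-SGPR
// granules encoded as 1.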
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
  return getVGPRAllocGranule(STI);
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  return getTotalNumVGPRs(STI);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

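// E.g. 7 VGPRs in use: alignTo(7, 4) / 4 - 1 = 1, i.e. two 4-VGPR granules
// encoded as 1.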
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

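// The attribute string holds two comma-separated integers, e.g.
// "amdgpu-flat-work-group-size"="128,256"; the second value may be omitted
// when OnlyFirstRequired is set.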
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

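// E.g. on gfx9, encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/2,
// /*Lgkmcnt=*/3) packs vmcnt into bits 3:0 (and 15:14), expcnt into bits
// 6:4 and lgkmcnt into bits 11:8, yielding 0x321.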
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

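// E.g. on VI, getMCReg(AMDGPU::TTMP0, STI) returns TTMP0_vi, while
// mc2PseudoReg maps either TTMP0_vi or TTMP0_gfx9 back to the generic
// TTMP0.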
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

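// A packed v2f16 literal is inlinable only when both halves hold the same
// inlinable f16 value, e.g. 0x3C003C00, i.e. <1.0, 1.0>.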
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

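// Pre-GCN3 subtargets encode the SMRD immediate offset in dwords, so a
// byte offset of 16 encodes as 4; GCN3 and later encode the byte offset
// directly (with a wider 20-bit field, as checked below).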
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to
// a hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
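//
// For example, with Align == 4 the largest ImmOffset is alignDown(4095, 4)
// == 4092; Imm == 4100 then yields ImmOffset == 4092 and SOffset == 8 (an
// SOffset inline constant), while larger values put the aligned high bits
// into SOffset instead.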
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}
} // end namespace AMDGPU
} // end namespace llvm
|