//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned>
    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                            llvm::cl::desc("AMDHSA Code Object Version"),
                            llvm::cl::init(4));
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
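
// Illustrative example of the helpers above: getBitMask(4, 3) == 0x70,
// packBits(5, 0, 4, 3) == 0x50, and unpackBits(0x50, 4, 3) == 5.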

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return std::nullopt;

  switch (AmdhsaCodeObjectVersion) {
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

unsigned getAmdhsaCodeObjectVersion() {
  return AmdhsaCodeObjectVersion;
}

unsigned getCodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  // Default code object version.
  return AMDHSA_COV4;
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
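
// Illustrative example: for a 3D image sample with packed 16-bit coordinates
// (IsA16), the three coordinates occupy divideCeil(3, 2) == 2 address dwords
// instead of 3 full-precision dwords.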

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY) {
  const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not support
      // xnack emit a warning. Setting will remain set to "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    switch (CodeObjectVersion) {
    case AMDHSA_COV3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case AMDHSA_COV4:
    case AMDHSA_COV5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}
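
// Illustrative result: for a gfx906 HSA target with sramecc enabled and
// xnack disabled under code object v4+, toString() produces
// "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-".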

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
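
// Illustrative example: with an encoding granule of 8, NumSGPRs = 10 is
// rounded up to 16, so the encoded SGPRBlocks field is 16 / 8 - 1 = 1.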

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
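
// Illustrative use: an IR attribute such as
// "amdgpu-flat-work-group-size"="1,256" parses into the pair {1, 256}; with
// OnlyFirstRequired set, a single value like "64" is also accepted and the
// second element keeps its default.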

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
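
// Illustrative example: on gfx9, vmcnt is split into 4 low bits at shift 0
// plus 2 high bits at shift 14, expcnt is 3 bits at shift 4, and lgkmcnt is
// 4 bits at shift 8, so getWaitcntBitMask() == 0xcf7f and a fully-set
// encoding decodes to vmcnt=63, expcnt=7, lgkmcnt=15.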

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names first,
// followed by aliases if any. It is not required but recommended to arrange
// operands so that operand encoding matches operand position in the table.
// This will make disassembly a bit more efficient. Unused slots in the table
// shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}
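
// Illustrative example (assuming the usual s_getreg field layout ID_SHIFT_=0,
// OFFSET_SHIFT_=6, WIDTH_M1_SHIFT_=11): encodeHwreg(1, 0, 32)
// == 1 | (0 << 6) | (31 << 11) == 0xf801.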

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
    {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
    {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
    {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
    {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disable leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
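
// Illustrative example: getTgtId("pos3") == ET_POS0 + 3, while "pos03" is
// rejected because leading zeroes are disallowed; getTgtName inverts the
// mapping, splitting an id back into a base name and index.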

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
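
// Illustrative example (assuming DFMT_SHIFT == 0 and NFMT_SHIFT == 4):
// encodeDfmtNfmt(4, 7) == 4 | (7 << 4) == 0x74, and decodeDfmtNfmt recovers
// Dfmt == 4 and Nfmt == 7 from it.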

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}
} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? "" : Msg[Idx].Name;
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (MsgId == ID_SYSMSG)
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
    case ID_GS_DONE_PreGFX11:
      return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
    }
  }
  return OpId == OP_NONE_;
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  assert(msgRequiresOp(MsgId, STI));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP) ?
          (StreamId == STREAM_ID_NONE_) :
          (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
      (!isGFX11Plus(STI) &&
       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
      (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
      OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
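
// Illustrative example (assuming the pre-GFX11 layout OP_SHIFT_ == 4 and
// STREAM_ID_SHIFT_ == 8): encodeMsg(2 /*GS*/, 2 /*EMIT*/, 0) == 0x22, the
// familiar s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) immediate.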

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS_Chain:
    case CallingConv::AMDGPU_CS_ChainPreserve:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC);
  }
}

bool isChainCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return true;
  default:
    return false;
  }
}

bool isKernelCC(const Function *Func) {
  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}
1951
1952bool hasXNACK(const MCSubtargetInfo &STI) {
1953 return STI.hasFeature(AMDGPU::FeatureXNACK);
1954}
1955
1956bool hasSRAMECC(const MCSubtargetInfo &STI) {
1957 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
1958}
1959
1961 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
1962}
1963
1964bool hasA16(const MCSubtargetInfo &STI) {
1965 return STI.hasFeature(AMDGPU::FeatureA16);
1966}
1967
1968bool hasG16(const MCSubtargetInfo &STI) {
1969 return STI.hasFeature(AMDGPU::FeatureG16);
1970}
1971
1972bool hasPackedD16(const MCSubtargetInfo &STI) {
1973 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
1974 !isSI(STI);
1975}
1976
1977bool hasGDS(const MCSubtargetInfo &STI) {
1978 return STI.hasFeature(AMDGPU::FeatureGDS);
1979}
1980
1981unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
1982 auto Version = getIsaVersion(STI.getCPU());
1983 if (Version.Major == 10)
1984 return Version.Minor >= 3 ? 13 : 5;
1985 if (Version.Major == 11)
1986 return 5;
1987 return 0;
1988}
1989
1990unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
1991
1992bool isSI(const MCSubtargetInfo &STI) {
1993 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
1994}
1995
1996bool isCI(const MCSubtargetInfo &STI) {
1997 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
1998}
1999
2000bool isVI(const MCSubtargetInfo &STI) {
2001 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2002}
2003
2004bool isGFX9(const MCSubtargetInfo &STI) {
2005 return STI.hasFeature(AMDGPU::FeatureGFX9);
2006}
2007
2008bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2009 return isGFX9(STI) || isGFX10(STI);
2010}
2011
2012bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2013 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2014}
2015
2016bool isGFX8Plus(const MCSubtargetInfo &STI) {
2017 return isVI(STI) || isGFX9Plus(STI);
2018}
2019
2020bool isGFX9Plus(const MCSubtargetInfo &STI) {
2021 return isGFX9(STI) || isGFX10Plus(STI);
2022}
2023
2024bool isGFX10(const MCSubtargetInfo &STI) {
2025 return STI.hasFeature(AMDGPU::FeatureGFX10);
2026}
2027
2028bool isGFX10Plus(const MCSubtargetInfo &STI) {
2029 return isGFX10(STI) || isGFX11Plus(STI);
2030}
2031
2032bool isGFX11(const MCSubtargetInfo &STI) {
2033 return STI.hasFeature(AMDGPU::FeatureGFX11);
2034}
2035
2036bool isGFX11Plus(const MCSubtargetInfo &STI) {
2037 return isGFX11(STI);
2038}
2039
2040bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2041 return !isGFX11Plus(STI);
2042}
2043
2044bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2045 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2046}
2047
2048bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2049 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2050}
2051
2052bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2053 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2054}
2055
2056bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2057 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2058}
2059
2060bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2061 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2062}
2063
2064bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2065 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2066}
2067
2068bool isGFX90A(const MCSubtargetInfo &STI) {
2069 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2070}
2071
2072bool isGFX940(const MCSubtargetInfo &STI) {
2073 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2074}
2075
2076bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2077 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2078}
2079
2080bool hasMAIInsts(const MCSubtargetInfo &STI) {
2081 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2082}
2083
2084bool hasVOPD(const MCSubtargetInfo &STI) {
2085 return STI.hasFeature(AMDGPU::FeatureVOPD);
2086}
2087
2088unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2089 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2090}
2091
2092int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2093 int32_t ArgNumVGPR) {
2094 if (has90AInsts && ArgNumAGPR)
2095 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2096 return std::max(ArgNumVGPR, ArgNumAGPR);
2097}
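A standalone sketch of the accounting above: with gfx90a-style instructions the AGPRs share one physical register file with the VGPRs and the AGPR block starts on a four-register boundary, otherwise the two files are separate and the larger count wins. The helper names below are hypothetical re-implementations, not the LLVM ones:

// Standalone sketch of the gfx90a VGPR/AGPR accounting (hypothetical
// helper names; alignTo4 mirrors llvm::alignTo(N, 4)).
#include <algorithm>
#include <cstdint>
#include <cstdio>

static int32_t alignTo4(int32_t N) { return (N + 3) & ~3; }

static int32_t totalNumVGPRs(bool Has90AInsts, int32_t NumAGPR,
                             int32_t NumVGPR) {
  if (Has90AInsts && NumAGPR)
    return alignTo4(NumVGPR) + NumAGPR; // shared file, aligned AGPR block
  return std::max(NumVGPR, NumAGPR);    // separate files: take the max
}

int main() {
  // gfx90a: 6 VGPRs round up to 8, plus 4 AGPRs -> 12 registers total.
  printf("%d\n", totalNumVGPRs(true, /*NumAGPR=*/4, /*NumVGPR=*/6));  // 12
  // Without gfx90a insts the files are separate -> max(6, 4) = 6.
  printf("%d\n", totalNumVGPRs(false, 4, 6));                         // 6
  return 0;
}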
2098
2099bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2100 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2101 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2102 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2103 Reg == AMDGPU::SCC;
2104}
2105
2106bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2107 return MRI.getEncodingValue(Reg) & AMDGPU::EncValues::IS_HI;
2108}
2109
2110#define MAP_REG2REG \
2111 using namespace AMDGPU; \
2112 switch(Reg) { \
2113 default: return Reg; \
2114 CASE_CI_VI(FLAT_SCR) \
2115 CASE_CI_VI(FLAT_SCR_LO) \
2116 CASE_CI_VI(FLAT_SCR_HI) \
2117 CASE_VI_GFX9PLUS(TTMP0) \
2118 CASE_VI_GFX9PLUS(TTMP1) \
2119 CASE_VI_GFX9PLUS(TTMP2) \
2120 CASE_VI_GFX9PLUS(TTMP3) \
2121 CASE_VI_GFX9PLUS(TTMP4) \
2122 CASE_VI_GFX9PLUS(TTMP5) \
2123 CASE_VI_GFX9PLUS(TTMP6) \
2124 CASE_VI_GFX9PLUS(TTMP7) \
2125 CASE_VI_GFX9PLUS(TTMP8) \
2126 CASE_VI_GFX9PLUS(TTMP9) \
2127 CASE_VI_GFX9PLUS(TTMP10) \
2128 CASE_VI_GFX9PLUS(TTMP11) \
2129 CASE_VI_GFX9PLUS(TTMP12) \
2130 CASE_VI_GFX9PLUS(TTMP13) \
2131 CASE_VI_GFX9PLUS(TTMP14) \
2132 CASE_VI_GFX9PLUS(TTMP15) \
2133 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2134 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2135 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2136 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2137 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2138 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2139 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2140 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2141 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2142 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2143 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2144 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2145 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2146 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2147 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2148 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2149 CASE_GFXPRE11_GFX11PLUS(M0) \
2150 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2151 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2152 }
2153
2154#define CASE_CI_VI(node) \
2155 assert(!isSI(STI)); \
2156 case node: return isCI(STI) ? node##_ci : node##_vi;
2157
2158#define CASE_VI_GFX9PLUS(node) \
2159 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2160
2161#define CASE_GFXPRE11_GFX11PLUS(node) \
2162 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2163
2164#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2165 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2166
2167unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2168 if (STI.getTargetTriple().getArch() == Triple::r600)
2169 return Reg;
2170 MAP_REG2REG
2171}
2172
2173#undef CASE_CI_VI
2174#undef CASE_VI_GFX9PLUS
2175#undef CASE_GFXPRE11_GFX11PLUS
2176#undef CASE_GFXPRE11_GFX11PLUS_TO
2177
2178#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2179#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2180#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2181#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2182
2183unsigned mc2PseudoReg(unsigned Reg) {
2184 MAP_REG2REG
2185}
2186
2187bool isInlineValue(unsigned Reg) {
2188 switch (Reg) {
2189 case AMDGPU::SRC_SHARED_BASE_LO:
2190 case AMDGPU::SRC_SHARED_BASE:
2191 case AMDGPU::SRC_SHARED_LIMIT_LO:
2192 case AMDGPU::SRC_SHARED_LIMIT:
2193 case AMDGPU::SRC_PRIVATE_BASE_LO:
2194 case AMDGPU::SRC_PRIVATE_BASE:
2195 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2196 case AMDGPU::SRC_PRIVATE_LIMIT:
2197 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2198 return true;
2199 case AMDGPU::SRC_VCCZ:
2200 case AMDGPU::SRC_EXECZ:
2201 case AMDGPU::SRC_SCC:
2202 return true;
2203 case AMDGPU::SGPR_NULL:
2204 return true;
2205 default:
2206 return false;
2207 }
2208}
2209
2210#undef CASE_CI_VI
2211#undef CASE_VI_GFX9PLUS
2212#undef CASE_GFXPRE11_GFX11PLUS
2213#undef CASE_GFXPRE11_GFX11PLUS_TO
2214#undef MAP_REG2REG
2215
2216bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2217 assert(OpNo < Desc.NumOperands);
2218 unsigned OpType = Desc.operands()[OpNo].OperandType;
2219 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2220 OpType <= AMDGPU::OPERAND_SRC_LAST;
2221}
2222
2223bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2224 assert(OpNo < Desc.NumOperands);
2225 unsigned OpType = Desc.operands()[OpNo].OperandType;
2226 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2227 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2228}
2229
2230bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2231 assert(OpNo < Desc.NumOperands);
2232 unsigned OpType = Desc.operands()[OpNo].OperandType;
2233 switch (OpType) {
2234 case AMDGPU::OPERAND_REG_IMM_FP32:
2235 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2236 case AMDGPU::OPERAND_REG_IMM_FP64:
2237 case AMDGPU::OPERAND_REG_IMM_FP16:
2238 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2239 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2240 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2241 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2242 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2243 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2244 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2245 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2246 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2247 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2248 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2249 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2250 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2251 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2252 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2253 return true;
2254 default:
2255 return false;
2256 }
2257}
2258
2259bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2260 assert(OpNo < Desc.NumOperands);
2261 unsigned OpType = Desc.operands()[OpNo].OperandType;
2262 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2263 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
2264}
2265
2266// Avoid using MCRegisterClass::getSize, since that function will go away
2267// (move from MC* level to Target* level). Return size in bits.
2268unsigned getRegBitWidth(unsigned RCID) {
2269 switch (RCID) {
2270 case AMDGPU::VGPR_LO16RegClassID:
2271 case AMDGPU::VGPR_HI16RegClassID:
2272 case AMDGPU::SGPR_LO16RegClassID:
2273 case AMDGPU::AGPR_LO16RegClassID:
2274 return 16;
2275 case AMDGPU::SGPR_32RegClassID:
2276 case AMDGPU::VGPR_32RegClassID:
2277 case AMDGPU::VRegOrLds_32RegClassID:
2278 case AMDGPU::AGPR_32RegClassID:
2279 case AMDGPU::VS_32RegClassID:
2280 case AMDGPU::AV_32RegClassID:
2281 case AMDGPU::SReg_32RegClassID:
2282 case AMDGPU::SReg_32_XM0RegClassID:
2283 case AMDGPU::SRegOrLds_32RegClassID:
2284 return 32;
2285 case AMDGPU::SGPR_64RegClassID:
2286 case AMDGPU::VS_64RegClassID:
2287 case AMDGPU::SReg_64RegClassID:
2288 case AMDGPU::VReg_64RegClassID:
2289 case AMDGPU::AReg_64RegClassID:
2290 case AMDGPU::SReg_64_XEXECRegClassID:
2291 case AMDGPU::VReg_64_Align2RegClassID:
2292 case AMDGPU::AReg_64_Align2RegClassID:
2293 case AMDGPU::AV_64RegClassID:
2294 case AMDGPU::AV_64_Align2RegClassID:
2295 return 64;
2296 case AMDGPU::SGPR_96RegClassID:
2297 case AMDGPU::SReg_96RegClassID:
2298 case AMDGPU::VReg_96RegClassID:
2299 case AMDGPU::AReg_96RegClassID:
2300 case AMDGPU::VReg_96_Align2RegClassID:
2301 case AMDGPU::AReg_96_Align2RegClassID:
2302 case AMDGPU::AV_96RegClassID:
2303 case AMDGPU::AV_96_Align2RegClassID:
2304 return 96;
2305 case AMDGPU::SGPR_128RegClassID:
2306 case AMDGPU::SReg_128RegClassID:
2307 case AMDGPU::VReg_128RegClassID:
2308 case AMDGPU::AReg_128RegClassID:
2309 case AMDGPU::VReg_128_Align2RegClassID:
2310 case AMDGPU::AReg_128_Align2RegClassID:
2311 case AMDGPU::AV_128RegClassID:
2312 case AMDGPU::AV_128_Align2RegClassID:
2313 return 128;
2314 case AMDGPU::SGPR_160RegClassID:
2315 case AMDGPU::SReg_160RegClassID:
2316 case AMDGPU::VReg_160RegClassID:
2317 case AMDGPU::AReg_160RegClassID:
2318 case AMDGPU::VReg_160_Align2RegClassID:
2319 case AMDGPU::AReg_160_Align2RegClassID:
2320 case AMDGPU::AV_160RegClassID:
2321 case AMDGPU::AV_160_Align2RegClassID:
2322 return 160;
2323 case AMDGPU::SGPR_192RegClassID:
2324 case AMDGPU::SReg_192RegClassID:
2325 case AMDGPU::VReg_192RegClassID:
2326 case AMDGPU::AReg_192RegClassID:
2327 case AMDGPU::VReg_192_Align2RegClassID:
2328 case AMDGPU::AReg_192_Align2RegClassID:
2329 case AMDGPU::AV_192RegClassID:
2330 case AMDGPU::AV_192_Align2RegClassID:
2331 return 192;
2332 case AMDGPU::SGPR_224RegClassID:
2333 case AMDGPU::SReg_224RegClassID:
2334 case AMDGPU::VReg_224RegClassID:
2335 case AMDGPU::AReg_224RegClassID:
2336 case AMDGPU::VReg_224_Align2RegClassID:
2337 case AMDGPU::AReg_224_Align2RegClassID:
2338 case AMDGPU::AV_224RegClassID:
2339 case AMDGPU::AV_224_Align2RegClassID:
2340 return 224;
2341 case AMDGPU::SGPR_256RegClassID:
2342 case AMDGPU::SReg_256RegClassID:
2343 case AMDGPU::VReg_256RegClassID:
2344 case AMDGPU::AReg_256RegClassID:
2345 case AMDGPU::VReg_256_Align2RegClassID:
2346 case AMDGPU::AReg_256_Align2RegClassID:
2347 case AMDGPU::AV_256RegClassID:
2348 case AMDGPU::AV_256_Align2RegClassID:
2349 return 256;
2350 case AMDGPU::SGPR_288RegClassID:
2351 case AMDGPU::SReg_288RegClassID:
2352 case AMDGPU::VReg_288RegClassID:
2353 case AMDGPU::AReg_288RegClassID:
2354 case AMDGPU::VReg_288_Align2RegClassID:
2355 case AMDGPU::AReg_288_Align2RegClassID:
2356 case AMDGPU::AV_288RegClassID:
2357 case AMDGPU::AV_288_Align2RegClassID:
2358 return 288;
2359 case AMDGPU::SGPR_320RegClassID:
2360 case AMDGPU::SReg_320RegClassID:
2361 case AMDGPU::VReg_320RegClassID:
2362 case AMDGPU::AReg_320RegClassID:
2363 case AMDGPU::VReg_320_Align2RegClassID:
2364 case AMDGPU::AReg_320_Align2RegClassID:
2365 case AMDGPU::AV_320RegClassID:
2366 case AMDGPU::AV_320_Align2RegClassID:
2367 return 320;
2368 case AMDGPU::SGPR_352RegClassID:
2369 case AMDGPU::SReg_352RegClassID:
2370 case AMDGPU::VReg_352RegClassID:
2371 case AMDGPU::AReg_352RegClassID:
2372 case AMDGPU::VReg_352_Align2RegClassID:
2373 case AMDGPU::AReg_352_Align2RegClassID:
2374 case AMDGPU::AV_352RegClassID:
2375 case AMDGPU::AV_352_Align2RegClassID:
2376 return 352;
2377 case AMDGPU::SGPR_384RegClassID:
2378 case AMDGPU::SReg_384RegClassID:
2379 case AMDGPU::VReg_384RegClassID:
2380 case AMDGPU::AReg_384RegClassID:
2381 case AMDGPU::VReg_384_Align2RegClassID:
2382 case AMDGPU::AReg_384_Align2RegClassID:
2383 case AMDGPU::AV_384RegClassID:
2384 case AMDGPU::AV_384_Align2RegClassID:
2385 return 384;
2386 case AMDGPU::SGPR_512RegClassID:
2387 case AMDGPU::SReg_512RegClassID:
2388 case AMDGPU::VReg_512RegClassID:
2389 case AMDGPU::AReg_512RegClassID:
2390 case AMDGPU::VReg_512_Align2RegClassID:
2391 case AMDGPU::AReg_512_Align2RegClassID:
2392 case AMDGPU::AV_512RegClassID:
2393 case AMDGPU::AV_512_Align2RegClassID:
2394 return 512;
2395 case AMDGPU::SGPR_1024RegClassID:
2396 case AMDGPU::SReg_1024RegClassID:
2397 case AMDGPU::VReg_1024RegClassID:
2398 case AMDGPU::AReg_1024RegClassID:
2399 case AMDGPU::VReg_1024_Align2RegClassID:
2400 case AMDGPU::AReg_1024_Align2RegClassID:
2401 case AMDGPU::AV_1024RegClassID:
2402 case AMDGPU::AV_1024_Align2RegClassID:
2403 return 1024;
2404 default:
2405 llvm_unreachable("Unexpected register class");
2406 }
2407}
2408
2409unsigned getRegBitWidth(const MCRegisterClass &RC) {
2410 return getRegBitWidth(RC.getID());
2411}
2412
2413unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2414 unsigned OpNo) {
2415 assert(OpNo < Desc.NumOperands);
2416 unsigned RCID = Desc.operands()[OpNo].RegClass;
2417 return getRegBitWidth(RCID) / 8;
2418}
2419
2420bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2421 if (isInlinableIntLiteral(Literal))
2422 return true;
2423
2424 uint64_t Val = static_cast<uint64_t>(Literal);
2425 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2426 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2427 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2428 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2429 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2430 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2431 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2432 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2433 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2434 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2435}
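For instance, a minimal C++20 check that mirrors the list above rather than calling the LLVM helper (the function name here is hypothetical):

// Standalone illustration (C++20 for std::bit_cast) of the fp64 rule:
// a value is inlinable only if its bit pattern matches one of the
// listed constants (small integers are handled separately above).
#include <bit>
#include <cassert>
#include <cstdint>

bool isListedF64(double D) {
  const uint64_t V = std::bit_cast<uint64_t>(D);
  for (double C : {0.0, 1.0, -1.0, 0.5, -0.5, 2.0, -2.0, 4.0, -4.0})
    if (V == std::bit_cast<uint64_t>(C))
      return true;
  return V == 0x3fc45f306dc9c882ULL; // 1/(2*pi), only valid with HasInv2Pi
}

int main() {
  assert(isListedF64(2.0));   // inlinable
  assert(!isListedF64(3.14)); // must be materialized as a literal
  return 0;
}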
2436
2437bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2438 if (isInlinableIntLiteral(Literal))
2439 return true;
2440
2441 // The actual type of the operand does not seem to matter as long
2442 // as the bits match one of the inline immediate values. For example:
2443 //
2444 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2445 // so it is a legal inline immediate.
2446 //
2447 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2448 // floating-point, so it is a legal inline immediate.
2449
2450 uint32_t Val = static_cast<uint32_t>(Literal);
2451 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2452 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2453 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2454 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2455 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2456 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2457 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2458 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2459 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2460 (Val == 0x3e22f983 && HasInv2Pi);
2461}
2462
2463bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2464 if (!HasInv2Pi)
2465 return false;
2466
2467 if (isInlinableIntLiteral(Literal))
2468 return true;
2469
2470 uint16_t Val = static_cast<uint16_t>(Literal);
2471 return Val == 0x3C00 || // 1.0
2472 Val == 0xBC00 || // -1.0
2473 Val == 0x3800 || // 0.5
2474 Val == 0xB800 || // -0.5
2475 Val == 0x4000 || // 2.0
2476 Val == 0xC000 || // -2.0
2477 Val == 0x4400 || // 4.0
2478 Val == 0xC400 || // -4.0
2479 Val == 0x3118; // 1/2pi
2480}
2481
2482bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2483 assert(HasInv2Pi);
2484
2485 if (isInt<16>(Literal) || isUInt<16>(Literal)) {
2486 int16_t Trunc = static_cast<int16_t>(Literal);
2487 return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
2488 }
2489 if (!(Literal & 0xffff))
2490 return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
2491
2492 int16_t Lo16 = static_cast<int16_t>(Literal);
2493 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2494 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
2495}
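A worked example of the packed v2f16 rule, assuming a target with HasInv2Pi (VI and later) and ignoring the small-integer branch; the helper below is an illustrative subset of the f16 table above, not the LLVM function:

// Standalone illustration of the packed 16-bit rule, using raw f16
// bit patterns (0x4000 == 2.0h, 0x3C00 == 1.0h).
#include <cassert>
#include <cstdint>

bool isInlinableF16Bits(uint16_t V) { // subset of the table above
  switch (V) {
  case 0x3C00: case 0xBC00: case 0x3800: case 0xB800:
  case 0x4000: case 0xC000: case 0x4400: case 0xC400:
  case 0x3118: // 1/(2*pi)
    return true;
  default:
    return false;
  }
}

int main() {
  // <2.0h, 2.0h> packs to 0x40004000: both halves equal and listed.
  uint32_t Splat = 0x40004000;
  assert((Splat & 0xffff) == (Splat >> 16) &&
         isInlinableF16Bits(Splat & 0xffff));
  // <1.0h, 2.0h> packs to 0x40003C00: the halves differ, so the packed
  // literal is not inlinable even though each half is.
  uint32_t Mixed = 0x40003C00;
  assert((Mixed & 0xffff) != (Mixed >> 16));
  return 0;
}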
2496
2497bool isInlinableIntLiteralV216(int32_t Literal) {
2498 int16_t Lo16 = static_cast<int16_t>(Literal);
2499 if (isInt<16>(Literal) || isUInt<16>(Literal))
2500 return isInlinableIntLiteral(Lo16);
2501
2502 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2503 if (!(Literal & 0xffff))
2504 return isInlinableIntLiteral(Hi16);
2505 return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
2506}
2507
2508bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2509 assert(HasInv2Pi);
2510
2511 int16_t Lo16 = static_cast<int16_t>(Literal);
2512 if (isInt<16>(Literal) || isUInt<16>(Literal))
2513 return true;
2514
2515 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2516 if (!(Literal & 0xffff))
2517 return true;
2518 return Lo16 == Hi16;
2519}
2520
2521bool isArgPassedInSGPR(const Argument *A) {
2522 const Function *F = A->getParent();
2523
2524 // Arguments to compute shaders are never a source of divergence.
2525 CallingConv::ID CC = F->getCallingConv();
2526 switch (CC) {
2527 case CallingConv::AMDGPU_KERNEL:
2528 case CallingConv::SPIR_KERNEL:
2529 return true;
2530 case CallingConv::AMDGPU_VS:
2531 case CallingConv::AMDGPU_LS:
2532 case CallingConv::AMDGPU_HS:
2533 case CallingConv::AMDGPU_ES:
2534 case CallingConv::AMDGPU_GS:
2535 case CallingConv::AMDGPU_PS:
2536 case CallingConv::AMDGPU_CS_Chain:
2537 case CallingConv::AMDGPU_CS_ChainPreserve:
2538 case CallingConv::AMDGPU_CS:
2539 case CallingConv::AMDGPU_Gfx:
2540 // For non-compute shaders, SGPR inputs are marked with either inreg or
2541 // byval. Everything else is in VGPRs.
2542 return A->hasAttribute(Attribute::InReg) ||
2543 A->hasAttribute(Attribute::ByVal);
2544 default:
2545 // TODO: Should calls support inreg for SGPR inputs?
2546 return false;
2547 }
2548}
2549
2550bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2551 // Arguments to compute shaders are never a source of divergence.
2552 CallingConv::ID CC = CB->getCallingConv();
2553 switch (CC) {
2554 case CallingConv::AMDGPU_KERNEL:
2555 case CallingConv::SPIR_KERNEL:
2556 return true;
2557 case CallingConv::AMDGPU_VS:
2558 case CallingConv::AMDGPU_LS:
2559 case CallingConv::AMDGPU_HS:
2560 case CallingConv::AMDGPU_ES:
2561 case CallingConv::AMDGPU_GS:
2562 case CallingConv::AMDGPU_PS:
2563 case CallingConv::AMDGPU_CS_Chain:
2564 case CallingConv::AMDGPU_CS_ChainPreserve:
2565 case CallingConv::AMDGPU_CS:
2566 case CallingConv::AMDGPU_Gfx:
2567 // For non-compute shaders, SGPR inputs are marked with either inreg or
2568 // byval. Everything else is in VGPRs.
2569 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2570 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2571 default:
2572 // TODO: Should calls support inreg for SGPR inputs?
2573 return false;
2574 }
2575}
2576
2577static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2578 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2579}
2580
2581static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2582 return isGFX9Plus(ST);
2583}
2584
2585bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2586 int64_t EncodedOffset) {
2587 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2588 : isUInt<8>(EncodedOffset);
2589}
2590
2591bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2592 int64_t EncodedOffset,
2593 bool IsBuffer) {
2594 return !IsBuffer &&
2595 hasSMRDSignedImmOffset(ST) &&
2596 isInt<21>(EncodedOffset);
2597}
2598
2599static bool isDwordAligned(uint64_t ByteOffset) {
2600 return (ByteOffset & 3) == 0;
2601}
2602
2603uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2604 uint64_t ByteOffset) {
2605 if (hasSMEMByteOffset(ST))
2606 return ByteOffset;
2607
2608 assert(isDwordAligned(ByteOffset));
2609 return ByteOffset >> 2;
2610}
2611
2612std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2613 int64_t ByteOffset, bool IsBuffer) {
2614 // The signed version is always a byte offset.
2615 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2616 assert(hasSMEMByteOffset(ST));
2617 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2618 : std::nullopt;
2619 }
2620
2621 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2622 return std::nullopt;
2623
2624 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2625 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2626 ? std::optional<int64_t>(EncodedOffset)
2627 : std::nullopt;
2628}
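A simplified standalone sketch of these rules, collapsing the generations into three buckets and leaving out the buffer special cases and the CI 32-bit literal form handled below; names and bucketing are assumptions made for the sketch:

// Simplified standalone sketch of the SMRD immediate-offset rules.
#include <cstdint>
#include <cstdio>
#include <optional>

enum class Gen { SI_CI, VI, GFX9Plus };

static bool fitsSignedBits(int64_t V, unsigned N) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

static std::optional<int64_t> encodeSMRDOffset(Gen G, int64_t ByteOffset) {
  if (G == Gen::GFX9Plus) // signed 20-bit byte offset
    return fitsSignedBits(ByteOffset, 20) ? std::optional<int64_t>(ByteOffset)
                                          : std::nullopt;
  if (G == Gen::VI)       // unsigned 20-bit byte offset
    return (ByteOffset >= 0 && ByteOffset < (1 << 20))
               ? std::optional<int64_t>(ByteOffset)
               : std::nullopt;
  // SI/CI encode dword units: the byte offset must be 4-byte aligned
  // and the dword count must fit in 8 unsigned bits.
  if (ByteOffset & 3)
    return std::nullopt;
  int64_t Dwords = ByteOffset >> 2;
  return (Dwords >= 0 && Dwords < (1 << 8)) ? std::optional<int64_t>(Dwords)
                                            : std::nullopt;
}

int main() {
  printf("%lld\n", (long long)*encodeSMRDOffset(Gen::SI_CI, 16)); // 4 (dwords)
  printf("%lld\n", (long long)*encodeSMRDOffset(Gen::VI, 16));    // 16 (bytes)
  return 0;
}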
2629
2630std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2631 int64_t ByteOffset) {
2632 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2633 return std::nullopt;
2634
2635 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2636 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2637 : std::nullopt;
2638}
2639
2640unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2641 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
2642 if (AMDGPU::isGFX10(ST))
2643 return 12;
2644
2645 return 13;
2646}
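As a quick arithmetic illustration of what those bit counts buy (the flat-segment positivity rule is not modeled here):

// Illustrative only: the signed immediate range implied by the bit
// counts above (12 bits on GFX10, 13 bits on GFX9 and GFX11+).
#include <cstdio>

int main() {
  for (unsigned Bits : {12u, 13u}) {
    long Lo = -(1L << (Bits - 1)), Hi = (1L << (Bits - 1)) - 1;
    printf("%u bits: [%ld, %ld]\n", Bits, Lo, Hi); // 12: [-2048, 2047]
  }
  return 0;
}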
2647
2648namespace {
2649
2650struct SourceOfDivergence {
2651 unsigned Intr;
2652};
2653const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2654
2655struct AlwaysUniform {
2656 unsigned Intr;
2657};
2658const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2659
2660#define GET_SourcesOfDivergence_IMPL
2661#define GET_UniformIntrinsics_IMPL
2662#define GET_Gfx9BufferFormat_IMPL
2663#define GET_Gfx10BufferFormat_IMPL
2664#define GET_Gfx11PlusBufferFormat_IMPL
2665#include "AMDGPUGenSearchableTables.inc"
2666
2667} // end anonymous namespace
2668
2669bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2670 return lookupSourceOfDivergence(IntrID);
2671}
2672
2673bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2674 return lookupAlwaysUniform(IntrID);
2675}
2676
2677const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2678 uint8_t NumComponents,
2679 uint8_t NumFormat,
2680 const MCSubtargetInfo &STI) {
2681 return isGFX11Plus(STI)
2682 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2683 NumFormat)
2684 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2685 NumComponents, NumFormat)
2686 : getGfx9BufferFormatInfo(BitsPerComp,
2687 NumComponents, NumFormat);
2688}
2689
2690const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2691 const MCSubtargetInfo &STI) {
2692 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2693 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2694 : getGfx9BufferFormatInfo(Format);
2695}
2696
2697bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2698 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2699 OpName::src2 }) {
2700 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2701 if (Idx == -1)
2702 continue;
2703
2704 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2705 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2706 return true;
2707 }
2708
2709 return false;
2710}
2711
2712bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2713 return hasAny64BitVGPROperands(OpDesc);
2714}
2715
2716} // namespace AMDGPU
2717
2718raw_ostream &operator<<(raw_ostream &OS,
2719 const AMDGPU::IsaInfo::TargetIDSetting S) {
2720 switch (S) {
2721 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2722 OS << "Unsupported";
2723 break;
2724 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2725 OS << "Any";
2726 break;
2727 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2728 OS << "Off";
2729 break;
2730 case (AMDGPU::IsaInfo::TargetIDSetting::On):
2731 OS << "On";
2732 break;
2733 }
2734 return OS;
2735}
2736
2737} // namespace llvm