LLVM 23.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is built in unsigned arithmetic. A \p Width that covers the whole
/// word (or more) saturates to all ones instead of overflowing a signed shift,
/// and a \p Shift at or beyond the word size selects no bits and yields 0.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  if (Shift >= NumBits)
    return 0;
  const unsigned LowOnes = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return LowOnes << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Src that do not fit into the field are discarded; bits of
/// \p Dst outside the field are preserved.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, shifted down so the field starts at bit 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width (the same for every \p VersionMajor).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits); non-zero only for major versions 9
/// and 10.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width; non-zero only for version 12.5.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Asynccnt bit width; non-zero only for version 12.5.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width: 1 when (\p VersionMajor, \p VersionMinor) is at
/// least 10.3 in lexicographic order, otherwise 0.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  // Tuple comparison orders by major version first, then by minor version.
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
205 return isGFX9Plus(ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
215 M.getModuleFlag("amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
220}
221
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
229 return 4;
231 return 5;
233 return 6;
234 default:
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
246 case 5:
248 case 6:
250 default:
251 report_fatal_error("Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
322 const MIMGInfo *Info = getMIMGInfo(Opc);
323 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
330 NewChannels, OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(AddrComponents, 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
372
381
386
391
395
399
403
408
416
421
427
428#define GET_FP4FP8DstByteSelTable_DECL
429#define GET_FP4FP8DstByteSelTable_IMPL
430
435
441
442#define GET_DPMACCInstructionTable_DECL
443#define GET_DPMACCInstructionTable_IMPL
444#define GET_MTBUFInfoTable_DECL
445#define GET_MTBUFInfoTable_IMPL
446#define GET_MUBUFInfoTable_DECL
447#define GET_MUBUFInfoTable_IMPL
448#define GET_SMInfoTable_DECL
449#define GET_SMInfoTable_IMPL
450#define GET_VOP1InfoTable_DECL
451#define GET_VOP1InfoTable_IMPL
452#define GET_VOP2InfoTable_DECL
453#define GET_VOP2InfoTable_IMPL
454#define GET_VOP3InfoTable_DECL
455#define GET_VOP3InfoTable_IMPL
456#define GET_VOPC64DPPTable_DECL
457#define GET_VOPC64DPPTable_IMPL
458#define GET_VOPC64DPP8Table_DECL
459#define GET_VOPC64DPP8Table_IMPL
460#define GET_VOPCAsmOnlyInfoTable_DECL
461#define GET_VOPCAsmOnlyInfoTable_IMPL
462#define GET_VOP3CAsmOnlyInfoTable_DECL
463#define GET_VOP3CAsmOnlyInfoTable_IMPL
464#define GET_VOPDComponentTable_DECL
465#define GET_VOPDComponentTable_IMPL
466#define GET_VOPDPairs_DECL
467#define GET_VOPDPairs_IMPL
468#define GET_VOPDXTable_DECL
469#define GET_VOPDXTable_IMPL
470#define GET_VOPDYTable_DECL
471#define GET_VOPDYTable_IMPL
472#define GET_VOPTrue16Table_DECL
473#define GET_VOPTrue16Table_IMPL
474#define GET_True16D16Table_IMPL
475#define GET_WMMAOpcode2AddrMappingTable_DECL
476#define GET_WMMAOpcode2AddrMappingTable_IMPL
477#define GET_WMMAOpcode3AddrMappingTable_DECL
478#define GET_WMMAOpcode3AddrMappingTable_IMPL
479#define GET_getMFMA_F8F6F4_WithSize_DECL
480#define GET_getMFMA_F8F6F4_WithSize_IMPL
481#define GET_isMFMA_F8F6F4Table_IMPL
482#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
483
484#include "AMDGPUGenSearchableTables.inc"
485
486int getMTBUFBaseOpcode(unsigned Opc) {
487 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
488 return Info ? Info->BaseOpcode : -1;
489}
490
491int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
492 const MTBUFInfo *Info =
493 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
494 return Info ? Info->Opcode : -1;
495}
496
497int getMTBUFElements(unsigned Opc) {
498 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
499 return Info ? Info->elements : 0;
500}
501
502bool getMTBUFHasVAddr(unsigned Opc) {
503 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
504 return Info && Info->has_vaddr;
505}
506
507bool getMTBUFHasSrsrc(unsigned Opc) {
508 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
509 return Info && Info->has_srsrc;
510}
511
512bool getMTBUFHasSoffset(unsigned Opc) {
513 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
514 return Info && Info->has_soffset;
515}
516
517int getMUBUFBaseOpcode(unsigned Opc) {
518 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
519 return Info ? Info->BaseOpcode : -1;
520}
521
522int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
523 const MUBUFInfo *Info =
524 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
525 return Info ? Info->Opcode : -1;
526}
527
528int getMUBUFElements(unsigned Opc) {
529 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
530 return Info ? Info->elements : 0;
531}
532
533bool getMUBUFHasVAddr(unsigned Opc) {
534 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
535 return Info && Info->has_vaddr;
536}
537
538bool getMUBUFHasSrsrc(unsigned Opc) {
539 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
540 return Info && Info->has_srsrc;
541}
542
543bool getMUBUFHasSoffset(unsigned Opc) {
544 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
545 return Info && Info->has_soffset;
546}
547
548bool getMUBUFIsBufferInv(unsigned Opc) {
549 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
550 return Info && Info->IsBufferInv;
551}
552
553bool getMUBUFTfe(unsigned Opc) {
554 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
555 return Info && Info->tfe;
556}
557
558bool getSMEMIsBuffer(unsigned Opc) {
559 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
560 return Info && Info->IsBuffer;
561}
562
563bool getVOP1IsSingle(unsigned Opc) {
564 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
565 return !Info || Info->IsSingle;
566}
567
568bool getVOP2IsSingle(unsigned Opc) {
569 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
570 return !Info || Info->IsSingle;
571}
572
573bool getVOP3IsSingle(unsigned Opc) {
574 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
575 return !Info || Info->IsSingle;
576}
577
578bool isVOPC64DPP(unsigned Opc) {
579 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
580}
581
582bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
583
584bool getMAIIsDGEMM(unsigned Opc) {
585 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
586 return Info && Info->is_dgemm;
587}
588
589bool getMAIIsGFX940XDL(unsigned Opc) {
590 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
591 return Info && Info->is_gfx940_xdl;
592}
593
594bool getWMMAIsXDL(unsigned Opc) {
595 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
596 return Info ? Info->is_wmma_xdl : false;
597}
598
599bool getHasMatrixScale(unsigned Opc) {
600 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
601 return Info && Info->HasMatrixScale;
602}
603
605 switch (EncodingVal) {
608 return 6;
610 return 4;
613 default:
614 return 8;
615 }
616
617 llvm_unreachable("covered switch over mfma scale formats");
618}
619
621 unsigned BLGP,
622 unsigned F8F8Opcode) {
623 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
624 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
625 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
626}
627
629 switch (Fmt) {
632 return 16;
635 return 12;
637 return 8;
638 }
639
640 llvm_unreachable("covered switch over wmma scale formats");
641}
642
644 unsigned FmtB,
645 unsigned F8F8Opcode) {
646 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
647 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
648 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
649}
650
651bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale,
652 unsigned BFmt, unsigned BScale) {
653 auto isValid = [](unsigned Fmt, unsigned Scale) -> bool {
654 switch (Fmt) {
659 if (Scale != WMMA::MATRIX_SCALE_FMT_E8)
660 return false;
661 break;
663 if (Scale != WMMA::MATRIX_SCALE_FMT_E8 &&
666 return false;
667 break;
668 }
669 return true;
670 };
671
672 if (!isValid(AFmt, AScale) || !isValid(BFmt, BScale))
673 return false;
674
675 if (AFmt == WMMA::MATRIX_FMT_FP4 && BFmt == WMMA::MATRIX_FMT_FP4 &&
676 AScale != BScale)
677 return false;
678
679 return true;
680}
681
683 if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
685 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
687 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
689 if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
691 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
693 llvm_unreachable("Subtarget generation does not support VOPD!");
694}
695
/// Builds the index used for the direct VOPD X/Y lookup table.
///
/// Layout of the key: bit 0 holds \p VOPD3, \p Subtarget starts at bit 1 and
/// \p VOPDOp starts at bit 5. The fields are OR-ed together without masking.
static constexpr unsigned getVOPDXYKey(unsigned VOPDOp, unsigned Subtarget,
                                       bool VOPD3) {
  const unsigned OpField = VOPDOp << 5;
  const unsigned SubtargetField = Subtarget << 1;
  const unsigned VOPD3Bit = VOPD3 ? 1u : 0u;
  return OpField | SubtargetField | VOPD3Bit;
}
700
701// TODO: Ideally, the table should be emitted by the TableGen backend, however
702// this is currently not supported, so the direct lookup table is generated
703// manually here.
704constexpr unsigned VOPDXYKeyBits = 11;
705static constexpr std::array<CanBeVOPD, 1 << VOPDXYKeyBits> buildVOPDXYLookup() {
706 std::array<CanBeVOPD, 1 << VOPDXYKeyBits> Table{};
707 for (auto &E : Table)
708 E = {false, false};
709 for (const auto &E : VOPDXTable)
710 Table[getVOPDXYKey(E.VOPDOp, E.Subtarget, E.VOPD3)].X = true;
711 for (const auto &E : VOPDYTable)
712 Table[getVOPDXYKey(E.VOPDOp, E.Subtarget, E.VOPD3)].Y = true;
713 return Table;
714}
715
717
718CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
719 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
720 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
721 // Normalize through VOPDComponentTable so that e32 and e64 variants
722 // of the same logical opcode all share a single entry.
723 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
724 if (!Info)
725 return {false, false};
726 return VOPDXYLookup[getVOPDXYKey(Info->VOPDOp, EncodingFamily, VOPD3)];
727}
728
729unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
730 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
731 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
732 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
733 return Info ? Info->VOPDOp : ~0u;
734}
735
736bool isVOPD(unsigned Opc) {
737 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
738}
739
740bool isMAC(unsigned Opc) {
741 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
742 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
743 Opc == AMDGPU::V_MAC_F32_e64_vi ||
744 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
745 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
746 Opc == AMDGPU::V_MAC_F16_e64_vi ||
747 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
748 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
749 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
750 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
751 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
752 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
753 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
754 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
755 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
756 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
757 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
758 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
759 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
760 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
761 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
762 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
763 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
764 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
765 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
766 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
767 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
768 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
769}
770
771bool isPermlane16(unsigned Opc) {
772 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
773 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
774 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
775 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
776 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
777 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx13 ||
778 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
779 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx13 ||
780 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
781 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx13 ||
782 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12 ||
783 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx13;
784}
785
787 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
788 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
789 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
790 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
791 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
792 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
793 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
794 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
795 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
796 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
797}
798
799bool isGenericAtomic(unsigned Opc) {
800 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
801 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
802 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
803 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
804 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
805 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
806 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
807 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
808 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
809 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
810 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
811 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
812 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
813 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
814 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
815 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
816 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
817 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
818 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
819}
820
821bool isAsyncStore(unsigned Opc) {
822 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
823 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
824 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
825 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
826 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
827 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
828 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
829 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
830}
831
832bool isTensorStore(unsigned Opc) {
833 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
834 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
835}
836
837unsigned getTemporalHintType(const MCInstrDesc TID) {
840 unsigned Opc = TID.getOpcode();
841 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
842 if (TID.mayStore() &&
843 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
844 return CPol::TH_TYPE_STORE;
845
846 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
847 // MayLoad flag is present which is the case with instructions like
848 // image_get_resinfo.
849 return CPol::TH_TYPE_LOAD;
850}
851
852bool isTrue16Inst(unsigned Opc) {
853 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
854 return Info && Info->IsTrue16;
855}
856
858 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
859 if (!Info)
860 return FPType::None;
861 if (Info->HasFP8DstByteSel)
862 return FPType::FP8;
863 if (Info->HasFP4DstByteSel)
864 return FPType::FP4;
865
866 return FPType::None;
867}
868
869bool isDPMACCInstruction(unsigned Opc) {
870 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
871 return Info && Info->IsDPMACCInstruction;
872}
873
874unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
875 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
876 return Info ? Info->Opcode3Addr : ~0u;
877}
878
879unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
880 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
881 return Info ? Info->Opcode2Addr : ~0u;
882}
883
884// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
885// header files, so we need to wrap it in a function that takes unsigned
886// instead.
887int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
888 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
889}
890
891unsigned getBitOp2(unsigned Opc) {
892 switch (Opc) {
893 default:
894 return 0;
895 case AMDGPU::V_AND_B32_e32:
896 return 0x40;
897 case AMDGPU::V_OR_B32_e32:
898 return 0x54;
899 case AMDGPU::V_XOR_B32_e32:
900 return 0x14;
901 case AMDGPU::V_XNOR_B32_e32:
902 return 0x41;
903 }
904}
905
906int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
907 bool VOPD3) {
908 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
909 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
910 const VOPDInfo *Info =
911 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
912 return Info ? Info->Opcode : -1;
913}
914
915std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
916 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
917 assert(Info);
918 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
919 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
920 assert(OpX && OpY);
921 return {OpX->BaseVOP, OpY->BaseVOP};
922}
923
924namespace VOPD {
925
926ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
928
931 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
932 assert(TiedIdx == -1 || TiedIdx == Component::DST);
933 HasSrc2Acc = TiedIdx != -1;
934 Opcode = OpDesc.getOpcode();
935
936 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
937 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
938 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
939 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
940 : 1;
941 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
942
943 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
944 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
945 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
946 // operands.
947 NumVOPD3Mods = 2;
948 if (IsVOP3)
949 SrcOperandsNum = 3;
950 } else if (isSISrcFPOperand(OpDesc,
951 getNamedOperandIdx(Opcode, OpName::src0))) {
952 // All FP VOPD instructions have Neg modifiers for all operands except
953 // for tied src2.
954 NumVOPD3Mods = SrcOperandsNum;
955 if (HasSrc2Acc)
956 --NumVOPD3Mods;
957 }
958
959 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
960 return;
961
962 auto OperandsNum = OpDesc.getNumOperands();
963 unsigned CompOprIdx;
964 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
965 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
966 MandatoryLiteralIdx = CompOprIdx;
967 break;
968 }
969 }
970}
971
973 return getNamedOperandIdx(Opcode, OpName::bitop3);
974}
975
976unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
977 assert(CompOprIdx < Component::MAX_OPR_NUM);
978
979 if (CompOprIdx == Component::DST)
981
982 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
983 if (CompSrcIdx < getCompParsedSrcOperandsNum())
984 return getIndexOfSrcInParsedOperands(CompSrcIdx);
985
986 // The specified operand does not exist.
987 return 0;
988}
989
991 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
992 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
993 bool VOPD3) const {
994
995 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
996 CompInfo[ComponentIndex::X].isVOP3());
997 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
998 CompInfo[ComponentIndex::Y].isVOP3());
999
1000 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
1001 unsigned BanksMask) -> bool {
1002 MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
1003 MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
1004 if (!BaseX)
1005 BaseX = X;
1006 if (!BaseY)
1007 BaseY = Y;
1008 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
1009 return true;
1010 if (BaseX != X /* This is 64-bit register */ &&
1011 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
1012 return true;
1013 if (BaseY != Y &&
1014 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
1015 return true;
1016
1017 // If both are 64-bit bank conflict will be detected yet while checking
1018 // the first subreg.
1019 return false;
1020 };
1021
1022 unsigned CompOprIdx;
1023 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
1024 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
1025 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
1026 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
1027 continue;
1028
1029 if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
1030 getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
1031 return CompOprIdx;
1032
1033 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
1034 continue;
1035
1036 if (CompOprIdx < Component::DST_NUM) {
1037 // Even if we do not check vdst parity, vdst operands still shall not
1038 // overlap.
1039 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
1040 return CompOprIdx;
1041 if (VOPD3) // No need to check dst parity.
1042 continue;
1043 }
1044
1045 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
1046 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
1047 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
1048 return CompOprIdx;
1049 }
1050
1051 return {};
1052}
1053
1054// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1055// by the specified component. If an operand is unused
1056// or is not a VGPR, the corresponding value is 0.
1057//
1058// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1059// for the specified component and MC operand. The callback must return 0
1060// if the operand is not a register or not a VGPR.
1062InstInfo::getRegIndices(unsigned CompIdx,
1063 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1064 bool VOPD3) const {
1065 assert(CompIdx < COMPONENTS_NUM);
1066
1067 const auto &Comp = CompInfo[CompIdx];
1069
1070 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1071
1072 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1073 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1074 RegIndices[CompOprIdx] =
1075 Comp.hasRegSrcOperand(CompSrcIdx)
1076 ? GetRegIdx(CompIdx,
1077 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1078 : MCRegister();
1079 }
1080 return RegIndices;
1081}
1082
1083} // namespace VOPD
1084
1086 return VOPD::InstInfo(OpX, OpY);
1087}
1088
1090 const MCInstrInfo *InstrInfo) {
1091 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1092 const auto &OpXDesc = InstrInfo->get(OpX);
1093 const auto &OpYDesc = InstrInfo->get(OpY);
1094 bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1096 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1097 return VOPD::InstInfo(OpXInfo, OpYInfo);
1098}
1099
1100namespace IsaInfo {
1101
1103 : STI(STI), XnackSetting(TargetIDSetting::Any),
1104 SramEccSetting(TargetIDSetting::Any) {
1105 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1106 XnackSetting = TargetIDSetting::Unsupported;
1107 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1108 SramEccSetting = TargetIDSetting::Unsupported;
1109}
1110
1112 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1113 // absence of the target features we assume we must generate code that can run
1114 // in any environment.
1115 SubtargetFeatures Features(FS);
1116 std::optional<bool> XnackRequested;
1117 std::optional<bool> SramEccRequested;
1118
1119 for (const std::string &Feature : Features.getFeatures()) {
1120 if (Feature == "+xnack")
1121 XnackRequested = true;
1122 else if (Feature == "-xnack")
1123 XnackRequested = false;
1124 else if (Feature == "+sramecc")
1125 SramEccRequested = true;
1126 else if (Feature == "-sramecc")
1127 SramEccRequested = false;
1128 }
1129
1130 bool XnackSupported = isXnackSupported();
1131 bool SramEccSupported = isSramEccSupported();
1132
1133 if (XnackRequested) {
1134 if (XnackSupported) {
1135 XnackSetting =
1136 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1137 } else {
1138 // If a specific xnack setting was requested and this GPU does not support
1139 // xnack emit a warning. Setting will remain set to "Unsupported".
1140 if (*XnackRequested) {
1141 errs() << "warning: xnack 'On' was requested for a processor that does "
1142 "not support it!\n";
1143 } else {
1144 errs() << "warning: xnack 'Off' was requested for a processor that "
1145 "does not support it!\n";
1146 }
1147 }
1148 }
1149
1150 if (SramEccRequested) {
1151 if (SramEccSupported) {
1152 SramEccSetting =
1153 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1154 } else {
1155 // If a specific sramecc setting was requested and this GPU does not
1156 // support sramecc emit a warning. Setting will remain set to
1157 // "Unsupported".
1158 if (*SramEccRequested) {
1159 errs() << "warning: sramecc 'On' was requested for a processor that "
1160 "does not support it!\n";
1161 } else {
1162 errs() << "warning: sramecc 'Off' was requested for a processor that "
1163 "does not support it!\n";
1164 }
1165 }
1166 }
1167}
1168
1169static TargetIDSetting
1171 if (FeatureString.ends_with("-"))
1172 return TargetIDSetting::Off;
1173 if (FeatureString.ends_with("+"))
1174 return TargetIDSetting::On;
1175
1176 llvm_unreachable("Malformed feature string");
1177}
1178
1180 SmallVector<StringRef, 3> TargetIDSplit;
1181 TargetID.split(TargetIDSplit, ':');
1182
1183 for (const auto &FeatureString : TargetIDSplit) {
1184 if (FeatureString.starts_with("xnack"))
1185 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
1186 if (FeatureString.starts_with("sramecc"))
1187 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
1188 }
1189}
1190
1191void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
1192 const Triple &TargetTriple = STI.getTargetTriple();
1193 auto Version = getIsaVersion(STI.getCPU());
1194
1195 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1196 << '-' << TargetTriple.getOSName() << '-'
1197 << TargetTriple.getEnvironmentName() << '-';
1198
1199 std::string Processor;
1200 // TODO: Following else statement is present here because we used various
1201 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
1202 // Remove once all aliases are removed from GCNProcessors.td.
1203 if (Version.Major >= 9)
1204 Processor = STI.getCPU().str();
1205 else
1206 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
1207 Twine(Version.Stepping))
1208 .str();
1209
1210 std::string Features;
1211 if (TargetTriple.getOS() == Triple::AMDHSA) {
1212 // sramecc.
1214 Features += ":sramecc-";
1216 Features += ":sramecc+";
1217 // xnack.
1219 Features += ":xnack-";
1221 Features += ":xnack+";
1222 }
1223
1224 StreamRep << Processor << Features;
1225}
1226
1227std::string AMDGPUTargetID::toString() const {
1228 std::string Str;
1229 raw_string_ostream OS(Str);
1230 OS << *this;
1231 return Str;
1232}
1233
1235 if (STI.getFeatureBits().test(FeatureInstCacheLineSize128))
1236 return 128;
1237 if (STI.getFeatureBits().test(FeatureInstCacheLineSize64))
1238 return 64;
1239 return 64;
1240}
1241
1242unsigned getWavefrontSize(const MCSubtargetInfo &STI) {
1243 if (STI.getFeatureBits().test(FeatureWavefrontSize16))
1244 return 16;
1245 if (STI.getFeatureBits().test(FeatureWavefrontSize32))
1246 return 32;
1247
1248 return 64;
1249}
1250
1252 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1253
1254 // "Per CU" really means "per whatever functional block the waves of a
1255 // workgroup must share". So the effective local memory size is doubled in
1256 // WGP mode on gfx10.
1257 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1258 BytesPerCU *= 2;
1259
1260 return BytesPerCU;
1261}
1262
1264 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1265 return 32768;
1266 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1267 return 65536;
1268 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1269 return 163840;
1270 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1271 return 327680;
1272 return 32768;
1273}
1274
1275unsigned getEUsPerCU(const MCSubtargetInfo &STI) {
1276 // "Per CU" really means "per whatever functional block the waves of a
1277 // workgroup must share".
1278
1279 // GFX12.5 only supports CU mode, which contains four SIMDs.
1280 if (isGFX1250(STI)) {
1281 assert(STI.getFeatureBits().test(FeatureCuMode));
1282 return 4;
1283 }
1284
1285 // For gfx10 in CU mode the functional block is the CU, which contains
1286 // two SIMDs.
1287 if (isGFX10Plus(STI) && STI.getFeatureBits().test(FeatureCuMode))
1288 return 2;
1289
1290 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1291 // contains two CUs, so a total of four SIMDs.
1292 return 4;
1293}
1294
1296 unsigned FlatWorkGroupSize) {
1297 assert(FlatWorkGroupSize != 0);
1298 if (!STI.getTargetTriple().isAMDGCN())
1299 return 8;
1300 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1301 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1302 if (N == 1) {
1303 // Single-wave workgroups don't consume barrier resources.
1304 return MaxWaves;
1305 }
1306
1307 unsigned MaxBarriers = 16;
1308 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1309 MaxBarriers = 32;
1310
1311 return std::min(MaxWaves / N, MaxBarriers);
1312}
1313
1314unsigned getMinWavesPerEU(const MCSubtargetInfo &STI) { return 1; }
1315
1316unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI) {
1317 // FIXME: Need to take scratch memory into account.
1318 if (isGFX90A(STI))
1319 return 8;
1320 if (!isGFX10Plus(STI))
1321 return 10;
1322 return hasGFX10_3Insts(STI) ? 16 : 20;
1323}
1324
1326 unsigned FlatWorkGroupSize) {
1327 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1328 getEUsPerCU(STI));
1329}
1330
1331unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI) { return 1; }
1332
1334 unsigned FlatWorkGroupSize) {
1335 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1336}
1337
1340 if (Version.Major >= 10)
1341 return getAddressableNumSGPRs(STI);
1342 if (Version.Major >= 8)
1343 return 16;
1344 return 8;
1345}
1346
1347unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI) { return 8; }
1348
1349unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI) {
1351 if (Version.Major >= 8)
1352 return 800;
1353 return 512;
1354}
1355
1357 if (STI.getFeatureBits().test(FeatureSGPRInitBug))
1359
1361 if (Version.Major >= 10)
1362 return 106;
1363 if (Version.Major >= 8)
1364 return 102;
1365 return 104;
1366}
1367
1368unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU) {
1369 assert(WavesPerEU != 0);
1370
1372 if (Version.Major >= 10)
1373 return 0;
1374
1375 if (WavesPerEU >= getMaxWavesPerEU(STI))
1376 return 0;
1377
1378 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1379 if (STI.getFeatureBits().test(FeatureTrapHandler))
1380 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1381 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1382 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1383}
1384
1385unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1386 bool Addressable) {
1387 assert(WavesPerEU != 0);
1388
1389 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1391 if (Version.Major >= 10)
1392 return Addressable ? AddressableNumSGPRs : 108;
1393 if (Version.Major >= 8 && !Addressable)
1394 AddressableNumSGPRs = 112;
1395 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1396 if (STI.getFeatureBits().test(FeatureTrapHandler))
1397 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1398 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1399 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1400}
1401
1402unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1403 bool FlatScrUsed, bool XNACKUsed) {
1404 unsigned ExtraSGPRs = 0;
1405 if (VCCUsed)
1406 ExtraSGPRs = 2;
1407
1409 if (Version.Major >= 10)
1410 return ExtraSGPRs;
1411
1412 if (Version.Major < 8) {
1413 if (FlatScrUsed)
1414 ExtraSGPRs = 4;
1415 } else {
1416 if (XNACKUsed)
1417 ExtraSGPRs = 4;
1418
1419 if (FlatScrUsed ||
1420 STI.getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1421 ExtraSGPRs = 6;
1422 }
1423
1424 return ExtraSGPRs;
1425}
1426
1427unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1428 bool FlatScrUsed) {
1429 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1430 STI.getFeatureBits().test(AMDGPU::FeatureXNACK));
1431}
1432
1433static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1434 unsigned Granule) {
1435 return divideCeil(std::max(1u, NumRegs), Granule);
1436}
1437
1438unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs) {
1439 // SGPRBlocks is actual number of SGPR blocks minus 1.
1441 1;
1442}
1443
1445 unsigned DynamicVGPRBlockSize,
1446 std::optional<bool> EnableWavefrontSize32) {
1447 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1448 return 8;
1449
1450 if (DynamicVGPRBlockSize != 0)
1451 return DynamicVGPRBlockSize;
1452
1453 bool IsWave32 = EnableWavefrontSize32
1454 ? *EnableWavefrontSize32
1455 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1456
1457 if (STI.getFeatureBits().test(Feature1536VGPRs))
1458 return IsWave32 ? 24 : 12;
1459
1460 if (hasGFX10_3Insts(STI))
1461 return IsWave32 ? 16 : 8;
1462
1463 return IsWave32 ? 8 : 4;
1464}
1465
1467 std::optional<bool> EnableWavefrontSize32) {
1468 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1469 return 8;
1470
1471 bool IsWave32 = EnableWavefrontSize32
1472 ? *EnableWavefrontSize32
1473 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1474
1475 if (STI.getFeatureBits().test(Feature1024AddressableVGPRs))
1476 return IsWave32 ? 16 : 8;
1477
1478 return IsWave32 ? 8 : 4;
1479}
1480
/// \returns The ArchVGPR allocation granule, which is always 4.
unsigned getArchVGPRAllocGranule() {
  return 4;
}
1482
1483unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI) {
1484 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1485 return 512;
1486 if (!isGFX10Plus(STI))
1487 return 256;
1488 bool IsWave32 = STI.getFeatureBits().test(FeatureWavefrontSize32);
1489 if (STI.getFeatureBits().test(Feature1536VGPRs))
1490 return IsWave32 ? 1536 : 768;
1491 return IsWave32 ? 1024 : 512;
1492}
1493
1495 const auto &Features = STI.getFeatureBits();
1496 if (Features.test(Feature1024AddressableVGPRs))
1497 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1498 return 256;
1499}
1500
1502 unsigned DynamicVGPRBlockSize) {
1503 const auto &Features = STI.getFeatureBits();
1504 if (Features.test(FeatureGFX90AInsts))
1505 return 512;
1506
1507 if (DynamicVGPRBlockSize != 0) {
1508 // On GFX12 we can allocate at most MaxDynamicVGPRBlocks blocks of VGPRs.
1509 return MaxDynamicVGPRBlocks *
1510 getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1511 }
1512 return getAddressableNumArchVGPRs(STI);
1513}
1514
1516 unsigned NumVGPRs,
1517 unsigned DynamicVGPRBlockSize) {
1519 NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1521}
1522
1523unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1524 unsigned MaxWaves,
1525 unsigned TotalNumVGPRs) {
1526 if (NumVGPRs < Granule)
1527 return MaxWaves;
1528 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1529 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1530}
1531
1532unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1534 if (Gen >= AMDGPUSubtarget::GFX10)
1535 return MaxWaves;
1536
1538 if (SGPRs <= 80)
1539 return 10;
1540 if (SGPRs <= 88)
1541 return 9;
1542 if (SGPRs <= 100)
1543 return 8;
1544 return 7;
1545 }
1546 if (SGPRs <= 48)
1547 return 10;
1548 if (SGPRs <= 56)
1549 return 9;
1550 if (SGPRs <= 64)
1551 return 8;
1552 if (SGPRs <= 72)
1553 return 7;
1554 if (SGPRs <= 80)
1555 return 6;
1556 return 5;
1557}
1558
1559unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1560 unsigned DynamicVGPRBlockSize) {
1561 assert(WavesPerEU != 0);
1562
1563 // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
1564 // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
1565 // zero because there is no nonzero VGPR usage N where going below N
1566 // achieves higher (static) occupancy.
1567 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1568 if (DynamicVGPREnabled)
1569 return 0;
1570
1571 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1572 if (WavesPerEU >= MaxWavesPerEU)
1573 return 0;
1574
1575 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1576 unsigned AddrsableNumVGPRs =
1577 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1578 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1579 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1580
1581 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1582 return 0;
1583
1584 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
1585 DynamicVGPRBlockSize);
1586 if (WavesPerEU < MinWavesPerEU)
1587 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1588
1589 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1590 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1591 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1592}
1593
1594unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1595 unsigned DynamicVGPRBlockSize) {
1596 assert(WavesPerEU != 0);
1597
1598 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1599 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1600 unsigned MaxNumVGPRs =
1601 DynamicVGPREnabled
1602 ? getTotalNumVGPRs(STI)
1603 : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1604 getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1605 unsigned AddressableNumVGPRs =
1606 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1607 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1608}
1609
1610unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs,
1611 std::optional<bool> EnableWavefrontSize32) {
1613 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1614 1;
1615}
1616
1618 unsigned NumVGPRs,
1619 unsigned DynamicVGPRBlockSize,
1620 std::optional<bool> EnableWavefrontSize32) {
1622 NumVGPRs,
1623 getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1624}
1625} // end namespace IsaInfo
1626
1628 const MCSubtargetInfo &STI) {
1630 KernelCode.amd_kernel_code_version_major = 1;
1631 KernelCode.amd_kernel_code_version_minor = 2;
1632 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1633 KernelCode.amd_machine_version_major = Version.Major;
1634 KernelCode.amd_machine_version_minor = Version.Minor;
1635 KernelCode.amd_machine_version_stepping = Version.Stepping;
1637 if (STI.getFeatureBits().test(FeatureWavefrontSize32)) {
1638 KernelCode.wavefront_size = 5;
1640 } else {
1641 KernelCode.wavefront_size = 6;
1642 }
1643
1644 // If the code object does not support indirect functions, then the value must
1645 // be 0xffffffff.
1646 KernelCode.call_convention = -1;
1647
1648 // These alignment values are specified in powers of two, so alignment =
1649 // 2^n. The minimum alignment is 2^4 = 16.
1650 KernelCode.kernarg_segment_alignment = 4;
1651 KernelCode.group_segment_alignment = 4;
1652 KernelCode.private_segment_alignment = 4;
1653
1654 if (Version.Major >= 10) {
1655 KernelCode.compute_pgm_resource_registers |=
1656 S_00B848_WGP_MODE(STI.getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1658 }
1659}
1660
1663}
1664
1667}
1668
1670 unsigned AS = GV->getAddressSpace();
1671 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1673}
1674
1676 return TT.getArch() == Triple::r600;
1677}
1678
/// \returns True if \p C is one of the register kind prefixes 'v', 's' or 'a'.
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1682
1683std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1684 char Kind = RegName.front();
1685 if (!isValidRegPrefix(Kind))
1686 return {};
1687
1688 RegName = RegName.drop_front();
1689 if (RegName.consume_front("[")) {
1690 unsigned Idx, End;
1691 bool Failed = RegName.consumeInteger(10, Idx);
1692 Failed |= !RegName.consume_front(":");
1693 Failed |= RegName.consumeInteger(10, End);
1694 Failed |= !RegName.consume_back("]");
1695 if (!Failed) {
1696 unsigned NumRegs = End - Idx + 1;
1697 if (NumRegs > 1)
1698 return {Kind, Idx, NumRegs};
1699 }
1700 } else {
1701 unsigned Idx;
1702 bool Failed = RegName.getAsInteger(10, Idx);
1703 if (!Failed)
1704 return {Kind, Idx, 1};
1705 }
1706
1707 return {};
1708}
1709
1710std::tuple<char, unsigned, unsigned>
1712 StringRef RegName = Constraint;
1713 if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1714 return {};
1716}
1717
1718std::pair<unsigned, unsigned>
1720 std::pair<unsigned, unsigned> Default,
1721 bool OnlyFirstRequired) {
1722 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1723 return {Attr->first, Attr->second.value_or(Default.second)};
1724 return Default;
1725}
1726
1727std::optional<std::pair<unsigned, std::optional<unsigned>>>
1729 bool OnlyFirstRequired) {
1730 Attribute A = F.getFnAttribute(Name);
1731 if (!A.isStringAttribute())
1732 return std::nullopt;
1733
1734 LLVMContext &Ctx = F.getContext();
1735 std::pair<unsigned, std::optional<unsigned>> Ints;
1736 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1737 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1738 Ctx.emitError("can't parse first integer attribute " + Name);
1739 return std::nullopt;
1740 }
1741 unsigned Second = 0;
1742 if (Strs.second.trim().getAsInteger(0, Second)) {
1743 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1744 Ctx.emitError("can't parse second integer attribute " + Name);
1745 return std::nullopt;
1746 }
1747 } else {
1748 Ints.second = Second;
1749 }
1750
1751 return Ints;
1752}
1753
1755 unsigned Size,
1756 unsigned DefaultVal) {
1757 std::optional<SmallVector<unsigned>> R =
1759 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1760}
1761
1762std::optional<SmallVector<unsigned>>
1764 assert(Size > 2);
1765 LLVMContext &Ctx = F.getContext();
1766
1767 Attribute A = F.getFnAttribute(Name);
1768 if (!A.isValid())
1769 return std::nullopt;
1770 if (!A.isStringAttribute()) {
1771 Ctx.emitError(Name + " is not a string attribute");
1772 return std::nullopt;
1773 }
1774
1776
1777 StringRef S = A.getValueAsString();
1778 unsigned i = 0;
1779 for (; !S.empty() && i < Size; i++) {
1780 std::pair<StringRef, StringRef> Strs = S.split(',');
1781 unsigned IntVal;
1782 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1783 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1784 Name);
1785 return std::nullopt;
1786 }
1787 Vals[i] = IntVal;
1788 S = Strs.second;
1789 }
1790
1791 if (!S.empty() || i < Size) {
1792 Ctx.emitError("attribute " + Name +
1793 " has incorrect number of integers; expected " +
1795 return std::nullopt;
1796 }
1797 return Vals;
1798}
1799
1800bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1801 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1802 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1803 auto Low =
1804 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1805 auto High =
1806 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1807 // There are two types of [A; B) ranges:
1808 // A < B, e.g. [4; 5) which is a range that only includes 4.
1809 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1810 // everything except 4.
1811 if (Low.ult(High)) {
1812 if (Low.ule(Val) && High.ugt(Val))
1813 return true;
1814 } else {
1815 if (Low.uge(Val) && High.ult(Val))
1816 return true;
1817 }
1818 }
1819
1820 return false;
1821}
1822
1824 return (1 << (getVmcntBitWidthLo(Version.Major) +
1825 getVmcntBitWidthHi(Version.Major))) -
1826 1;
1827}
1828
1830 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1831}
1832
1834 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1835}
1836
1838 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1839}
1840
1842 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1843}
1844
1846 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1847}
1848
1850 return (1 << getDscntBitWidth(Version.Major)) - 1;
1851}
1852
1854 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1855}
1856
1858 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1859}
1860
1862 return (1 << getAsynccntBitWidth(Version.Major, Version.Minor)) - 1;
1863}
1864
1866 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1867}
1868
1870 bool HasExtendedWaitCounts = IV.Major >= 12;
1871 if (HasExtendedWaitCounts) {
1874 } else {
1877 }
1887}
1888
1890 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1891 getVmcntBitWidthLo(Version.Major));
1892 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1893 getExpcntBitWidth(Version.Major));
1894 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1895 getLgkmcntBitWidth(Version.Major));
1896 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1897 getVmcntBitWidthHi(Version.Major));
1898 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1899}
1900
1901unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1902 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1903 getVmcntBitWidthLo(Version.Major));
1904 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1905 getVmcntBitWidthHi(Version.Major));
1906 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1907}
1908
1909unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1910 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1911 getExpcntBitWidth(Version.Major));
1912}
1913
1914unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1915 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1916 getLgkmcntBitWidth(Version.Major));
1917}
1918
1919unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
1920 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1921 getLoadcntBitWidth(Version.Major));
1922}
1923
1924unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
1925 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1926 getStorecntBitWidth(Version.Major));
1927}
1928
1929unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
1930 return unpackBits(Waitcnt, getDscntBitShift(Version.Major),
1931 getDscntBitWidth(Version.Major));
1932}
1933
1934void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1935 unsigned &Expcnt, unsigned &Lgkmcnt) {
1936 Vmcnt = decodeVmcnt(Version, Waitcnt);
1937 Expcnt = decodeExpcnt(Version, Waitcnt);
1938 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1939}
1940
1941unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1942 unsigned Vmcnt) {
1943 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1944 getVmcntBitWidthLo(Version.Major));
1945 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1946 getVmcntBitShiftHi(Version.Major),
1947 getVmcntBitWidthHi(Version.Major));
1948}
1949
1950unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1951 unsigned Expcnt) {
1952 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1953 getExpcntBitWidth(Version.Major));
1954}
1955
1956unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1957 unsigned Lgkmcnt) {
1958 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1959 getLgkmcntBitWidth(Version.Major));
1960}
1961
1962unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1963 unsigned Expcnt, unsigned Lgkmcnt) {
1964 unsigned Waitcnt = getWaitcntBitMask(Version);
1966 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1967 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1968 return Waitcnt;
1969}
1970
1972 bool IsStore) {
1973 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1974 getDscntBitWidth(Version.Major));
1975 if (IsStore) {
1976 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1977 getStorecntBitWidth(Version.Major));
1978 return Dscnt | Storecnt;
1979 }
1980 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1981 getLoadcntBitWidth(Version.Major));
1982 return Dscnt | Loadcnt;
1983}
1984
1985static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1986 unsigned Loadcnt) {
1987 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1988 getLoadcntBitWidth(Version.Major));
1989}
1990
1991static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1992 unsigned Storecnt) {
1993 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1994 getStorecntBitWidth(Version.Major));
1995}
1996
1997static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1998 unsigned Dscnt) {
1999 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
2000 getDscntBitWidth(Version.Major));
2001}
2002
2003unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
2004 unsigned Dscnt) {
2005 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
2006 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
2008 return Waitcnt;
2009}
2010
2011unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
2012 unsigned Dscnt) {
2013 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
2014 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
2016 return Waitcnt;
2017}
2018
2019//===----------------------------------------------------------------------===//
2020// Custom Operand Values
2021//===----------------------------------------------------------------------===//
2022
2024 int Size,
2025 const MCSubtargetInfo &STI) {
2026 unsigned Enc = 0;
2027 for (int Idx = 0; Idx < Size; ++Idx) {
2028 const auto &Op = Opr[Idx];
2029 if (Op.isSupported(STI))
2030 Enc |= Op.encode(Op.Default);
2031 }
2032 return Enc;
2033}
2034
2036 int Size, unsigned Code,
2037 bool &HasNonDefaultVal,
2038 const MCSubtargetInfo &STI) {
2039 unsigned UsedOprMask = 0;
2040 HasNonDefaultVal = false;
2041 for (int Idx = 0; Idx < Size; ++Idx) {
2042 const auto &Op = Opr[Idx];
2043 if (!Op.isSupported(STI))
2044 continue;
2045 UsedOprMask |= Op.getMask();
2046 unsigned Val = Op.decode(Code);
2047 if (!Op.isValid(Val))
2048 return false;
2049 HasNonDefaultVal |= (Val != Op.Default);
2050 }
2051 return (Code & ~UsedOprMask) == 0;
2052}
2053
2054static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2055 unsigned Code, int &Idx, StringRef &Name,
2056 unsigned &Val, bool &IsDefault,
2057 const MCSubtargetInfo &STI) {
2058 while (Idx < Size) {
2059 const auto &Op = Opr[Idx++];
2060 if (Op.isSupported(STI)) {
2061 Name = Op.Name;
2062 Val = Op.decode(Code);
2063 IsDefault = (Val == Op.Default);
2064 return true;
2065 }
2066 }
2067
2068 return false;
2069}
2070
2072 int64_t InputVal) {
2073 if (InputVal < 0 || InputVal > Op.Max)
2074 return OPR_VAL_INVALID;
2075 return Op.encode(InputVal);
2076}
2077
2078static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2079 const StringRef Name, int64_t InputVal,
2080 unsigned &UsedOprMask,
2081 const MCSubtargetInfo &STI) {
2082 int InvalidId = OPR_ID_UNKNOWN;
2083 for (int Idx = 0; Idx < Size; ++Idx) {
2084 const auto &Op = Opr[Idx];
2085 if (Op.Name == Name) {
2086 if (!Op.isSupported(STI)) {
2087 InvalidId = OPR_ID_UNSUPPORTED;
2088 continue;
2089 }
2090 auto OprMask = Op.getMask();
2091 if (OprMask & UsedOprMask)
2092 return OPR_ID_DUPLICATE;
2093 UsedOprMask |= OprMask;
2094 return encodeCustomOperandVal(Op, InputVal);
2095 }
2096 }
2097 return InvalidId;
2098}
2099
2100//===----------------------------------------------------------------------===//
2101// DepCtr
2102//===----------------------------------------------------------------------===//
2103
2104namespace DepCtr {
2105
2107 static int Default = -1;
2108 if (Default == -1)
2110 return Default;
2111}
2112
2113bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2114 const MCSubtargetInfo &STI) {
2116 HasNonDefaultVal, STI);
2117}
2118
2119bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2120 bool &IsDefault, const MCSubtargetInfo &STI) {
2121 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
2122 IsDefault, STI);
2123}
2124
2125int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2126 const MCSubtargetInfo &STI) {
2127 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
2128 STI);
2129}
2130
2131unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2132
2133unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2134
2135unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2136
2138 return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1;
2139}
2140
2141unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2142
2143unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2144
2145unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2146
2147unsigned decodeFieldVmVsrc(unsigned Encoded) {
2148 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2149}
2150
2151unsigned decodeFieldVaVdst(unsigned Encoded) {
2152 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2153}
2154
2155unsigned decodeFieldSaSdst(unsigned Encoded) {
2156 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2157}
2158
2159unsigned decodeFieldVaSdst(unsigned Encoded) {
2160 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2161}
2162
2163unsigned decodeFieldVaVcc(unsigned Encoded) {
2164 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2165}
2166
2167unsigned decodeFieldVaSsrc(unsigned Encoded) {
2168 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2169}
2170
2171unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2172 return unpackBits(Encoded, getHoldCntBitShift(),
2173 getHoldCntWidth(Version.Major, Version.Minor));
2174}
2175
2176unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2177 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2178}
2179
2180unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2181 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2182 return encodeFieldVmVsrc(Encoded, VmVsrc);
2183}
2184
2185unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2186 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2187}
2188
2189unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2190 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2191 return encodeFieldVaVdst(Encoded, VaVdst);
2192}
2193
2194unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2195 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2196}
2197
2198unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2199 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2200 return encodeFieldSaSdst(Encoded, SaSdst);
2201}
2202
2203unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2204 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2205}
2206
2207unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2208 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2209 return encodeFieldVaSdst(Encoded, VaSdst);
2210}
2211
2212unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2213 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2214}
2215
2216unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2217 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2218 return encodeFieldVaVcc(Encoded, VaVcc);
2219}
2220
2221unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2222 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2223}
2224
2225unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2226 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2227 return encodeFieldVaSsrc(Encoded, VaSsrc);
2228}
2229
2230unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2231 const IsaVersion &Version) {
2232 return packBits(HoldCnt, Encoded, getHoldCntBitShift(),
2233 getHoldCntWidth(Version.Major, Version.Minor));
2234}
2235
2236unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2237 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2238 return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU()));
2239}
2240
2241} // namespace DepCtr
2242
2243//===----------------------------------------------------------------------===//
2244// exp tgt
2245//===----------------------------------------------------------------------===//
2246
2247namespace Exp {
2248
2249struct ExpTgt {
2251 unsigned Tgt;
2252 unsigned MaxIndex;
2253};
2254
2255// clang-format off
// Table of export targets. Each entry holds the target's base name, the id of
// its first (or only) target and the largest index that may be added to that
// id. getTgtName() and getTgtId() treat entries whose maximum index is 0 as
// unindexed: their name carries no numeric suffix.
2256static constexpr ExpTgt ExpTgtInfo[] = {
2257 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
2258 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
2259 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
2260 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
2261 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
2262 {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
2263 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
2264};
2265// clang-format on
2266
2267bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2268 for (const ExpTgt &Val : ExpTgtInfo) {
2269 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2270 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2271 Name = Val.Name;
2272 return true;
2273 }
2274 }
2275 return false;
2276}
2277
2278unsigned getTgtId(const StringRef Name) {
2279
2280 for (const ExpTgt &Val : ExpTgtInfo) {
2281 if (Val.MaxIndex == 0 && Name == Val.Name)
2282 return Val.Tgt;
2283
2284 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2285 StringRef Suffix = Name.drop_front(Val.Name.size());
2286
2287 unsigned Id;
2288 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2289 return ET_INVALID;
2290
2291 // Disable leading zeroes
2292 if (Suffix.size() > 1 && Suffix[0] == '0')
2293 return ET_INVALID;
2294
2295 return Val.Tgt + Id;
2296 }
2297 }
2298 return ET_INVALID;
2299}
2300
2301bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2302 switch (Id) {
2303 case ET_NULL:
2304 return !isGFX11Plus(STI);
2305 case ET_POS4:
2306 case ET_PRIM:
2307 return isGFX10Plus(STI);
2308 case ET_DUAL_SRC_BLEND0:
2309 case ET_DUAL_SRC_BLEND1:
2310 return isGFX11Plus(STI);
2311 default:
2312 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2313 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2314 return true;
2315 }
2316}
2317
2318} // namespace Exp
2319
2320//===----------------------------------------------------------------------===//
2321// MTBUF Format
2322//===----------------------------------------------------------------------===//
2323
2324namespace MTBUFFormat {
2325
2326int64_t getDfmt(const StringRef Name) {
2327 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2328 if (Name == DfmtSymbolic[Id])
2329 return Id;
2330 }
2331 return DFMT_UNDEF;
2332}
2333
2335 assert(Id <= DFMT_MAX);
2336 return DfmtSymbolic[Id];
2337}
2338
2340 if (isSI(STI) || isCI(STI))
2341 return NfmtSymbolicSICI;
2342 if (isVI(STI) || isGFX9(STI))
2343 return NfmtSymbolicVI;
2344 return NfmtSymbolicGFX10;
2345}
2346
2347int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2348 const auto *lookupTable = getNfmtLookupTable(STI);
2349 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2350 if (Name == lookupTable[Id])
2351 return Id;
2352 }
2353 return NFMT_UNDEF;
2354}
2355
2356StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2357 assert(Id <= NFMT_MAX);
2358 return getNfmtLookupTable(STI)[Id];
2359}
2360
2361bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2362 unsigned Dfmt;
2363 unsigned Nfmt;
2364 decodeDfmtNfmt(Id, Dfmt, Nfmt);
2365 return isValidNfmt(Nfmt, STI);
2366}
2367
2368bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2369 return !getNfmtName(Id, STI).empty();
2370}
2371
2372int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2373 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2374}
2375
2376void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2377 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2378 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2379}
2380
2381int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2382 if (isGFX11Plus(STI)) {
2383 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2384 if (Name == UfmtSymbolicGFX11[Id])
2385 return Id;
2386 }
2387 } else {
2388 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2389 if (Name == UfmtSymbolicGFX10[Id])
2390 return Id;
2391 }
2392 }
2393 return UFMT_UNDEF;
2394}
2395
2397 if (isValidUnifiedFormat(Id, STI))
2398 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2399 return "";
2400}
2401
2402bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2403 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2404}
2405
2406int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2407 const MCSubtargetInfo &STI) {
2408 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2409 if (isGFX11Plus(STI)) {
2410 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2411 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2412 return Id;
2413 }
2414 } else {
2415 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2416 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2417 return Id;
2418 }
2419 }
2420 return UFMT_UNDEF;
2421}
2422
2423bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2424 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2425}
2426
2428 if (isGFX10Plus(STI))
2429 return UFMT_DEFAULT;
2430 return DFMT_NFMT_DEFAULT;
2431}
2432
2433} // namespace MTBUFFormat
2434
2435//===----------------------------------------------------------------------===//
2436// SendMsg
2437//===----------------------------------------------------------------------===//
2438
2439namespace SendMsg {
2440
2444
2445bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2446 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2447}
2448
2449bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2450 bool Strict) {
2451 assert(isValidMsgId(MsgId, STI));
2452
2453 if (!Strict)
2454 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
2455
2456 if (msgRequiresOp(MsgId, STI)) {
2457 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2458 return false;
2459
2460 return !getMsgOpName(MsgId, OpId, STI).empty();
2461 }
2462
2463 return OpId == OP_NONE_;
2464}
2465
2466bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2467 const MCSubtargetInfo &STI, bool Strict) {
2468 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2469
2470 if (!Strict)
2472
2473 if (!isGFX11Plus(STI)) {
2474 switch (MsgId) {
2475 case ID_GS_PreGFX11:
2478 return (OpId == OP_GS_NOP)
2481 }
2482 }
2483 return StreamId == STREAM_ID_NONE_;
2484}
2485
2486bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2487 return MsgId == ID_SYSMSG ||
2488 (!isGFX11Plus(STI) &&
2489 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2490}
2491
2492bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2493 const MCSubtargetInfo &STI) {
2494 return !isGFX11Plus(STI) &&
2495 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2496 OpId != OP_GS_NOP;
2497}
2498
2499void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2500 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2501 MsgId = Val & getMsgIdMask(STI);
2502 if (isGFX11Plus(STI)) {
2503 OpId = 0;
2504 StreamId = 0;
2505 } else {
2506 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2508 }
2509}
2510
2512 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2513}
2514
2515bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2516 // Explicitly list message types that are known to not use m0.
2517 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2518 // types are added in the future that do use m0.
2519 if (isGFX11Plus(STI)) {
2520 switch (MsgId) {
2522 return true;
2523 default:
2524 break;
2525 }
2526 }
2527 switch (MsgId) {
2528 case ID_SAVEWAVE:
2529 case ID_STALL_WAVE_GEN:
2530 case ID_HALT_WAVES:
2531 case ID_ORDERED_PS_DONE:
2533 case ID_GET_DOORBELL:
2534 case ID_GET_DDID:
2535 case ID_SYSMSG:
2536 return true;
2537 default:
2538 return false;
2539 }
2540}
2541
2542} // namespace SendMsg
2543
2544//===----------------------------------------------------------------------===//
2545//
2546//===----------------------------------------------------------------------===//
2547
2549 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2550}
2551
2553 // As a safe default always respond as if PS has color exports.
2554 return F.getFnAttributeAsParsedInteger(
2555 "amdgpu-color-export",
2556 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2557}
2558
2560 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2561}
2562
2564 unsigned BlockSize =
2565 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2566
2567 if (BlockSize == 16 || BlockSize == 32)
2568 return BlockSize;
2569
2570 return 0;
2571}
2572
2573bool hasXNACK(const MCSubtargetInfo &STI) {
2574 return STI.hasFeature(AMDGPU::FeatureXNACK);
2575}
2576
2578 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2579 !STI.hasFeature(AMDGPU::FeatureR128A16);
2580}
2581
2582bool hasA16(const MCSubtargetInfo &STI) {
2583 return STI.hasFeature(AMDGPU::FeatureA16);
2584}
2585
2586bool hasG16(const MCSubtargetInfo &STI) {
2587 return STI.hasFeature(AMDGPU::FeatureG16);
2588}
2589
2591 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2592 !isSI(STI);
2593}
2594
2595bool hasGDS(const MCSubtargetInfo &STI) {
2596 return STI.hasFeature(AMDGPU::FeatureGDS);
2597}
2598
2599unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2600 auto Version = getIsaVersion(STI.getCPU());
2601 if (Version.Major == 10)
2602 return Version.Minor >= 3 ? 13 : 5;
2603 if (Version.Major == 11)
2604 return 5;
2605 if (Version.Major >= 12)
2606 return HasSampler ? 4 : 5;
2607 return 0;
2608}
2609
2611 if (isGFX1250Plus(STI))
2612 return 32;
2613 return 16;
2614}
2615
2616bool isSI(const MCSubtargetInfo &STI) {
2617 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2618}
2619
2620bool isCI(const MCSubtargetInfo &STI) {
2621 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2622}
2623
2624bool isVI(const MCSubtargetInfo &STI) {
2625 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2626}
2627
2628bool isGFX9(const MCSubtargetInfo &STI) {
2629 return STI.hasFeature(AMDGPU::FeatureGFX9);
2630}
2631
2633 return isGFX9(STI) || isGFX10(STI);
2634}
2635
2637 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2638}
2639
2641 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2642}
2643
2644bool isGFX8Plus(const MCSubtargetInfo &STI) {
2645 return isVI(STI) || isGFX9Plus(STI);
2646}
2647
2648bool isGFX9Plus(const MCSubtargetInfo &STI) {
2649 return isGFX9(STI) || isGFX10Plus(STI);
2650}
2651
2652bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2653
2654bool isGFX10(const MCSubtargetInfo &STI) {
2655 return STI.hasFeature(AMDGPU::FeatureGFX10);
2656}
2657
2659 return isGFX10(STI) || isGFX11(STI);
2660}
2661
2663 return isGFX10(STI) || isGFX11Plus(STI);
2664}
2665
2666bool isGFX11(const MCSubtargetInfo &STI) {
2667 return STI.hasFeature(AMDGPU::FeatureGFX11);
2668}
2669
2671 return isGFX11(STI) || isGFX12Plus(STI);
2672}
2673
2674bool isGFX12(const MCSubtargetInfo &STI) {
2675 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2676}
2677
2679 return isGFX12(STI) || isGFX13Plus(STI);
2680}
2681
2682bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2683
2684bool isGFX1250(const MCSubtargetInfo &STI) {
2685 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2686}
2687
2689 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2690}
2691
2692bool isGFX13(const MCSubtargetInfo &STI) {
2693 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2694}
2695
2696bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2697
2699 if (isGFX1250(STI))
2700 return false;
2701 return isGFX10Plus(STI);
2702}
2703
2704bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2705
2707 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2708}
2709
2711 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2712}
2713
2715 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2716}
2717
2719 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2720}
2721
2723 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2724}
2725
2727 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2728}
2729
2731 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2732}
2733
2734bool isGFX90A(const MCSubtargetInfo &STI) {
2735 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2736}
2737
2738bool isGFX940(const MCSubtargetInfo &STI) {
2739 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2740}
2741
2743 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2744}
2745
2747 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2748}
2749
2750bool hasVOPD(const MCSubtargetInfo &STI) {
2751 return STI.hasFeature(AMDGPU::FeatureVOPDInsts);
2752}
2753
2755 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2756}
2757
2759 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2760}
2761
2762int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2763 int32_t ArgNumVGPR) {
2764 if (has90AInsts && ArgNumAGPR)
2765 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2766 return std::max(ArgNumVGPR, ArgNumAGPR);
2767}
2768
2770 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2771 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2772 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2773 Reg == AMDGPU::SCC;
2774}
2775
2779
// Expands to a complete switch over Reg.id(), so a variable named Reg must be
// in scope where it is used. Each entry is produced by one of the CASE_*
// helper macros, which are defined (and later redefined) after this macro, so
// what an entry expands to depends on the CASE_* definitions active at the
// point of expansion. Registers without an entry hit `default` and are
// returned unchanged.
2780#define MAP_REG2REG \
2781 using namespace AMDGPU; \
2782 switch (Reg.id()) { \
2783 default: \
2784 return Reg; \
2785 CASE_CI_VI(FLAT_SCR) \
2786 CASE_CI_VI(FLAT_SCR_LO) \
2787 CASE_CI_VI(FLAT_SCR_HI) \
2788 CASE_VI_GFX9PLUS(TTMP0) \
2789 CASE_VI_GFX9PLUS(TTMP1) \
2790 CASE_VI_GFX9PLUS(TTMP2) \
2791 CASE_VI_GFX9PLUS(TTMP3) \
2792 CASE_VI_GFX9PLUS(TTMP4) \
2793 CASE_VI_GFX9PLUS(TTMP5) \
2794 CASE_VI_GFX9PLUS(TTMP6) \
2795 CASE_VI_GFX9PLUS(TTMP7) \
2796 CASE_VI_GFX9PLUS(TTMP8) \
2797 CASE_VI_GFX9PLUS(TTMP9) \
2798 CASE_VI_GFX9PLUS(TTMP10) \
2799 CASE_VI_GFX9PLUS(TTMP11) \
2800 CASE_VI_GFX9PLUS(TTMP12) \
2801 CASE_VI_GFX9PLUS(TTMP13) \
2802 CASE_VI_GFX9PLUS(TTMP14) \
2803 CASE_VI_GFX9PLUS(TTMP15) \
2804 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2805 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2806 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2807 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2808 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2809 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2810 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2811 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2812 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2813 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2814 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2815 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2816 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2817 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2818 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2819 CASE_VI_GFX9PLUS( \
2820 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2821 CASE_GFXPRE11_GFX11PLUS(M0) \
2822 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2823 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2824 }
2825
// Forward mapping entry: maps `node` to its `_ci` variant when isCI(STI)
// holds and to its `_vi` variant otherwise. Requires STI in scope.
// NOTE(review): the assert is emitted before the `case` label, so inside the
// MAP_REG2REG switch it follows the previous entry's `return` and looks
// unreachable -- confirm whether it was meant to sit after the label.
2826#define CASE_CI_VI(node) \
2827 assert(!isSI(STI)); \
2828 case node: \
2829 return isCI(STI) ? node##_ci : node##_vi;
2830
// Forward mapping entry: maps `node` to its `_gfx9plus` variant when
// isGFX9Plus(STI) holds and to its `_vi` variant otherwise. Requires STI in
// scope.
2831#define CASE_VI_GFX9PLUS(node) \
2832 case node: \
2833 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2834
// Forward mapping entry: maps `node` to its `_gfx11plus` variant when
// isGFX11Plus(STI) holds and to its `_gfxpre11` variant otherwise. Requires
// STI in scope.
2835#define CASE_GFXPRE11_GFX11PLUS(node) \
2836 case node: \
2837 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2838
// Forward mapping entry like CASE_GFXPRE11_GFX11PLUS, except that `node` is
// mapped to the `_gfx11plus` / `_gfxpre11` variant of a different register,
// `result`. Requires STI in scope.
2839#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2840 case node: \
2841 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2842
2844 if (STI.getTargetTriple().getArch() == Triple::r600)
2845 return Reg;
2847}
2848
2849#undef CASE_CI_VI
2850#undef CASE_VI_GFX9PLUS
2851#undef CASE_GFXPRE11_GFX11PLUS
2852#undef CASE_GFXPRE11_GFX11PLUS_TO
2853
// Reverse mapping entries: with these definitions every suffixed variant of a
// register maps back to the unsuffixed `node`. CASE_GFXPRE11_GFX11PLUS_TO
// expands to nothing here, so entries declared through it produce no case in
// this direction.
2854#define CASE_CI_VI(node) \
2855 case node##_ci: \
2856 case node##_vi: \
2857 return node;
2858#define CASE_VI_GFX9PLUS(node) \
2859 case node##_vi: \
2860 case node##_gfx9plus: \
2861 return node;
2862#define CASE_GFXPRE11_GFX11PLUS(node) \
2863 case node##_gfx11plus: \
2864 case node##_gfxpre11: \
2865 return node;
2866#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2867
2869
2871 switch (Reg.id()) {
2872 case AMDGPU::SRC_SHARED_BASE_LO:
2873 case AMDGPU::SRC_SHARED_BASE:
2874 case AMDGPU::SRC_SHARED_LIMIT_LO:
2875 case AMDGPU::SRC_SHARED_LIMIT:
2876 case AMDGPU::SRC_PRIVATE_BASE_LO:
2877 case AMDGPU::SRC_PRIVATE_BASE:
2878 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2879 case AMDGPU::SRC_PRIVATE_LIMIT:
2880 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2881 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2882 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2883 return true;
2884 case AMDGPU::SRC_VCCZ:
2885 case AMDGPU::SRC_EXECZ:
2886 case AMDGPU::SRC_SCC:
2887 return true;
2888 case AMDGPU::SGPR_NULL:
2889 return true;
2890 default:
2891 return false;
2892 }
2893}
2894
2895#undef CASE_CI_VI
2896#undef CASE_VI_GFX9PLUS
2897#undef CASE_GFXPRE11_GFX11PLUS
2898#undef CASE_GFXPRE11_GFX11PLUS_TO
2899#undef MAP_REG2REG
2900
2901bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2902 assert(OpNo < Desc.NumOperands);
2903 unsigned OpType = Desc.operands()[OpNo].OperandType;
2904 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2905 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2906}
2907
2908bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2909 assert(OpNo < Desc.NumOperands);
2910 unsigned OpType = Desc.operands()[OpNo].OperandType;
2911 switch (OpType) {
2926 return true;
2927 default:
2928 return false;
2929 }
2930}
2931
2932bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2933 assert(OpNo < Desc.NumOperands);
2934 unsigned OpType = Desc.operands()[OpNo].OperandType;
2935 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2939}
2940
// Maps a register class id to the width in bits of the registers in that
// class. The cases are grouped by width, from 16 up to 1024 bits; a class id
// that is not listed is treated as a programming error (llvm_unreachable).
2941// Avoid using MCRegisterClass::getSize, since that function will go away
2942// (move from MC* level to Target* level). Return size in bits.
2943unsigned getRegBitWidth(unsigned RCID) {
2944 switch (RCID) {
2945 case AMDGPU::VGPR_16RegClassID:
2946 case AMDGPU::VGPR_16_Lo128RegClassID:
2947 case AMDGPU::SGPR_LO16RegClassID:
2948 case AMDGPU::AGPR_LO16RegClassID:
2949 return 16;
2950 case AMDGPU::SGPR_32RegClassID:
2951 case AMDGPU::VGPR_32RegClassID:
2952 case AMDGPU::VGPR_32_Lo256RegClassID:
2953 case AMDGPU::VRegOrLds_32RegClassID:
2954 case AMDGPU::AGPR_32RegClassID:
2955 case AMDGPU::VS_32RegClassID:
2956 case AMDGPU::AV_32RegClassID:
2957 case AMDGPU::SReg_32RegClassID:
2958 case AMDGPU::SReg_32_XM0RegClassID:
2959 case AMDGPU::SRegOrLds_32RegClassID:
2960 return 32;
2961 case AMDGPU::SGPR_64RegClassID:
2962 case AMDGPU::VS_64RegClassID:
2963 case AMDGPU::SReg_64RegClassID:
2964 case AMDGPU::VReg_64RegClassID:
2965 case AMDGPU::AReg_64RegClassID:
2966 case AMDGPU::SReg_64_XEXECRegClassID:
2967 case AMDGPU::VReg_64_Align2RegClassID:
2968 case AMDGPU::AReg_64_Align2RegClassID:
2969 case AMDGPU::AV_64RegClassID:
2970 case AMDGPU::AV_64_Align2RegClassID:
2971 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2972 case AMDGPU::VS_64_Lo256RegClassID:
2973 return 64;
2974 case AMDGPU::SGPR_96RegClassID:
2975 case AMDGPU::SReg_96RegClassID:
2976 case AMDGPU::VReg_96RegClassID:
2977 case AMDGPU::AReg_96RegClassID:
2978 case AMDGPU::VReg_96_Align2RegClassID:
2979 case AMDGPU::AReg_96_Align2RegClassID:
2980 case AMDGPU::AV_96RegClassID:
2981 case AMDGPU::AV_96_Align2RegClassID:
2982 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2983 return 96;
2984 case AMDGPU::SGPR_128RegClassID:
2985 case AMDGPU::SReg_128RegClassID:
2986 case AMDGPU::VReg_128RegClassID:
2987 case AMDGPU::AReg_128RegClassID:
2988 case AMDGPU::VReg_128_Align2RegClassID:
2989 case AMDGPU::AReg_128_Align2RegClassID:
2990 case AMDGPU::AV_128RegClassID:
2991 case AMDGPU::AV_128_Align2RegClassID:
2992 case AMDGPU::SReg_128_XNULLRegClassID:
2993 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2994 return 128;
2995 case AMDGPU::SGPR_160RegClassID:
2996 case AMDGPU::SReg_160RegClassID:
2997 case AMDGPU::VReg_160RegClassID:
2998 case AMDGPU::AReg_160RegClassID:
2999 case AMDGPU::VReg_160_Align2RegClassID:
3000 case AMDGPU::AReg_160_Align2RegClassID:
3001 case AMDGPU::AV_160RegClassID:
3002 case AMDGPU::AV_160_Align2RegClassID:
3003 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
3004 return 160;
3005 case AMDGPU::SGPR_192RegClassID:
3006 case AMDGPU::SReg_192RegClassID:
3007 case AMDGPU::VReg_192RegClassID:
3008 case AMDGPU::AReg_192RegClassID:
3009 case AMDGPU::VReg_192_Align2RegClassID:
3010 case AMDGPU::AReg_192_Align2RegClassID:
3011 case AMDGPU::AV_192RegClassID:
3012 case AMDGPU::AV_192_Align2RegClassID:
3013 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
3014 return 192;
3015 case AMDGPU::SGPR_224RegClassID:
3016 case AMDGPU::SReg_224RegClassID:
3017 case AMDGPU::VReg_224RegClassID:
3018 case AMDGPU::AReg_224RegClassID:
3019 case AMDGPU::VReg_224_Align2RegClassID:
3020 case AMDGPU::AReg_224_Align2RegClassID:
3021 case AMDGPU::AV_224RegClassID:
3022 case AMDGPU::AV_224_Align2RegClassID:
3023 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
3024 return 224;
3025 case AMDGPU::SGPR_256RegClassID:
3026 case AMDGPU::SReg_256RegClassID:
3027 case AMDGPU::VReg_256RegClassID:
3028 case AMDGPU::AReg_256RegClassID:
3029 case AMDGPU::VReg_256_Align2RegClassID:
3030 case AMDGPU::AReg_256_Align2RegClassID:
3031 case AMDGPU::AV_256RegClassID:
3032 case AMDGPU::AV_256_Align2RegClassID:
3033 case AMDGPU::SReg_256_XNULLRegClassID:
3034 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
3035 return 256;
3036 case AMDGPU::SGPR_288RegClassID:
3037 case AMDGPU::SReg_288RegClassID:
3038 case AMDGPU::VReg_288RegClassID:
3039 case AMDGPU::AReg_288RegClassID:
3040 case AMDGPU::VReg_288_Align2RegClassID:
3041 case AMDGPU::AReg_288_Align2RegClassID:
3042 case AMDGPU::AV_288RegClassID:
3043 case AMDGPU::AV_288_Align2RegClassID:
3044 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
3045 return 288;
3046 case AMDGPU::SGPR_320RegClassID:
3047 case AMDGPU::SReg_320RegClassID:
3048 case AMDGPU::VReg_320RegClassID:
3049 case AMDGPU::AReg_320RegClassID:
3050 case AMDGPU::VReg_320_Align2RegClassID:
3051 case AMDGPU::AReg_320_Align2RegClassID:
3052 case AMDGPU::AV_320RegClassID:
3053 case AMDGPU::AV_320_Align2RegClassID:
3054 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
3055 return 320;
3056 case AMDGPU::SGPR_352RegClassID:
3057 case AMDGPU::SReg_352RegClassID:
3058 case AMDGPU::VReg_352RegClassID:
3059 case AMDGPU::AReg_352RegClassID:
3060 case AMDGPU::VReg_352_Align2RegClassID:
3061 case AMDGPU::AReg_352_Align2RegClassID:
3062 case AMDGPU::AV_352RegClassID:
3063 case AMDGPU::AV_352_Align2RegClassID:
3064 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
3065 return 352;
3066 case AMDGPU::SGPR_384RegClassID:
3067 case AMDGPU::SReg_384RegClassID:
3068 case AMDGPU::VReg_384RegClassID:
3069 case AMDGPU::AReg_384RegClassID:
3070 case AMDGPU::VReg_384_Align2RegClassID:
3071 case AMDGPU::AReg_384_Align2RegClassID:
3072 case AMDGPU::AV_384RegClassID:
3073 case AMDGPU::AV_384_Align2RegClassID:
3074 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3075 return 384;
3076 case AMDGPU::SGPR_512RegClassID:
3077 case AMDGPU::SReg_512RegClassID:
3078 case AMDGPU::VReg_512RegClassID:
3079 case AMDGPU::AReg_512RegClassID:
3080 case AMDGPU::VReg_512_Align2RegClassID:
3081 case AMDGPU::AReg_512_Align2RegClassID:
3082 case AMDGPU::AV_512RegClassID:
3083 case AMDGPU::AV_512_Align2RegClassID:
3084 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3085 return 512;
3086 case AMDGPU::SGPR_1024RegClassID:
3087 case AMDGPU::SReg_1024RegClassID:
3088 case AMDGPU::VReg_1024RegClassID:
3089 case AMDGPU::AReg_1024RegClassID:
3090 case AMDGPU::VReg_1024_Align2RegClassID:
3091 case AMDGPU::AReg_1024_Align2RegClassID:
3092 case AMDGPU::AV_1024RegClassID:
3093 case AMDGPU::AV_1024_Align2RegClassID:
3094 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3095 return 1024;
3096 default:
3097 llvm_unreachable("Unexpected register class");
3098 }
3099}
3100
3101unsigned getRegBitWidth(const MCRegisterClass &RC) {
3102 return getRegBitWidth(RC.getID());
3103}
3104
3105bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3107 return true;
3108
3109 uint64_t Val = static_cast<uint64_t>(Literal);
3110 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
3111 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
3112 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
3113 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
3114 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
3115 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
3116 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
3117 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
3118 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
3119 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3120}
3121
3122bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3124 return true;
3125
3126 // The actual type of the operand does not seem to matter as long
3127 // as the bits match one of the inline immediate values. For example:
3128 //
3129 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3130 // so it is a legal inline immediate.
3131 //
3132 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3133 // floating-point, so it is a legal inline immediate.
3134
3135 uint32_t Val = static_cast<uint32_t>(Literal);
3136 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
3137 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
3138 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
3139 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
3140 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
3141 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
3142 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
3143 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
3144 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
3145 (Val == 0x3e22f983 && HasInv2Pi);
3146}
3147
/// \returns true if the 16-bit \p Literal can be encoded as a bf16 inline
/// constant: a small integer in [-16, 64] or one of the bf16 bit patterns
/// listed below. Always false when \p HasInv2Pi is not set.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  // Integer inline constants. Without this guard the function returned true
  // unconditionally and the bit-pattern comparisons below were dead code.
  if (Literal >= -16 && Literal <= 64)
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3F00 || // 0.5
         Val == 0xBF00 || // -0.5
         Val == 0x3F80 || // 1.0
         Val == 0xBF80 || // -1.0
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4080 || // 4.0
         Val == 0xC080 || // -4.0
         Val == 0x3E22;   // 1.0 / (2.0 * pi)
}
3164
3165bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3166 return isInlinableLiteral32(Literal, HasInv2Pi);
3167}
3168
/// \returns true if the 16-bit \p Literal can be encoded as an fp16 inline
/// constant: a small integer in [-16, 64] or one of the half-precision bit
/// patterns listed below. Always false when \p HasInv2Pi is not set.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  // Integer inline constants. Without this guard the function returned true
  // unconditionally and the bit-pattern comparisons below were dead code.
  if (Literal >= -16 && Literal <= 64)
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}
3185
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// instruction.
///
/// The ISA reference guide is misleading about packed 16-bit inline operands;
/// the hardware behaves as follows:
///  - integer encodings (-16 .. 64) always produce sign-extended 32-bit values
///  - float encodings produce, for F16 instructions, the half-precision value
///    in the LSBs with 0 in the MSBs, and for UI16 instructions the
///    single-precision value.
///
/// \param IsFloat selects the half-precision patterns (true) or the
/// single-precision patterns (false) for the float encodings.
/// \returns The encoding, or an empty optional if \p Literal is not inlinable.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  const int32_t AsInt = static_cast<int32_t>(Literal);

  // 0 .. 64 encode as 128 .. 192.
  if (AsInt >= 0 && AsInt <= 64)
    return 128 + AsInt;

  // -1 .. -16 encode as 193 .. 208.
  if (AsInt < 0 && AsInt >= -16)
    return 192 - AsInt;

  // Float constants occupy consecutive encodings starting at 240, in the
  // order 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  static constexpr unsigned NumFloatConsts = 9;
  static constexpr uint32_t HalfBits[NumFloatConsts] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000,
      0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SingleBits[NumFloatConsts] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};

  const uint32_t *Patterns = IsFloat ? HalfBits : SingleBits;
  for (unsigned Idx = 0; Idx != NumFloatConsts; ++Idx) {
    if (Patterns[Idx] == Literal)
      return 240 + Idx;
  }

  return std::nullopt;
}
3238
3239// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3240// or nullopt.
3241std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3242 return getInlineEncodingV216(false, Literal);
3243}
3244
/// \returns The inline-constant encoding of \p Literal for a V_PK_*_BF16
/// instruction, or std::nullopt if it cannot be inlined.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t AsInt = static_cast<int32_t>(Literal);

  // 0 .. 64 encode as 128 .. 192.
  if (AsInt >= 0 && AsInt <= 64)
    return 128 + AsInt;

  // -1 .. -16 encode as 193 .. 208.
  if (AsInt < 0 && AsInt >= -16)
    return 192 - AsInt;

  // bf16 constants occupy consecutive encodings starting at 240, in the
  // order 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  static constexpr unsigned NumFloatConsts = 9;
  static constexpr uint32_t BF16Bits[NumFloatConsts] = {
      0x3F00, 0xBF00, 0x3F80, 0xBF80, 0x4000,
      0xC000, 0x4080, 0xC080, 0x3E22};

  for (unsigned Idx = 0; Idx != NumFloatConsts; ++Idx) {
    if (BF16Bits[Idx] == Literal)
      return 240 + Idx;
  }

  return std::nullopt;
}
3272
3273// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3274// or nullopt.
3275std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3276 return getInlineEncodingV216(true, Literal);
3277}
3278
3279// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3280// or nullopt. This accounts for different inline constant behavior:
3281// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3282// - GFX11+: fp16 inline constants are duplicated into both halves
3284 bool IsGFX11Plus) {
3285 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3286 if (!IsGFX11Plus)
3287 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3288
3289 // GFX11+ behavior: f16 duplicated in both halves
3290 // First, check for sign-extended integer inline constants (-16 to 64)
3291 // These work the same across all generations
3292 int32_t Signed = static_cast<int32_t>(Literal);
3293 if (Signed >= 0 && Signed <= 64)
3294 return 128 + Signed;
3295
3296 if (Signed >= -16 && Signed <= -1)
3297 return 192 + std::abs(Signed);
3298
3299 // For float inline constants on GFX11+, both halves must be equal
3300 uint16_t Lo = static_cast<uint16_t>(Literal);
3301 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3302 if (Lo != Hi)
3303 return std::nullopt;
3304 return getInlineEncodingV216(/*IsFloat=*/true, Lo);
3305}
3306
3307// Whether the given literal can be inlined for a V_PK_* instruction.
3309 switch (OpType) {
3312 return getInlineEncodingV216(false, Literal).has_value();
3315 return getInlineEncodingV216(true, Literal).has_value();
3317 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3322 return false;
3323 default:
3324 llvm_unreachable("bad packed operand type");
3325 }
3326}
3327
3328// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3332
3333// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3337
3338// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3342
3343// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
3345 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3346}
3347
3348bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3349 if (IsFP64)
3350 return !Lo_32(Val);
3351
3352 return isUInt<32>(Val) || isInt<32>(Val);
3353}
3354
3355int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
3356 switch (Type) {
3357 default:
3358 break;
3363 return Imm & 0xffff;
3377 return Lo_32(Imm);
3380 return IsLit ? Imm : Hi_32(Imm);
3381 }
3382 return Imm;
3383}
3384
3386 const Function *F = A->getParent();
3387
3388 // Arguments to compute shaders are never a source of divergence.
3389 CallingConv::ID CC = F->getCallingConv();
3390 switch (CC) {
3393 return true;
3404 // For non-compute shaders, SGPR inputs are marked with either inreg or
3405 // byval. Everything else is in VGPRs.
3406 return A->hasAttribute(Attribute::InReg) ||
3407 A->hasAttribute(Attribute::ByVal);
3408 default:
3409 // TODO: treat i1 as divergent?
3410 return A->hasAttribute(Attribute::InReg);
3411 }
3412}
3413
3414bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3415 // Arguments to compute shaders are never a source of divergence.
3417 switch (CC) {
3420 return true;
3431 // For non-compute shaders, SGPR inputs are marked with either inreg or
3432 // byval. Everything else is in VGPRs.
3433 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3434 CB->paramHasAttr(ArgNo, Attribute::ByVal);
3435 default:
3436 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3437 }
3438}
3439
3440static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3441 return isGCN3Encoding(ST) || isGFX10Plus(ST);
3442}
3443
3445 int64_t EncodedOffset) {
3446 if (isGFX12Plus(ST))
3447 return isUInt<23>(EncodedOffset);
3448
3449 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3450 : isUInt<8>(EncodedOffset);
3451}
3452
3454 int64_t EncodedOffset, bool IsBuffer) {
3455 if (isGFX12Plus(ST)) {
3456 if (IsBuffer && EncodedOffset < 0)
3457 return false;
3458 return isInt<24>(EncodedOffset);
3459 }
3460
3461 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3462}
3463
// Returns true if \p ByteOffset is a multiple of four bytes.
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
3467
3469 uint64_t ByteOffset) {
3470 if (hasSMEMByteOffset(ST))
3471 return ByteOffset;
3472
3473 assert(isDwordAligned(ByteOffset));
3474 return ByteOffset >> 2;
3475}
3476
3477std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3478 int64_t ByteOffset, bool IsBuffer,
3479 bool HasSOffset) {
3480 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3481 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
3482 // Handle case where SOffset is not present.
3483 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3484 return std::nullopt;
3485
3486 if (isGFX12Plus(ST)) // 24 bit signed offsets
3487 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3488 : std::nullopt;
3489
3490 // The signed version is always a byte offset.
3491 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3493 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3494 : std::nullopt;
3495 }
3496
3497 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3498 return std::nullopt;
3499
3500 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3501 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3502 ? std::optional<int64_t>(EncodedOffset)
3503 : std::nullopt;
3504}
3505
3506std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3507 int64_t ByteOffset) {
3508 if (!isCI(ST) || !isDwordAligned(ByteOffset))
3509 return std::nullopt;
3510
3511 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3512 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3513 : std::nullopt;
3514}
3515
3517 if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
3518 return 12;
3519 if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
3520 return 24;
3521 return 13;
3522}
3523
// File-local row types and lookup declarations for the searchable tables
// pulled in from AMDGPUGenSearchableTables.inc below.
3524 namespace {
3525
// One row keyed by an intrinsic ID; presumably the element type of the
// generated sources-of-divergence table -- TODO confirm against the .inc.
3526 struct SourceOfDivergence {
3527 unsigned Intr;
3528 };
// Only declared here. The definition presumably comes from the generated
// include under GET_SourcesOfDivergence_IMPL -- TODO confirm. Callers in this
// file treat a non-null result as "found".
3529 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3530
// One row keyed by an intrinsic ID; presumably the element type of the
// generated uniform-intrinsics table -- TODO confirm against the .inc.
3531 struct AlwaysUniform {
3532 unsigned Intr;
3533 };
// Only declared here. The definition presumably comes from the generated
// include under GET_UniformIntrinsics_IMPL -- TODO confirm.
3534 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3535
// Select which table implementations the generated include should emit.
3536 #define GET_SourcesOfDivergence_IMPL
3537 #define GET_UniformIntrinsics_IMPL
3538 #define GET_Gfx9BufferFormat_IMPL
3539 #define GET_Gfx10BufferFormat_IMPL
3540 #define GET_Gfx11PlusBufferFormat_IMPL
3541
3542 #include "AMDGPUGenSearchableTables.inc"
3543
3544 } // end anonymous namespace
3545
3546bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3547 return lookupSourceOfDivergence(IntrID);
3548}
3549
3550bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3551 return lookupAlwaysUniform(IntrID);
3552}
3553
3555 uint8_t NumComponents,
3556 uint8_t NumFormat,
3557 const MCSubtargetInfo &STI) {
3558 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3559 BitsPerComp, NumComponents, NumFormat)
3560 : isGFX10(STI)
3561 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3562 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3563}
3564
3566 const MCSubtargetInfo &STI) {
3567 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3568 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3569 : getGfx9BufferFormatInfo(Format);
3570}
3571
3573 const MCRegisterInfo &MRI) {
3574 const unsigned VGPRClasses[] = {
3575 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3576 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3577 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3578 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3579 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3580 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3581 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3582 AMDGPU::VReg_1024RegClassID};
3583
3584 for (unsigned RCID : VGPRClasses) {
3585 const MCRegisterClass &RC = MRI.getRegClass(RCID);
3586 if (RC.contains(Reg))
3587 return &RC;
3588 }
3589
3590 return nullptr;
3591}
3592
3594 unsigned Enc = MRI.getEncodingValue(Reg);
3595 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3596 return Idx >> 8;
3597}
3598
3600 const MCRegisterInfo &MRI) {
3601 unsigned Enc = MRI.getEncodingValue(Reg);
3602 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3603 if (Idx >= 0x100)
3604 return MCRegister();
3605
3606 const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
3607 if (!RC)
3608 return MCRegister();
3609
3610 Idx |= MSBs << 8;
3611 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3612 // This class has 2048 registers with interleaved lo16 and hi16.
3613 Idx *= 2;
3615 ++Idx;
3616 }
3617
3618 return RC->getRegister(Idx);
3619}
3620
3621static std::optional<unsigned>
3622convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
3623 bool HasSetregVGPRMSBFixup) {
3624 constexpr unsigned VGPRMSBShift =
3626
3627 auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
3628 if (HwRegId != Hwreg::ID_MODE ||
3629 (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
3630 return {};
3631 // If there is SetregVGPRMSBFixup then Offset is ignored.
3632 if (!HasSetregVGPRMSBFixup)
3633 Imm <<= Offset;
3634 Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
3635 if (!HasSetregVGPRMSBFixup)
3637 return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
3638}
3639
3640std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3641 bool HasSetregVGPRMSBFixup) {
3642 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3643 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3644 MI.getOperand(1).getImm(),
3645 HasSetregVGPRMSBFixup);
3646}
3647
3648std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3649 bool HasSetregVGPRMSBFixup) {
3650 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3651 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3652 MI.getOperand(1).getImm(),
3653 HasSetregVGPRMSBFixup);
3654}
3655
3656std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3658 static const AMDGPU::OpName VOPOps[4] = {
3659 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3660 AMDGPU::OpName::vdst};
3661 static const AMDGPU::OpName VDSOps[4] = {
3662 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3663 AMDGPU::OpName::vdst};
3664 static const AMDGPU::OpName FLATOps[4] = {
3665 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3666 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3667 static const AMDGPU::OpName BUFOps[4] = {
3668 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3669 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3670 static const AMDGPU::OpName VIMGOps[4] = {
3671 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3672 AMDGPU::OpName::vdata};
3673
3674 // For VOPD instructions MSB of a corresponding Y component operand VGPR
3675 // address is supposed to match X operand, otherwise VOPD shall not be
3676 // combined.
3677 static const AMDGPU::OpName VOPDOpsX[4] = {
3678 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3679 AMDGPU::OpName::vdstX};
3680 static const AMDGPU::OpName VOPDOpsY[4] = {
3681 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3682 AMDGPU::OpName::vdstY};
3683
3684 // VOP2 MADMK instructions use src0, imm, src1 scheme.
3685 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3686 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3687 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3688 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3689 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3690 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3691 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3692 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3693 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3694
3695 unsigned TSFlags = Desc.TSFlags;
3696
3697 if (TSFlags &
3700 switch (Desc.getOpcode()) {
3701 // LD_SCALE operands ignore MSB.
3702 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3703 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3704 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3705 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3706 return {};
3707 case AMDGPU::V_FMAMK_F16:
3708 case AMDGPU::V_FMAMK_F16_t16:
3709 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3710 case AMDGPU::V_FMAMK_F16_fake16:
3711 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3712 case AMDGPU::V_FMAMK_F32:
3713 case AMDGPU::V_FMAMK_F32_gfx12:
3714 case AMDGPU::V_FMAMK_F64:
3715 case AMDGPU::V_FMAMK_F64_gfx1250:
3716 return {VOP2MADMKOps, nullptr};
3717 default:
3718 break;
3719 }
3720 return {VOPOps, nullptr};
3721 }
3722
3723 if (TSFlags & SIInstrFlags::DS)
3724 return {VDSOps, nullptr};
3725
3726 if (TSFlags & SIInstrFlags::FLAT)
3727 return {FLATOps, nullptr};
3728
3729 if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
3730 return {BUFOps, nullptr};
3731
3732 if (TSFlags & SIInstrFlags::VIMAGE)
3733 return {VIMGOps, nullptr};
3734
3735 if (AMDGPU::isVOPD(Desc.getOpcode())) {
3736 auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
3737 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3738 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3739 }
3740
3741 assert(!(TSFlags & SIInstrFlags::MIMG));
3742
3743 if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
3744 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3745 " these instructions are not expected on gfx1250");
3746
3747 return {};
3748}
3749
3750bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3751 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3752
3753 if (TSFlags & SIInstrFlags::SMRD)
3754 return !getSMEMIsBuffer(Opcode);
3755 if (!(TSFlags & SIInstrFlags::FLAT))
3756 return false;
3757
3758 // Only SV and SVS modes are supported.
3759 if (TSFlags & SIInstrFlags::FlatScratch)
3760 return hasNamedOperand(Opcode, OpName::vaddr);
3761
3762 // Only GVS mode is supported.
3763 return hasNamedOperand(Opcode, OpName::vaddr) &&
3764 hasNamedOperand(Opcode, OpName::saddr);
3765
3766 return false;
3767}
3768
3769bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3770 const MCSubtargetInfo &ST) {
3771 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3772 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3773 if (Idx == -1)
3774 continue;
3775
3776 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3777 int16_t RegClass = MII.getOpRegClassID(
3778 OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3779 if (RegClass == AMDGPU::VReg_64RegClassID ||
3780 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3781 return true;
3782 }
3783
3784 return false;
3785}
3786
3787bool isDPALU_DPP32BitOpc(unsigned Opc) {
3788 switch (Opc) {
3789 case AMDGPU::V_MUL_LO_U32_e64:
3790 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3791 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3792 case AMDGPU::V_MUL_HI_U32_e64:
3793 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3794 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3795 case AMDGPU::V_MUL_HI_I32_e64:
3796 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3797 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3798 case AMDGPU::V_MAD_U32_e64:
3799 case AMDGPU::V_MAD_U32_e64_dpp:
3800 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3801 return true;
3802 default:
3803 return false;
3804 }
3805}
3806
3807bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3808 const MCSubtargetInfo &ST) {
3809 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3810 return false;
3811
3812 if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3813 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3814
3815 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3816}
3817
3819 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3820 return 64;
3821 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3822 return 128;
3823 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3824 return 320;
3825 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3826 return 512;
3827 return 64; // In sync with getAddressableLocalMemorySize
3828}
3829
3830bool isPackedFP32Inst(unsigned Opc) {
3831 switch (Opc) {
3832 case AMDGPU::V_PK_ADD_F32:
3833 case AMDGPU::V_PK_ADD_F32_gfx12:
3834 case AMDGPU::V_PK_MUL_F32:
3835 case AMDGPU::V_PK_MUL_F32_gfx12:
3836 case AMDGPU::V_PK_FMA_F32:
3837 case AMDGPU::V_PK_FMA_F32_gfx12:
3838 return true;
3839 default:
3840 return false;
3841 }
3842}
3843
3844bool isPacked64BitInst(unsigned Opc) {
3845 switch (Opc) {
3846 case AMDGPU::V_PK_ADD_F64:
3847 case AMDGPU::V_PK_ADD_F64_gfx1250:
3848 case AMDGPU::V_PK_MUL_F64:
3849 case AMDGPU::V_PK_MUL_F64_gfx1250:
3850 case AMDGPU::V_PK_FMA_F64:
3851 case AMDGPU::V_PK_FMA_F64_gfx1250:
3852 case AMDGPU::V_PK_MAX_NUM_F64:
3853 case AMDGPU::V_PK_MAX_NUM_F64_gfx1250:
3854 case AMDGPU::V_PK_MIN_NUM_F64:
3855 case AMDGPU::V_PK_MIN_NUM_F64_gfx1250:
3856 case AMDGPU::V_PK_ADD_NC_U64:
3857 case AMDGPU::V_PK_ADD_NC_U64_gfx1250:
3858 case AMDGPU::V_PK_SUB_NC_U64:
3859 case AMDGPU::V_PK_SUB_NC_U64_gfx1250:
3860 case AMDGPU::V_PK_LSHL_ADD_U64:
3861 case AMDGPU::V_PK_LSHL_ADD_U64_gfx1250:
3862 return true;
3863 default:
3864 return false;
3865 }
3866}
3867
3870}
3871
// Returns a reference to the stored Dims array. The assertion below requires
// isFixedDims() to hold before the dimensions are read.
3872 const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
3873 assert(isFixedDims() && "expect kind to be FixedDims");
3874 return Dims;
3875 }
3876
3877std::string ClusterDimsAttr::to_string() const {
3878 SmallString<10> Buffer;
3879 raw_svector_ostream OS(Buffer);
3880
3881 switch (getKind()) {
3882 case Kind::Unknown:
3883 return "";
3884 case Kind::NoCluster: {
3885 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3886 return Buffer.c_str();
3887 }
3888 case Kind::VariableDims: {
3889 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3890 << EncoVariableDims;
3891 return Buffer.c_str();
3892 }
3893 case Kind::FixedDims: {
3894 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3895 return Buffer.c_str();
3896 }
3897 }
3898 llvm_unreachable("Unknown ClusterDimsAttr kind");
3899}
3900
3902 std::optional<SmallVector<unsigned>> Attr =
3903 getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
3905
3906 if (!Attr.has_value())
3907 AttrKind = Kind::Unknown;
3908 else if (all_of(*Attr, equal_to(EncoNoCluster)))
3909 AttrKind = Kind::NoCluster;
3910 else if (all_of(*Attr, equal_to(EncoVariableDims)))
3911 AttrKind = Kind::VariableDims;
3912
3913 ClusterDimsAttr A(AttrKind);
3914 if (AttrKind == Kind::FixedDims)
3915 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3916
3917 return A;
3918}
3919
3920} // namespace AMDGPU
3921
3924 switch (S) {
3926 OS << "Unsupported";
3927 break;
3929 OS << "Any";
3930 break;
3932 OS << "Off";
3933 break;
3935 OS << "On";
3936 break;
3937 }
3938 return OS;
3939}
3940
3941} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file contains the declarations for metadata subclasses.
#define T
uint64_t High
if(PassOpts->AAPipeline)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1256
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1253
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1259
This file contains some functions that are useful when dealing with strings.
static const int BlockSize
Definition TarWriter.cpp:33
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
const std::array< unsigned, 3 > & getDims() const
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr unsigned id() const
Definition MCRegister.h:82
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
const char * c_str()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
LLVM_ABI StringRef getVendorName() const
Get the vendor (second) component of the triple.
Definition Triple.cpp:1659
LLVM_ABI StringRef getOSName() const
Get the operating system (third) component of the triple.
Definition Triple.cpp:1664
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:436
LLVM_ABI StringRef getEnvironmentName() const
Get the optional environment (fourth) component of the triple, or "" if empty.
Definition Triple.cpp:1670
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:908
LLVM_ABI StringRef getArchName() const
Get the architecture (first) component of the triple.
Definition Triple.cpp:1655
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getEUsPerCU(const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU)
unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, bool Addressable)
unsigned getWavefrontSize(const MCSubtargetInfo &STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getInstCacheLineSize(const MCSubtargetInfo &STI)
static constexpr unsigned MaxDynamicVGPRBlocks
Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMinWavesPerEU(const MCSubtargetInfo &STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise, return Reg.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo &STI)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
static constexpr std::array< CanBeVOPD, 1<< VOPDXYKeyBits > buildVOPDXYLookup()
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool getHasMatrixScale(unsigned Opc)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
constexpr unsigned VOPDXYKeyBits
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR, return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
bool isPackedFP32or64BitInst(unsigned Opc)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
static bool isValidRegPrefix(char C)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:220
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_LAST
Definition SIDefines.h:261
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:229
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:226
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:231
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:215
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:205
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_AC_FIRST
Definition SIDefines.h:263
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:214
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:217
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:232
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:243
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:244
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:218
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:208
@ OPERAND_REG_INLINE_C_FIRST
Definition SIDefines.h:260
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_LAST
Definition SIDefines.h:264
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:230
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:245
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:227
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:235
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
static constexpr unsigned getVOPDXYKey(unsigned VOPDOp, unsigned Subtarget, bool VOPD3)
constexpr auto VOPDXYLookup
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
bool isPacked64BitInst(unsigned Opc)
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition ELF.h:384
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition ELF.h:385
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition ELF.h:386
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
constexpr T rotr(T V, int R)
Definition bit.h:399
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
std::string utostr(uint64_t X, bool isNeg=false)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
To bit_cast(const From &from) noexcept
Definition bit.h:90
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr int countr_zero_constexpr(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:190
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ AlwaysUniform
The result value is always uniform.
Definition Uniformity.h:23
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
AMD Kernel Code Object (amd_kernel_code_t).
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
Instruction set architecture version.