LLVM 23.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is built in unsigned arithmetic, so a \p Width of 31 does not
/// overflow a signed int, and a \p Width covering the whole word yields an
/// all-ones field instead of an out-of-range shift. \p Shift must be smaller
/// than the number of bits in unsigned.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned WordBits = sizeof(unsigned) * 8;
  const unsigned Field = Width >= WordBits ? ~0u : (1u << Width) - 1u;
  return Field << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Src that do not fit into \p Width are discarded; bits of \p Dst
/// outside the field are preserved.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width (the same for every \p VersionMajor).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits). Zero means there is no high part.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Asynccnt bit width.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width: 1 when (\p VersionMajor, \p VersionMinor) is at
/// least (10, 3) in lexicographic order, 0 otherwise.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  constexpr unsigned MinMajor = 10;
  constexpr unsigned MinMinor = 3;
  // Spelled out lexicographic comparison; avoids relying on <tuple>.
  const bool AtLeastMin =
      VersionMajor > MinMajor ||
      (VersionMajor == MinMajor && VersionMinor >= MinMinor);
  return AtLeastMin ? 1 : 0;
}

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
205 return isGFX9Plus(ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
215 M.getModuleFlag("amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
220}
221
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
229 return 4;
231 return 5;
233 return 6;
234 default:
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
246 case 5:
248 case 6:
250 default:
251 report_fatal_error("Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
322 const MIMGInfo *Info = getMIMGInfo(Opc);
323 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
330 NewChannels, OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(AddrComponents, 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
372
381
386
391
395
399
403
408
416
421
424 bool IsX;
425 bool IsY;
426};
427
428#define GET_FP4FP8DstByteSelTable_DECL
429#define GET_FP4FP8DstByteSelTable_IMPL
430
435
441
442#define GET_DPMACCInstructionTable_DECL
443#define GET_DPMACCInstructionTable_IMPL
444#define GET_MTBUFInfoTable_DECL
445#define GET_MTBUFInfoTable_IMPL
446#define GET_MUBUFInfoTable_DECL
447#define GET_MUBUFInfoTable_IMPL
448#define GET_SMInfoTable_DECL
449#define GET_SMInfoTable_IMPL
450#define GET_VOP1InfoTable_DECL
451#define GET_VOP1InfoTable_IMPL
452#define GET_VOP2InfoTable_DECL
453#define GET_VOP2InfoTable_IMPL
454#define GET_VOP3InfoTable_DECL
455#define GET_VOP3InfoTable_IMPL
456#define GET_VOPC64DPPTable_DECL
457#define GET_VOPC64DPPTable_IMPL
458#define GET_VOPC64DPP8Table_DECL
459#define GET_VOPC64DPP8Table_IMPL
460#define GET_VOPCAsmOnlyInfoTable_DECL
461#define GET_VOPCAsmOnlyInfoTable_IMPL
462#define GET_VOP3CAsmOnlyInfoTable_DECL
463#define GET_VOP3CAsmOnlyInfoTable_IMPL
464#define GET_VOPDComponentTable_DECL
465#define GET_VOPDComponentTable_IMPL
466#define GET_VOPDPairs_DECL
467#define GET_VOPDPairs_IMPL
468#define GET_VOPDXYTable_DECL
469#define GET_VOPDXYTable_IMPL
470#define GET_VOPTrue16Table_DECL
471#define GET_VOPTrue16Table_IMPL
472#define GET_True16D16Table_IMPL
473#define GET_WMMAOpcode2AddrMappingTable_DECL
474#define GET_WMMAOpcode2AddrMappingTable_IMPL
475#define GET_WMMAOpcode3AddrMappingTable_DECL
476#define GET_WMMAOpcode3AddrMappingTable_IMPL
477#define GET_getMFMA_F8F6F4_WithSize_DECL
478#define GET_getMFMA_F8F6F4_WithSize_IMPL
479#define GET_isMFMA_F8F6F4Table_IMPL
480#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
481
482#include "AMDGPUGenSearchableTables.inc"
483
484int getMTBUFBaseOpcode(unsigned Opc) {
485 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
486 return Info ? Info->BaseOpcode : -1;
487}
488
489int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
490 const MTBUFInfo *Info =
491 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
492 return Info ? Info->Opcode : -1;
493}
494
495int getMTBUFElements(unsigned Opc) {
496 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
497 return Info ? Info->elements : 0;
498}
499
500bool getMTBUFHasVAddr(unsigned Opc) {
501 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
502 return Info && Info->has_vaddr;
503}
504
505bool getMTBUFHasSrsrc(unsigned Opc) {
506 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
507 return Info && Info->has_srsrc;
508}
509
510bool getMTBUFHasSoffset(unsigned Opc) {
511 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
512 return Info && Info->has_soffset;
513}
514
515int getMUBUFBaseOpcode(unsigned Opc) {
516 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
517 return Info ? Info->BaseOpcode : -1;
518}
519
520int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
521 const MUBUFInfo *Info =
522 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
523 return Info ? Info->Opcode : -1;
524}
525
526int getMUBUFElements(unsigned Opc) {
527 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
528 return Info ? Info->elements : 0;
529}
530
531bool getMUBUFHasVAddr(unsigned Opc) {
532 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
533 return Info && Info->has_vaddr;
534}
535
536bool getMUBUFHasSrsrc(unsigned Opc) {
537 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
538 return Info && Info->has_srsrc;
539}
540
541bool getMUBUFHasSoffset(unsigned Opc) {
542 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
543 return Info && Info->has_soffset;
544}
545
546bool getMUBUFIsBufferInv(unsigned Opc) {
547 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
548 return Info && Info->IsBufferInv;
549}
550
551bool getMUBUFTfe(unsigned Opc) {
552 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
553 return Info && Info->tfe;
554}
555
556bool getSMEMIsBuffer(unsigned Opc) {
557 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
558 return Info && Info->IsBuffer;
559}
560
561bool getVOP1IsSingle(unsigned Opc) {
562 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
563 return !Info || Info->IsSingle;
564}
565
566bool getVOP2IsSingle(unsigned Opc) {
567 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
568 return !Info || Info->IsSingle;
569}
570
571bool getVOP3IsSingle(unsigned Opc) {
572 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
573 return !Info || Info->IsSingle;
574}
575
576bool isVOPC64DPP(unsigned Opc) {
577 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
578}
579
580bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
581
582bool getMAIIsDGEMM(unsigned Opc) {
583 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
584 return Info && Info->is_dgemm;
585}
586
587bool getMAIIsGFX940XDL(unsigned Opc) {
588 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
589 return Info && Info->is_gfx940_xdl;
590}
591
592bool getWMMAIsXDL(unsigned Opc) {
593 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
594 return Info ? Info->is_wmma_xdl : false;
595}
596
597bool getHasMatrixScale(unsigned Opc) {
598 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
599 return Info && Info->HasMatrixScale;
600}
601
603 switch (EncodingVal) {
606 return 6;
608 return 4;
611 default:
612 return 8;
613 }
614
615 llvm_unreachable("covered switch over mfma scale formats");
616}
617
619 unsigned BLGP,
620 unsigned F8F8Opcode) {
621 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
622 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
623 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
624}
625
627 switch (Fmt) {
630 return 16;
633 return 12;
635 return 8;
636 }
637
638 llvm_unreachable("covered switch over wmma scale formats");
639}
640
642 unsigned FmtB,
643 unsigned F8F8Opcode) {
644 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
645 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
646 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
647}
648
649bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale,
650 unsigned BFmt, unsigned BScale) {
651 auto isValid = [](unsigned Fmt, unsigned Scale) -> bool {
652 switch (Fmt) {
657 if (Scale != WMMA::MATRIX_SCALE_FMT_E8)
658 return false;
659 break;
661 if (Scale != WMMA::MATRIX_SCALE_FMT_E8 &&
664 return false;
665 break;
666 }
667 return true;
668 };
669
670 if (!isValid(AFmt, AScale) || !isValid(BFmt, BScale))
671 return false;
672
673 if (AFmt == WMMA::MATRIX_FMT_FP4 && BFmt == WMMA::MATRIX_FMT_FP4 &&
674 AScale != BScale)
675 return false;
676
677 return true;
678}
679
681 if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
683 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
685 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
687 if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
689 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
691 llvm_unreachable("Subtarget generation does not support VOPD!");
692}
693
694CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
695 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
696 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
697 // Normalize through VOPDComponentTable so that e32 and e64 variants
698 // of the same logical opcode all share a single entry.
699 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
700 if (!Info)
701 return {false, false};
702 unsigned Key =
703 (Info->VOPDOp << 5) | (EncodingFamily << 1) | (VOPD3 ? 1u : 0u);
704 const VOPDXYInfo *XYInfo = getVOPDXYInfo(Key);
705 if (!XYInfo)
706 return {false, false};
707 return {XYInfo->IsX, XYInfo->IsY};
708}
709
710unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
711 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
712 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
713 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
714 return Info ? Info->VOPDOp : ~0u;
715}
716
717bool isVOPD(unsigned Opc) {
718 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
719}
720
721bool isMAC(unsigned Opc) {
722 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
723 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
724 Opc == AMDGPU::V_MAC_F32_e64_vi ||
725 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
726 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
727 Opc == AMDGPU::V_MAC_F16_e64_vi ||
728 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
729 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
730 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
731 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
732 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
733 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
734 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
735 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
736 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
737 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
738 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
739 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
740 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
741 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
742 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
743 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
744 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
745 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
746 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
747 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
748 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
749 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
750}
751
752bool isPermlane16(unsigned Opc) {
753 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
754 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
755 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
756 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
757 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
758 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx13 ||
759 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
760 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx13 ||
761 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
762 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx13 ||
763 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12 ||
764 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx13;
765}
766
768 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
769 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
770 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
771 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
772 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
773 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
774 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
775 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
776 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
777 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
778}
779
780bool isGenericAtomic(unsigned Opc) {
781 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
782 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
783 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
784 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
785 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
786 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
787 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
788 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
789 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
790 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
791 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
792 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
793 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
794 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
795 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
796 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
797 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
798 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
799 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
800}
801
802bool isAsyncStore(unsigned Opc) {
803 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
804 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
805 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
806 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
807 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
808 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
809 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
810 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
811}
812
813bool isTensorStore(unsigned Opc) {
814 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
815 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
816}
817
818unsigned getTemporalHintType(const MCInstrDesc TID) {
821 unsigned Opc = TID.getOpcode();
822 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
823 if (TID.mayStore() &&
824 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
825 return CPol::TH_TYPE_STORE;
826
827 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
828 // MayLoad flag is present which is the case with instructions like
829 // image_get_resinfo.
830 return CPol::TH_TYPE_LOAD;
831}
832
833bool isTrue16Inst(unsigned Opc) {
834 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
835 return Info && Info->IsTrue16;
836}
837
839 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
840 if (!Info)
841 return FPType::None;
842 if (Info->HasFP8DstByteSel)
843 return FPType::FP8;
844 if (Info->HasFP4DstByteSel)
845 return FPType::FP4;
846
847 return FPType::None;
848}
849
850bool isDPMACCInstruction(unsigned Opc) {
851 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
852 return Info && Info->IsDPMACCInstruction;
853}
854
855unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
856 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
857 return Info ? Info->Opcode3Addr : ~0u;
858}
859
860unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
861 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
862 return Info ? Info->Opcode2Addr : ~0u;
863}
864
865// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
866// header files, so we need to wrap it in a function that takes unsigned
867// instead.
868int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
869 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
870}
871
872unsigned getBitOp2(unsigned Opc) {
873 switch (Opc) {
874 default:
875 return 0;
876 case AMDGPU::V_AND_B32_e32:
877 return 0x40;
878 case AMDGPU::V_OR_B32_e32:
879 return 0x54;
880 case AMDGPU::V_XOR_B32_e32:
881 return 0x14;
882 case AMDGPU::V_XNOR_B32_e32:
883 return 0x41;
884 }
885}
886
887int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
888 bool VOPD3) {
889 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
890 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
891 const VOPDInfo *Info =
892 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
893 return Info ? Info->Opcode : -1;
894}
895
896std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
897 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
898 assert(Info);
899 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
900 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
901 assert(OpX && OpY);
902 return {OpX->BaseVOP, OpY->BaseVOP};
903}
904
905namespace VOPD {
906
907ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
909
912 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
913 assert(TiedIdx == -1 || TiedIdx == Component::DST);
914 HasSrc2Acc = TiedIdx != -1;
915 Opcode = OpDesc.getOpcode();
916
917 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
918 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
919 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
920 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
921 : 1;
922 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
923
924 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
925 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
926 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
927 // operands.
928 NumVOPD3Mods = 2;
929 if (IsVOP3)
930 SrcOperandsNum = 3;
931 } else if (isSISrcFPOperand(OpDesc,
932 getNamedOperandIdx(Opcode, OpName::src0))) {
933 // All FP VOPD instructions have Neg modifiers for all operands except
934 // for tied src2.
935 NumVOPD3Mods = SrcOperandsNum;
936 if (HasSrc2Acc)
937 --NumVOPD3Mods;
938 }
939
940 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
941 return;
942
943 auto OperandsNum = OpDesc.getNumOperands();
944 unsigned CompOprIdx;
945 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
946 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
947 MandatoryLiteralIdx = CompOprIdx;
948 break;
949 }
950 }
951}
952
954 return getNamedOperandIdx(Opcode, OpName::bitop3);
955}
956
957unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
958 assert(CompOprIdx < Component::MAX_OPR_NUM);
959
960 if (CompOprIdx == Component::DST)
962
963 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
964 if (CompSrcIdx < getCompParsedSrcOperandsNum())
965 return getIndexOfSrcInParsedOperands(CompSrcIdx);
966
967 // The specified operand does not exist.
968 return 0;
969}
970
972 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
973 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
974 bool VOPD3) const {
975
976 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
977 CompInfo[ComponentIndex::X].isVOP3());
978 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
979 CompInfo[ComponentIndex::Y].isVOP3());
980
981 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
982 unsigned BanksMask) -> bool {
983 MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
984 MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
985 if (!BaseX)
986 BaseX = X;
987 if (!BaseY)
988 BaseY = Y;
989 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
990 return true;
991 if (BaseX != X /* This is 64-bit register */ &&
992 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
993 return true;
994 if (BaseY != Y &&
995 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
996 return true;
997
998 // If both are 64-bit bank conflict will be detected yet while checking
999 // the first subreg.
1000 return false;
1001 };
1002
1003 unsigned CompOprIdx;
1004 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
1005 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
1006 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
1007 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
1008 continue;
1009
1010 if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
1011 getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
1012 return CompOprIdx;
1013
1014 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
1015 continue;
1016
1017 if (CompOprIdx < Component::DST_NUM) {
1018 // Even if we do not check vdst parity, vdst operands still shall not
1019 // overlap.
1020 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
1021 return CompOprIdx;
1022 if (VOPD3) // No need to check dst parity.
1023 continue;
1024 }
1025
1026 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
1027 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
1028 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
1029 return CompOprIdx;
1030 }
1031
1032 return {};
1033}
1034
1035// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1036// by the specified component. If an operand is unused
1037// or is not a VGPR, the corresponding value is 0.
1038//
1039// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1040// for the specified component and MC operand. The callback must return 0
1041// if the operand is not a register or not a VGPR.
1043InstInfo::getRegIndices(unsigned CompIdx,
1044 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1045 bool VOPD3) const {
1046 assert(CompIdx < COMPONENTS_NUM);
1047
1048 const auto &Comp = CompInfo[CompIdx];
1050
1051 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1052
1053 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1054 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1055 RegIndices[CompOprIdx] =
1056 Comp.hasRegSrcOperand(CompSrcIdx)
1057 ? GetRegIdx(CompIdx,
1058 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1059 : MCRegister();
1060 }
1061 return RegIndices;
1062}
1063
1064} // namespace VOPD
1065
1067 return VOPD::InstInfo(OpX, OpY);
1068}
1069
1071 const MCInstrInfo *InstrInfo) {
1072 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1073 const auto &OpXDesc = InstrInfo->get(OpX);
1074 const auto &OpYDesc = InstrInfo->get(OpY);
1075 bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1077 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1078 return VOPD::InstInfo(OpXInfo, OpYInfo);
1079}
1080
1082 StringRef FeatureString) {
1084 STI.getFeatureBits().test(FeatureSupportsXNACK)
1085 ? TargetIDSetting::Any
1086 : TargetIDSetting::Unsupported,
1087 STI.getFeatureBits().test(FeatureSupportsSRAMECC)
1088 ? TargetIDSetting::Any
1089 : TargetIDSetting::Unsupported);
1090
1091 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1092 // absence of the target features we assume we must generate code that can run
1093 // in any environment.
1094 SubtargetFeatures Features(FeatureString);
1095 std::optional<bool> XnackRequested;
1096 std::optional<bool> SramEccRequested;
1097
1098 for (const std::string &Feature : Features.getFeatures()) {
1099 if (Feature == "+xnack")
1100 XnackRequested = true;
1101 else if (Feature == "-xnack")
1102 XnackRequested = false;
1103 else if (Feature == "+sramecc")
1104 SramEccRequested = true;
1105 else if (Feature == "-sramecc")
1106 SramEccRequested = false;
1107 }
1108
1109 // Only allow changing xnack setting if the target supports on/off modes.
1110 // Targets without on/off mode support keep their initial setting (Any).
1111
1112 bool XnackSupported = STI.getFeatureBits().test(FeatureXNACKOnOffModes);
1113 bool SramEccSupported = TargetID.isSramEccSupported();
1114
1115 if (XnackRequested) {
1116 if (XnackSupported) {
1117 TargetID.setXnackSetting(*XnackRequested ? TargetIDSetting::On
1118 : TargetIDSetting::Off);
1119 } else {
1120 // If a specific xnack setting was requested and this GPU does not support
1121 // xnack emit a warning. Setting will remain set to "Unsupported".
1122 if (*XnackRequested) {
1123 errs() << "warning: xnack 'On' was requested for a processor that does "
1124 "not support it!\n";
1125 } else {
1126 errs() << "warning: xnack 'Off' was requested for a processor that "
1127 "does not support it!\n";
1128 }
1129 }
1130 }
1131
1132 if (SramEccRequested) {
1133 if (SramEccSupported) {
1134 TargetID.setSramEccSetting(*SramEccRequested ? TargetIDSetting::On
1135 : TargetIDSetting::Off);
1136 } else {
1137 // If a specific sramecc setting was requested and this GPU does not
1138 // support sramecc emit a warning. Setting will remain set to
1139 // "Unsupported".
1140 if (*SramEccRequested) {
1141 errs() << "warning: sramecc 'On' was requested for a processor that "
1142 "does not support it!\n";
1143 } else {
1144 errs() << "warning: sramecc 'Off' was requested for a processor that "
1145 "does not support it!\n";
1146 }
1147 }
1148 }
1149
1150 return TargetID;
1151}
1152
1153namespace IsaInfo {
1154
1156 if (STI.getFeatureBits().test(FeatureInstCacheLineSize128))
1157 return 128;
1158 if (STI.getFeatureBits().test(FeatureInstCacheLineSize64))
1159 return 64;
1160 return 64;
1161}
1162
1163unsigned getWavefrontSize(const MCSubtargetInfo &STI) {
1164 if (STI.getFeatureBits().test(FeatureWavefrontSize16))
1165 return 16;
1166 if (STI.getFeatureBits().test(FeatureWavefrontSize32))
1167 return 32;
1168
1169 return 64;
1170}
1171
1173 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1174
1175 // "Per CU" really means "per whatever functional block the waves of a
1176 // workgroup must share". So the effective local memory size is doubled in
1177 // WGP mode on gfx10.
1178 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1179 BytesPerCU *= 2;
1180
1181 return BytesPerCU;
1182}
1183
1185 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1186 return 32768;
1187 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1188 return 65536;
1189 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1190 return 163840;
1191 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1192 return 327680;
1193 return 32768;
1194}
1195
1196unsigned getEUsPerCU(const MCSubtargetInfo &STI) {
1197 // "Per CU" really means "per whatever functional block the waves of a
1198 // workgroup must share".
1199
1200 // GFX12.5 only supports CU mode, which contains four SIMDs.
1201 if (isGFX1250(STI)) {
1202 assert(STI.getFeatureBits().test(FeatureCuMode));
1203 return 4;
1204 }
1205
1206 // For gfx10 in CU mode the functional block is the CU, which contains
1207 // two SIMDs.
1208 if (isGFX10Plus(STI) && STI.getFeatureBits().test(FeatureCuMode))
1209 return 2;
1210
1211 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1212 // contains two CUs, so a total of four SIMDs.
1213 return 4;
1214}
1215
1217 unsigned FlatWorkGroupSize) {
1218 assert(FlatWorkGroupSize != 0);
1219 if (!STI.getTargetTriple().isAMDGCN())
1220 return 8;
1221 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1222 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1223 if (N == 1) {
1224 // Single-wave workgroups don't consume barrier resources.
1225 return MaxWaves;
1226 }
1227
1228 unsigned MaxBarriers = 16;
1229 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1230 MaxBarriers = 32;
1231
1232 return std::min(MaxWaves / N, MaxBarriers);
1233}
1234
1235unsigned getMinWavesPerEU(const MCSubtargetInfo &STI) { return 1; }
1236
1237unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI) {
1238 // FIXME: Need to take scratch memory into account.
1239 if (isGFX90A(STI))
1240 return 8;
1241 if (!isGFX10Plus(STI))
1242 return 10;
1243 return hasGFX10_3Insts(STI) ? 16 : 20;
1244}
1245
1247 unsigned FlatWorkGroupSize) {
1248 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1249 getEUsPerCU(STI));
1250}
1251
1252unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI) { return 1; }
1253
1255 unsigned FlatWorkGroupSize) {
1256 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1257}
1258
1261 if (Version.Major >= 10)
1262 return getAddressableNumSGPRs(STI);
1263 if (Version.Major >= 8)
1264 return 16;
1265 return 8;
1266}
1267
1268unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI) { return 8; }
1269
1270unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI) {
1272 if (Version.Major >= 8)
1273 return 800;
1274 return 512;
1275}
1276
1278 if (STI.getFeatureBits().test(FeatureSGPRInitBug))
1280
1282 if (Version.Major >= 10)
1283 return 106;
1284 if (Version.Major >= 8)
1285 return 102;
1286 return 104;
1287}
1288
1289// Per-wave SGPRs reserved for the trap handler when enabled.
1290static unsigned getSGPRTrapHandlerReserve(const MCSubtargetInfo &STI) {
1291 return STI.getFeatureBits().test(FeatureTrapHandler) ? TRAP_NUM_SGPRS : 0;
1292}
1293
1294// Per-wave SGPR budget (before the addressable clamp): take off the trap
1295// reserve, round down to \p Granule. Shared by getMinNumSGPRs() and
1296// getMaxNumSGPRs(); getOccupancyWithNumSGPRs() is the closed-form algebraic
1297// inverse of this same budget (it does not call this helper), so the two encode
1298// one model.
1299static unsigned getSGPRBudgetPerWave(unsigned TotalNumSGPRs,
1300 unsigned WavesPerEU, unsigned TrapReserve,
1301 unsigned Granule) {
1302 assert(WavesPerEU != 0 && Granule != 0);
1303 unsigned Budget = TotalNumSGPRs / WavesPerEU;
1304 Budget -= std::min(Budget, TrapReserve);
1305 return alignDown(Budget, Granule);
1306}
1307
1308unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU) {
1309 assert(WavesPerEU != 0);
1310
1312 if (Version.Major >= 10)
1313 return 0;
1314
1315 if (WavesPerEU >= getMaxWavesPerEU(STI))
1316 return 0;
1317
1318 unsigned MinNumSGPRs =
1319 getSGPRBudgetPerWave(getTotalNumSGPRs(STI), WavesPerEU + 1,
1321 getSGPRAllocGranule(STI)) +
1322 1;
1323 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1324}
1325
1326unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1327 bool Addressable) {
1328 assert(WavesPerEU != 0);
1329
1330 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1332 if (Version.Major >= 10)
1333 return Addressable ? AddressableNumSGPRs : 108;
1334 if (Version.Major >= 8 && !Addressable)
1335 AddressableNumSGPRs = 112;
1336 unsigned MaxNumSGPRs = getSGPRBudgetPerWave(getTotalNumSGPRs(STI), WavesPerEU,
1338 getSGPRAllocGranule(STI));
1339 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1340}
1341
1343 // From GFX10 on the SGPR file is large enough that SGPRs never limit
1344 // occupancy. Kept as one capability so callers don't each test the version.
1345 return getIsaVersion(STI.getCPU()).Major < 10;
1346}
1347
1348unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1349 bool FlatScrUsed, bool XNACKUsed) {
1350 unsigned ExtraSGPRs = 0;
1351 if (VCCUsed)
1352 ExtraSGPRs = 2;
1353
1355 if (Version.Major >= 10)
1356 return ExtraSGPRs;
1357
1358 if (Version.Major < 8) {
1359 if (FlatScrUsed)
1360 ExtraSGPRs = 4;
1361 } else {
1362 if (XNACKUsed)
1363 ExtraSGPRs = 4;
1364
1365 if (FlatScrUsed ||
1366 STI.getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1367 ExtraSGPRs = 6;
1368 }
1369
1370 return ExtraSGPRs;
1371}
1372
1373unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1374 bool FlatScrUsed) {
1375 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1376 STI.getFeatureBits().test(AMDGPU::FeatureXNACK));
1377}
1378
1379static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1380 unsigned Granule) {
1381 return divideCeil(std::max(1u, NumRegs), Granule);
1382}
1383
1384unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs) {
1385 // SGPRBlocks is actual number of SGPR blocks minus 1.
1387 1;
1388}
1389
1391 unsigned DynamicVGPRBlockSize,
1392 std::optional<bool> EnableWavefrontSize32) {
1393 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1394 return 8;
1395
1396 if (DynamicVGPRBlockSize != 0)
1397 return DynamicVGPRBlockSize;
1398
1399 bool IsWave32 = EnableWavefrontSize32
1400 ? *EnableWavefrontSize32
1401 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1402
1403 if (STI.getFeatureBits().test(Feature1536VGPRs))
1404 return IsWave32 ? 24 : 12;
1405
1406 if (hasGFX10_3Insts(STI))
1407 return IsWave32 ? 16 : 8;
1408
1409 return IsWave32 ? 8 : 4;
1410}
1411
1413 std::optional<bool> EnableWavefrontSize32) {
1414 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1415 return 8;
1416
1417 bool IsWave32 = EnableWavefrontSize32
1418 ? *EnableWavefrontSize32
1419 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1420
1421 if (STI.getFeatureBits().test(Feature1024AddressableVGPRs))
1422 return IsWave32 ? 16 : 8;
1423
1424 return IsWave32 ? 8 : 4;
1425}
1426
/// \returns The allocation granule for architectural VGPRs, which is 4.
unsigned getArchVGPRAllocGranule() {
  constexpr unsigned Granule = 4;
  return Granule;
}
1428
1429unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI) {
1430 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1431 return 512;
1432 if (!isGFX10Plus(STI))
1433 return 256;
1434 bool IsWave32 = STI.getFeatureBits().test(FeatureWavefrontSize32);
1435 if (STI.getFeatureBits().test(Feature1536VGPRs))
1436 return IsWave32 ? 1536 : 768;
1437 return IsWave32 ? 1024 : 512;
1438}
1439
1441 const auto &Features = STI.getFeatureBits();
1442 if (Features.test(Feature1024AddressableVGPRs))
1443 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1444 return 256;
1445}
1446
1448 unsigned DynamicVGPRBlockSize) {
1449 const auto &Features = STI.getFeatureBits();
1450 if (Features.test(FeatureGFX90AInsts))
1451 return 512;
1452
1453 if (DynamicVGPRBlockSize != 0) {
1454 // On GFX12 we can allocate at most MaxDynamicVGPRBlocks blocks of VGPRs.
1455 return MaxDynamicVGPRBlocks *
1456 getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1457 }
1458 return getAddressableNumArchVGPRs(STI);
1459}
1460
1462 unsigned NumVGPRs,
1463 unsigned DynamicVGPRBlockSize) {
1465 NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1467}
1468
1469unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1470 unsigned MaxWaves,
1471 unsigned TotalNumVGPRs) {
1472 if (NumVGPRs < Granule)
1473 return MaxWaves;
1474 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1475 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1476}
1477
1478unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1479 unsigned TotalNumSGPRs, unsigned Granule,
1480 unsigned TrapReserve) {
1481 // Closed-form inverse of getMaxNumSGPRs(): the budget condition
1482 // SGPRs <= alignDown(TotalNumSGPRs / W - TrapReserve, Granule)
1483 // solves to W <= TotalNumSGPRs / (alignTo(SGPRs, Granule) + TrapReserve).
1484 unsigned PerWave = alignTo(SGPRs, Granule) + TrapReserve;
1485 return PerWave ? std::clamp(TotalNumSGPRs / PerWave, 1u, MaxWaves) : MaxWaves;
1486}
1487
1488unsigned getOccupancyWithNumSGPRs(const MCSubtargetInfo &STI, unsigned SGPRs) {
1489 unsigned MaxWaves = getMaxWavesPerEU(STI);
1490
1491 if (!isSGPROccupancyLimited(STI))
1492 return MaxWaves;
1493
1494 return getOccupancyWithNumSGPRs(SGPRs, MaxWaves, getTotalNumSGPRs(STI),
1497}
1498
1499unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1500 unsigned DynamicVGPRBlockSize) {
1501 assert(WavesPerEU != 0);
1502
1503 // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
1504 // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
1505 // zero because there is no nonzero VGPR usage N where going below N
1506 // achieves higher (static) occupancy.
1507 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1508 if (DynamicVGPREnabled)
1509 return 0;
1510
1511 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1512 if (WavesPerEU >= MaxWavesPerEU)
1513 return 0;
1514
1515 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1516 unsigned AddrsableNumVGPRs =
1517 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1518 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1519 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1520
1521 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1522 return 0;
1523
1524 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
1525 DynamicVGPRBlockSize);
1526 if (WavesPerEU < MinWavesPerEU)
1527 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1528
1529 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1530 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1531 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1532}
1533
1534unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1535 unsigned DynamicVGPRBlockSize) {
1536 assert(WavesPerEU != 0);
1537
1538 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1539 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1540 unsigned MaxNumVGPRs =
1541 DynamicVGPREnabled
1542 ? getTotalNumVGPRs(STI)
1543 : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1544 getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1545 unsigned AddressableNumVGPRs =
1546 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1547 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1548}
1549
1550unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs,
1551 std::optional<bool> EnableWavefrontSize32) {
1553 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1554 1;
1555}
1556
1558 unsigned NumVGPRs,
1559 unsigned DynamicVGPRBlockSize,
1560 std::optional<bool> EnableWavefrontSize32) {
1562 NumVGPRs,
1563 getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1564}
1565} // end namespace IsaInfo
1566
1568 const MCSubtargetInfo &STI) {
1570 KernelCode.amd_kernel_code_version_major = 1;
1571 KernelCode.amd_kernel_code_version_minor = 2;
1572 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1573 KernelCode.amd_machine_version_major = Version.Major;
1574 KernelCode.amd_machine_version_minor = Version.Minor;
1575 KernelCode.amd_machine_version_stepping = Version.Stepping;
1577 if (STI.getFeatureBits().test(FeatureWavefrontSize32)) {
1578 KernelCode.wavefront_size = 5;
1580 } else {
1581 KernelCode.wavefront_size = 6;
1582 }
1583
1584 // If the code object does not support indirect functions, then the value must
1585 // be 0xffffffff.
1586 KernelCode.call_convention = -1;
1587
1588 // These alignment values are specified in powers of two, so alignment =
1589 // 2^n. The minimum alignment is 2^4 = 16.
1590 KernelCode.kernarg_segment_alignment = 4;
1591 KernelCode.group_segment_alignment = 4;
1592 KernelCode.private_segment_alignment = 4;
1593
1594 if (Version.Major >= 10) {
1595 KernelCode.compute_pgm_resource_registers |=
1596 S_00B848_WGP_MODE(STI.getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1598 }
1599}
1600
1603}
1604
1607}
1608
1610 unsigned AS = GV->getAddressSpace();
1611 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1613}
1614
1616 return TT.getArch() == Triple::r600;
1617}
1618
/// \returns true if \p C is one of the register-kind prefix letters accepted
/// by parseAsmPhysRegName(): 'v', 's' or 'a'.
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1622
1623std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1624 char Kind = RegName.front();
1625 if (!isValidRegPrefix(Kind))
1626 return {};
1627
1628 RegName = RegName.drop_front();
1629 if (RegName.consume_front("[")) {
1630 unsigned Idx, End;
1631 bool Failed = RegName.consumeInteger(10, Idx);
1632 Failed |= !RegName.consume_front(":");
1633 Failed |= RegName.consumeInteger(10, End);
1634 Failed |= !RegName.consume_back("]");
1635 if (!Failed) {
1636 unsigned NumRegs = End - Idx + 1;
1637 if (NumRegs > 1)
1638 return {Kind, Idx, NumRegs};
1639 }
1640 } else {
1641 unsigned Idx;
1642 bool Failed = RegName.getAsInteger(10, Idx);
1643 if (!Failed)
1644 return {Kind, Idx, 1};
1645 }
1646
1647 return {};
1648}
1649
1650std::tuple<char, unsigned, unsigned>
1652 StringRef RegName = Constraint;
1653 if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1654 return {};
1656}
1657
1658std::pair<unsigned, unsigned>
1660 std::pair<unsigned, unsigned> Default,
1661 bool OnlyFirstRequired) {
1662 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1663 return {Attr->first, Attr->second.value_or(Default.second)};
1664 return Default;
1665}
1666
1667std::optional<std::pair<unsigned, std::optional<unsigned>>>
1669 bool OnlyFirstRequired) {
1670 Attribute A = F.getFnAttribute(Name);
1671 if (!A.isStringAttribute())
1672 return std::nullopt;
1673
1674 LLVMContext &Ctx = F.getContext();
1675 std::pair<unsigned, std::optional<unsigned>> Ints;
1676 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1677 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1678 Ctx.emitError("can't parse first integer attribute " + Name);
1679 return std::nullopt;
1680 }
1681 unsigned Second = 0;
1682 if (Strs.second.trim().getAsInteger(0, Second)) {
1683 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1684 Ctx.emitError("can't parse second integer attribute " + Name);
1685 return std::nullopt;
1686 }
1687 } else {
1688 Ints.second = Second;
1689 }
1690
1691 return Ints;
1692}
1693
1695 unsigned Size,
1696 unsigned DefaultVal) {
1697 std::optional<SmallVector<unsigned>> R =
1699 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1700}
1701
1702std::optional<SmallVector<unsigned>>
1704 assert(Size > 2);
1705 LLVMContext &Ctx = F.getContext();
1706
1707 Attribute A = F.getFnAttribute(Name);
1708 if (!A.isValid())
1709 return std::nullopt;
1710 if (!A.isStringAttribute()) {
1711 Ctx.emitError(Name + " is not a string attribute");
1712 return std::nullopt;
1713 }
1714
1716
1717 StringRef S = A.getValueAsString();
1718 unsigned i = 0;
1719 for (; !S.empty() && i < Size; i++) {
1720 std::pair<StringRef, StringRef> Strs = S.split(',');
1721 unsigned IntVal;
1722 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1723 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1724 Name);
1725 return std::nullopt;
1726 }
1727 Vals[i] = IntVal;
1728 S = Strs.second;
1729 }
1730
1731 if (!S.empty() || i < Size) {
1732 Ctx.emitError("attribute " + Name +
1733 " has incorrect number of integers; expected " +
1735 return std::nullopt;
1736 }
1737 return Vals;
1738}
1739
1741 return getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
1742 std::numeric_limits<uint32_t>::max());
1743}
1744
1745bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1746 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1747 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1748 auto Low =
1749 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1750 auto High =
1751 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1752 // There are two types of [A; B) ranges:
1753 // A < B, e.g. [4; 5) which is a range that only includes 4.
1754 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1755 // everything except 4.
1756 if (Low.ult(High)) {
1757 if (Low.ule(Val) && High.ugt(Val))
1758 return true;
1759 } else {
1760 if (Low.uge(Val) && High.ult(Val))
1761 return true;
1762 }
1763 }
1764
1765 return false;
1766}
1767
1769 return (1 << (getVmcntBitWidthLo(Version.Major) +
1770 getVmcntBitWidthHi(Version.Major))) -
1771 1;
1772}
1773
1775 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1776}
1777
1779 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1780}
1781
1783 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1784}
1785
1787 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1788}
1789
1791 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1792}
1793
1795 return (1 << getDscntBitWidth(Version.Major)) - 1;
1796}
1797
1799 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1800}
1801
1803 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1804}
1805
1807 return (1 << getAsynccntBitWidth(Version.Major, Version.Minor)) - 1;
1808}
1809
1811 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1812}
1813
1815 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1816 getVmcntBitWidthLo(Version.Major));
1817 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1818 getExpcntBitWidth(Version.Major));
1819 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1820 getLgkmcntBitWidth(Version.Major));
1821 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1822 getVmcntBitWidthHi(Version.Major));
1823 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1824}
1825
1826unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1827 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1828 getVmcntBitWidthLo(Version.Major));
1829 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1830 getVmcntBitWidthHi(Version.Major));
1831 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1832}
1833
1834unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1835 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1836 getExpcntBitWidth(Version.Major));
1837}
1838
1839unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1840 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1841 getLgkmcntBitWidth(Version.Major));
1842}
1843
1844unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
1845 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1846 getLoadcntBitWidth(Version.Major));
1847}
1848
1849unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
1850 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1851 getStorecntBitWidth(Version.Major));
1852}
1853
1854unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
1855 return unpackBits(Waitcnt, getDscntBitShift(Version.Major),
1856 getDscntBitWidth(Version.Major));
1857}
1858
1859void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1860 unsigned &Expcnt, unsigned &Lgkmcnt) {
1861 Vmcnt = decodeVmcnt(Version, Waitcnt);
1862 Expcnt = decodeExpcnt(Version, Waitcnt);
1863 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1864}
1865
1866unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1867 unsigned Vmcnt) {
1868 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1869 getVmcntBitWidthLo(Version.Major));
1870 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1871 getVmcntBitShiftHi(Version.Major),
1872 getVmcntBitWidthHi(Version.Major));
1873}
1874
1875unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1876 unsigned Expcnt) {
1877 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1878 getExpcntBitWidth(Version.Major));
1879}
1880
1881unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1882 unsigned Lgkmcnt) {
1883 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1884 getLgkmcntBitWidth(Version.Major));
1885}
1886
1887unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1888 unsigned Expcnt, unsigned Lgkmcnt) {
1889 unsigned Waitcnt = getWaitcntBitMask(Version);
1891 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1892 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1893 return Waitcnt;
1894}
1895
1897 bool IsStore) {
1898 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1899 getDscntBitWidth(Version.Major));
1900 if (IsStore) {
1901 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1902 getStorecntBitWidth(Version.Major));
1903 return Dscnt | Storecnt;
1904 }
1905 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1906 getLoadcntBitWidth(Version.Major));
1907 return Dscnt | Loadcnt;
1908}
1909
1910static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1911 unsigned Loadcnt) {
1912 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1913 getLoadcntBitWidth(Version.Major));
1914}
1915
1916static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1917 unsigned Storecnt) {
1918 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1919 getStorecntBitWidth(Version.Major));
1920}
1921
1922static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1923 unsigned Dscnt) {
1924 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1925 getDscntBitWidth(Version.Major));
1926}
1927
1928unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1929 unsigned Dscnt) {
1930 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1931 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1933 return Waitcnt;
1934}
1935
1936unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
1937 unsigned Dscnt) {
1938 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1939 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1941 return Waitcnt;
1942}
1943
1944//===----------------------------------------------------------------------===//
1945// Custom Operand Values
1946//===----------------------------------------------------------------------===//
1947
1949 int Size,
1950 const MCSubtargetInfo &STI) {
1951 unsigned Enc = 0;
1952 for (int Idx = 0; Idx < Size; ++Idx) {
1953 const auto &Op = Opr[Idx];
1954 if (Op.isSupported(STI))
1955 Enc |= Op.encode(Op.Default);
1956 }
1957 return Enc;
1958}
1959
1961 int Size, unsigned Code,
1962 bool &HasNonDefaultVal,
1963 const MCSubtargetInfo &STI) {
1964 unsigned UsedOprMask = 0;
1965 HasNonDefaultVal = false;
1966 for (int Idx = 0; Idx < Size; ++Idx) {
1967 const auto &Op = Opr[Idx];
1968 if (!Op.isSupported(STI))
1969 continue;
1970 UsedOprMask |= Op.getMask();
1971 unsigned Val = Op.decode(Code);
1972 if (!Op.isValid(Val))
1973 return false;
1974 HasNonDefaultVal |= (Val != Op.Default);
1975 }
1976 return (Code & ~UsedOprMask) == 0;
1977}
1978
1979static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1980 unsigned Code, int &Idx, StringRef &Name,
1981 unsigned &Val, bool &IsDefault,
1982 const MCSubtargetInfo &STI) {
1983 while (Idx < Size) {
1984 const auto &Op = Opr[Idx++];
1985 if (Op.isSupported(STI)) {
1986 Name = Op.Name;
1987 Val = Op.decode(Code);
1988 IsDefault = (Val == Op.Default);
1989 return true;
1990 }
1991 }
1992
1993 return false;
1994}
1995
1997 int64_t InputVal) {
1998 if (InputVal < 0 || InputVal > Op.Max)
1999 return OPR_VAL_INVALID;
2000 return Op.encode(InputVal);
2001}
2002
2003static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2004 const StringRef Name, int64_t InputVal,
2005 unsigned &UsedOprMask,
2006 const MCSubtargetInfo &STI) {
2007 int InvalidId = OPR_ID_UNKNOWN;
2008 for (int Idx = 0; Idx < Size; ++Idx) {
2009 const auto &Op = Opr[Idx];
2010 if (Op.Name == Name) {
2011 if (!Op.isSupported(STI)) {
2012 InvalidId = OPR_ID_UNSUPPORTED;
2013 continue;
2014 }
2015 auto OprMask = Op.getMask();
2016 if (OprMask & UsedOprMask)
2017 return OPR_ID_DUPLICATE;
2018 UsedOprMask |= OprMask;
2019 return encodeCustomOperandVal(Op, InputVal);
2020 }
2021 }
2022 return InvalidId;
2023}
2024
2025//===----------------------------------------------------------------------===//
2026// DepCtr
2027//===----------------------------------------------------------------------===//
2028
2029namespace DepCtr {
2030
2032 static int Default = -1;
2033 if (Default == -1)
2035 return Default;
2036}
2037
2038bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2039 const MCSubtargetInfo &STI) {
2041 HasNonDefaultVal, STI);
2042}
2043
2044bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2045 bool &IsDefault, const MCSubtargetInfo &STI) {
2046 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
2047 IsDefault, STI);
2048}
2049
2050int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2051 const MCSubtargetInfo &STI) {
2052 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
2053 STI);
2054}
2055
2056unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2057
2058unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2059
2060unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2061
2063 return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1;
2064}
2065
2066unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2067
2068unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2069
2070unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2071
2072unsigned decodeFieldVmVsrc(unsigned Encoded) {
2073 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2074}
2075
2076unsigned decodeFieldVaVdst(unsigned Encoded) {
2077 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2078}
2079
2080unsigned decodeFieldSaSdst(unsigned Encoded) {
2081 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2082}
2083
2084unsigned decodeFieldVaSdst(unsigned Encoded) {
2085 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2086}
2087
2088unsigned decodeFieldVaVcc(unsigned Encoded) {
2089 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2090}
2091
2092unsigned decodeFieldVaSsrc(unsigned Encoded) {
2093 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2094}
2095
2096unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2097 return unpackBits(Encoded, getHoldCntBitShift(),
2098 getHoldCntWidth(Version.Major, Version.Minor));
2099}
2100
2101unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2102 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2103}
2104
2105unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2106 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2107 return encodeFieldVmVsrc(Encoded, VmVsrc);
2108}
2109
2110unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2111 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2112}
2113
2114unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2115 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2116 return encodeFieldVaVdst(Encoded, VaVdst);
2117}
2118
2119unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2120 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2121}
2122
2123unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2124 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2125 return encodeFieldSaSdst(Encoded, SaSdst);
2126}
2127
2128unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2129 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2130}
2131
2132unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2133 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2134 return encodeFieldVaSdst(Encoded, VaSdst);
2135}
2136
2137unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2138 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2139}
2140
2141unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2142 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2143 return encodeFieldVaVcc(Encoded, VaVcc);
2144}
2145
2146unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2147 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2148}
2149
2150unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2151 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2152 return encodeFieldVaSsrc(Encoded, VaSsrc);
2153}
2154
2155unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2156 const IsaVersion &Version) {
2157 return packBits(HoldCnt, Encoded, getHoldCntBitShift(),
2158 getHoldCntWidth(Version.Major, Version.Minor));
2159}
2160
2161unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2162 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2163 return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU()));
2164}
2165
2166} // namespace DepCtr
2167
2168//===----------------------------------------------------------------------===//
2169// exp tgt
2170//===----------------------------------------------------------------------===//
2171
2172namespace Exp {
2173
2174struct ExpTgt {
2176 unsigned Tgt;
2177 unsigned MaxIndex;
2178};
2179
2180// clang-format off
// Table of export targets, one entry per target family:
//   {name, id of the first target in the family, highest index in the family}.
// A MaxIndex of 0 marks a target that is written without a numeric suffix.
// Entry order matters for getTgtId(): it rejects the name as soon as an indexed
// family's prefix matches but the suffix does not parse, so "mrtz" must be
// listed before its prefix "mrt".
static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
  {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
  {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
  {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
  {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
  {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
  {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
};
2190// clang-format on
2191
2192bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2193 for (const ExpTgt &Val : ExpTgtInfo) {
2194 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2195 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2196 Name = Val.Name;
2197 return true;
2198 }
2199 }
2200 return false;
2201}
2202
2203unsigned getTgtId(const StringRef Name) {
2204
2205 for (const ExpTgt &Val : ExpTgtInfo) {
2206 if (Val.MaxIndex == 0 && Name == Val.Name)
2207 return Val.Tgt;
2208
2209 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2210 StringRef Suffix = Name.drop_front(Val.Name.size());
2211
2212 unsigned Id;
2213 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2214 return ET_INVALID;
2215
2216 // Disable leading zeroes
2217 if (Suffix.size() > 1 && Suffix[0] == '0')
2218 return ET_INVALID;
2219
2220 return Val.Tgt + Id;
2221 }
2222 }
2223 return ET_INVALID;
2224}
2225
2226bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2227 switch (Id) {
2228 case ET_NULL:
2229 return !isGFX11Plus(STI);
2230 case ET_POS4:
2231 case ET_PRIM:
2232 return isGFX10Plus(STI);
2233 case ET_DUAL_SRC_BLEND0:
2234 case ET_DUAL_SRC_BLEND1:
2235 return isGFX11Plus(STI);
2236 default:
2237 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2238 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2239 return true;
2240 }
2241}
2242
2243} // namespace Exp
2244
2245//===----------------------------------------------------------------------===//
2246// MTBUF Format
2247//===----------------------------------------------------------------------===//
2248
2249namespace MTBUFFormat {
2250
2251int64_t getDfmt(const StringRef Name) {
2252 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2253 if (Name == DfmtSymbolic[Id])
2254 return Id;
2255 }
2256 return DFMT_UNDEF;
2257}
2258
2260 assert(Id <= DFMT_MAX);
2261 return DfmtSymbolic[Id];
2262}
2263
2265 if (isSI(STI) || isCI(STI))
2266 return NfmtSymbolicSICI;
2267 if (isVI(STI) || isGFX9(STI))
2268 return NfmtSymbolicVI;
2269 return NfmtSymbolicGFX10;
2270}
2271
2272int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2273 const auto *lookupTable = getNfmtLookupTable(STI);
2274 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2275 if (Name == lookupTable[Id])
2276 return Id;
2277 }
2278 return NFMT_UNDEF;
2279}
2280
2281StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2282 assert(Id <= NFMT_MAX);
2283 return getNfmtLookupTable(STI)[Id];
2284}
2285
2286bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2287 unsigned Dfmt;
2288 unsigned Nfmt;
2289 decodeDfmtNfmt(Id, Dfmt, Nfmt);
2290 return isValidNfmt(Nfmt, STI);
2291}
2292
2293bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2294 return !getNfmtName(Id, STI).empty();
2295}
2296
2297int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2298 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2299}
2300
2301void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2302 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2303 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2304}
2305
2306int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2307 if (isGFX11Plus(STI)) {
2308 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2309 if (Name == UfmtSymbolicGFX11[Id])
2310 return Id;
2311 }
2312 } else {
2313 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2314 if (Name == UfmtSymbolicGFX10[Id])
2315 return Id;
2316 }
2317 }
2318 return UFMT_UNDEF;
2319}
2320
2322 if (isValidUnifiedFormat(Id, STI))
2323 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2324 return "";
2325}
2326
2327bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2328 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2329}
2330
2331int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2332 const MCSubtargetInfo &STI) {
2333 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2334 if (isGFX11Plus(STI)) {
2335 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2336 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2337 return Id;
2338 }
2339 } else {
2340 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2341 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2342 return Id;
2343 }
2344 }
2345 return UFMT_UNDEF;
2346}
2347
2348bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2349 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2350}
2351
2353 if (isGFX10Plus(STI))
2354 return UFMT_DEFAULT;
2355 return DFMT_NFMT_DEFAULT;
2356}
2357
2358} // namespace MTBUFFormat
2359
2360//===----------------------------------------------------------------------===//
2361// SendMsg
2362//===----------------------------------------------------------------------===//
2363
2364namespace SendMsg {
2365
2369
2370bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2371 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2372}
2373
2374bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2375 bool Strict) {
2376 assert(isValidMsgId(MsgId, STI));
2377
2378 if (!Strict)
2379 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
2380
2381 if (msgRequiresOp(MsgId, STI)) {
2382 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2383 return false;
2384
2385 return !getMsgOpName(MsgId, OpId, STI).empty();
2386 }
2387
2388 return OpId == OP_NONE_;
2389}
2390
2391bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2392 const MCSubtargetInfo &STI, bool Strict) {
2393 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2394
2395 if (!Strict)
2397
2398 if (!isGFX11Plus(STI)) {
2399 switch (MsgId) {
2400 case ID_GS_PreGFX11:
2403 return (OpId == OP_GS_NOP)
2406 }
2407 }
2408 return StreamId == STREAM_ID_NONE_;
2409}
2410
2411bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2412 return MsgId == ID_SYSMSG ||
2413 (!isGFX11Plus(STI) &&
2414 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2415}
2416
2417bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2418 const MCSubtargetInfo &STI) {
2419 return !isGFX11Plus(STI) &&
2420 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2421 OpId != OP_GS_NOP;
2422}
2423
2424void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2425 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2426 MsgId = Val & getMsgIdMask(STI);
2427 if (isGFX11Plus(STI)) {
2428 OpId = 0;
2429 StreamId = 0;
2430 } else {
2431 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2433 }
2434}
2435
2437 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2438}
2439
2440bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2441 // Explicitly list message types that are known to not use m0.
2442 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2443 // types are added in the future that do use m0.
2444 if (isGFX11Plus(STI)) {
2445 switch (MsgId) {
2447 return true;
2448 default:
2449 break;
2450 }
2451 }
2452 switch (MsgId) {
2453 case ID_SAVEWAVE:
2454 case ID_STALL_WAVE_GEN:
2455 case ID_HALT_WAVES:
2456 case ID_ORDERED_PS_DONE:
2458 case ID_GET_DOORBELL:
2459 case ID_GET_DDID:
2460 case ID_SYSMSG:
2461 return true;
2462 default:
2463 return false;
2464 }
2465}
2466
2467} // namespace SendMsg
2468
2469//===----------------------------------------------------------------------===//
2470//
2471//===----------------------------------------------------------------------===//
2472
2474 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2475}
2476
2478 // As a safe default always respond as if PS has color exports.
2479 return F.getFnAttributeAsParsedInteger(
2480 "amdgpu-color-export",
2481 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2482}
2483
2485 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2486}
2487
2489 unsigned BlockSize =
2490 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2491
2492 if (BlockSize == 16 || BlockSize == 32)
2493 return BlockSize;
2494
2495 return 0;
2496}
2497
2498bool hasXNACK(const MCSubtargetInfo &STI) {
2499 return STI.hasFeature(AMDGPU::FeatureXNACK);
2500}
2501
2503 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2504 !STI.hasFeature(AMDGPU::FeatureR128A16);
2505}
2506
2507bool hasA16(const MCSubtargetInfo &STI) {
2508 return STI.hasFeature(AMDGPU::FeatureA16);
2509}
2510
2511bool hasG16(const MCSubtargetInfo &STI) {
2512 return STI.hasFeature(AMDGPU::FeatureG16);
2513}
2514
2516 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2517 !isSI(STI);
2518}
2519
2520bool hasGDS(const MCSubtargetInfo &STI) {
2521 return STI.hasFeature(AMDGPU::FeatureGDS);
2522}
2523
2524unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2525 auto Version = getIsaVersion(STI.getCPU());
2526 if (Version.Major == 10)
2527 return Version.Minor >= 3 ? 13 : 5;
2528 if (Version.Major == 11)
2529 return 5;
2530 if (Version.Major >= 12)
2531 return HasSampler ? 4 : 5;
2532 return 0;
2533}
2534
2536 if (isGFX1250Plus(STI))
2537 return 32;
2538 return 16;
2539}
2540
2541bool isSI(const MCSubtargetInfo &STI) {
2542 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2543}
2544
2545bool isCI(const MCSubtargetInfo &STI) {
2546 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2547}
2548
2549bool isVI(const MCSubtargetInfo &STI) {
2550 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2551}
2552
2553bool isGFX9(const MCSubtargetInfo &STI) {
2554 return STI.hasFeature(AMDGPU::FeatureGFX9);
2555}
2556
2558 return isGFX9(STI) || isGFX10(STI);
2559}
2560
2562 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2563}
2564
2566 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2567}
2568
2569bool isGFX8Plus(const MCSubtargetInfo &STI) {
2570 return isVI(STI) || isGFX9Plus(STI);
2571}
2572
2573bool isGFX9Plus(const MCSubtargetInfo &STI) {
2574 return isGFX9(STI) || isGFX10Plus(STI);
2575}
2576
2577bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2578
2579bool isGFX10(const MCSubtargetInfo &STI) {
2580 return STI.hasFeature(AMDGPU::FeatureGFX10);
2581}
2582
2584 return isGFX10(STI) || isGFX11(STI);
2585}
2586
2588 return isGFX10(STI) || isGFX11Plus(STI);
2589}
2590
2591bool isGFX11(const MCSubtargetInfo &STI) {
2592 return STI.hasFeature(AMDGPU::FeatureGFX11);
2593}
2594
2596 return isGFX11(STI) || isGFX12Plus(STI);
2597}
2598
2599bool isGFX12(const MCSubtargetInfo &STI) {
2600 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2601}
2602
2604 return isGFX12(STI) || isGFX13Plus(STI);
2605}
2606
2607bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2608
2609bool isGFX1250(const MCSubtargetInfo &STI) {
2610 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2611}
2612
2614 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2615}
2616
2617bool isGFX13(const MCSubtargetInfo &STI) {
2618 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2619}
2620
2621bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2622
2624 if (isGFX1250(STI))
2625 return false;
2626 return isGFX10Plus(STI);
2627}
2628
2629bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2630
2632 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2633}
2634
2636 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2637}
2638
2640 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2641}
2642
2644 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2645}
2646
2648 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2649}
2650
2652 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2653}
2654
2656 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2657}
2658
2659bool isGFX90A(const MCSubtargetInfo &STI) {
2660 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2661}
2662
2663bool isGFX940(const MCSubtargetInfo &STI) {
2664 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2665}
2666
2668 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2669}
2670
2672 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2673}
2674
2675bool hasVOPD(const MCSubtargetInfo &STI) {
2676 return STI.hasFeature(AMDGPU::FeatureVOPDInsts);
2677}
2678
2680 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2681}
2682
2684 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2685}
2686
2687int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2688 int32_t ArgNumVGPR) {
2689 if (has90AInsts && ArgNumAGPR)
2690 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2691 return std::max(ArgNumVGPR, ArgNumAGPR);
2692}
2693
2695 const MCRegisterClass &SGPRClass =
2696 TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2697 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2698 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2699 Reg == AMDGPU::SCC;
2700}
2701
2705
// Body of a register-mapping switch. Expands to `switch (Reg.id())` whose
// cases come from the CASE_* helper macros, so those must be defined at the
// point of expansion and a `Reg` must be in scope. Any register without a case
// is returned unchanged by the `default` label.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch (Reg.id()) { \
  default: \
    return Reg; \
    CASE_CI_VI(FLAT_SCR) \
    CASE_CI_VI(FLAT_SCR_LO) \
    CASE_CI_VI(FLAT_SCR_HI) \
    CASE_VI_GFX9PLUS(TTMP0) \
    CASE_VI_GFX9PLUS(TTMP1) \
    CASE_VI_GFX9PLUS(TTMP2) \
    CASE_VI_GFX9PLUS(TTMP3) \
    CASE_VI_GFX9PLUS(TTMP4) \
    CASE_VI_GFX9PLUS(TTMP5) \
    CASE_VI_GFX9PLUS(TTMP6) \
    CASE_VI_GFX9PLUS(TTMP7) \
    CASE_VI_GFX9PLUS(TTMP8) \
    CASE_VI_GFX9PLUS(TTMP9) \
    CASE_VI_GFX9PLUS(TTMP10) \
    CASE_VI_GFX9PLUS(TTMP11) \
    CASE_VI_GFX9PLUS(TTMP12) \
    CASE_VI_GFX9PLUS(TTMP13) \
    CASE_VI_GFX9PLUS(TTMP14) \
    CASE_VI_GFX9PLUS(TTMP15) \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
    CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
    CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
    CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
    CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
    CASE_VI_GFX9PLUS( \
        TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
    CASE_GFXPRE11_GFX11PLUS(M0) \
    CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
    CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
  }
2751
// CASE_* definitions for the pseudo-register -> subtarget-specific register
// direction. Each one contributes a `case` to MAP_REG2REG and needs `STI` in
// scope at the expansion site.

// Maps `node` to its _ci or _vi variant.
// NOTE(review): the assert is placed before the `case` label, so inside the
// MAP_REG2REG switch it follows the previous case's `return` and is never
// reached by the switch dispatch -- confirm whether it was meant to be checked.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: \
    return isCI(STI) ? node##_ci : node##_vi;

// Maps `node` to its _gfx9plus variant on GFX9+ and to its _vi variant
// otherwise.
#define CASE_VI_GFX9PLUS(node) \
  case node: \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

// Maps `node` to its _gfx11plus variant on GFX11+ and to its _gfxpre11 variant
// otherwise.
#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node: \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

// Like CASE_GFXPRE11_GFX11PLUS, but `node` is mapped to the variants of a
// different register, `result`.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
  case node: \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2768
2770 if (STI.getTargetTriple().getArch() == Triple::r600)
2771 return Reg;
2773}
2774
2775#undef CASE_CI_VI
2776#undef CASE_VI_GFX9PLUS
2777#undef CASE_GFXPRE11_GFX11PLUS
2778#undef CASE_GFXPRE11_GFX11PLUS_TO
2779
// CASE_* definitions for the reverse direction: every subtarget-specific
// variant of `node` is mapped back to `node` itself.
#define CASE_CI_VI(node) \
  case node##_ci: \
  case node##_vi: \
    return node;
#define CASE_VI_GFX9PLUS(node) \
  case node##_vi: \
  case node##_gfx9plus: \
    return node;
#define CASE_GFXPRE11_GFX11PLUS(node) \
  case node##_gfx11plus: \
  case node##_gfxpre11: \
    return node;
// Expands to nothing in this direction: the `result` variants named by the
// _TO entry in MAP_REG2REG (SGPR_NULL) already get their case labels from
// CASE_GFXPRE11_GFX11PLUS(SGPR_NULL), and repeating them would duplicate case
// labels.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2793
2795
2797 switch (Reg.id()) {
2798 case AMDGPU::SRC_SHARED_BASE_LO:
2799 case AMDGPU::SRC_SHARED_BASE:
2800 case AMDGPU::SRC_SHARED_LIMIT_LO:
2801 case AMDGPU::SRC_SHARED_LIMIT:
2802 case AMDGPU::SRC_PRIVATE_BASE_LO:
2803 case AMDGPU::SRC_PRIVATE_BASE:
2804 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2805 case AMDGPU::SRC_PRIVATE_LIMIT:
2806 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2807 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2808 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2809 return true;
2810 case AMDGPU::SRC_VCCZ:
2811 case AMDGPU::SRC_EXECZ:
2812 case AMDGPU::SRC_SCC:
2813 return true;
2814 case AMDGPU::SGPR_NULL:
2815 return true;
2816 default:
2817 return false;
2818 }
2819}
2820
2821#undef CASE_CI_VI
2822#undef CASE_VI_GFX9PLUS
2823#undef CASE_GFXPRE11_GFX11PLUS
2824#undef CASE_GFXPRE11_GFX11PLUS_TO
2825#undef MAP_REG2REG
2826
2827bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2828 assert(OpNo < Desc.NumOperands);
2829 unsigned OpType = Desc.operands()[OpNo].OperandType;
2830 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2831 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2832}
2833
2834bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2835 assert(OpNo < Desc.NumOperands);
2836 unsigned OpType = Desc.operands()[OpNo].OperandType;
2837 switch (OpType) {
2852 return true;
2853 default:
2854 return false;
2855 }
2856}
2857
2858bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2859 assert(OpNo < Desc.NumOperands);
2860 unsigned OpType = Desc.operands()[OpNo].OperandType;
2861 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2865}
2866
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
//
// \p RCID is an AMDGPU register class ID. Classes are grouped by width below;
// an ID that is not listed hits llvm_unreachable, so new register classes must
// be added to the matching group.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_16RegClassID:
  case AMDGPU::VGPR_16_Lo128RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_32_Lo256RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
  case AMDGPU::VReg_64_Lo256_Align2RegClassID:
  case AMDGPU::VS_64_Lo256RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
  case AMDGPU::VReg_96_Lo256_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
  case AMDGPU::VReg_128_Lo256_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
  case AMDGPU::VReg_160_Lo256_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
  case AMDGPU::VReg_192_Lo256_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
  case AMDGPU::VReg_224_Lo256_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
  case AMDGPU::VReg_256_Lo256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
  case AMDGPU::VReg_288_Lo256_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
  case AMDGPU::VReg_320_Lo256_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
  case AMDGPU::VReg_352_Lo256_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
  case AMDGPU::VReg_384_Lo256_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
  case AMDGPU::VReg_512_Lo256_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
  case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
3026
3027unsigned getRegBitWidth(const MCRegisterClass &RC) {
3028 return getRegBitWidth(RC.getID());
3029}
3030
3031bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3033 return true;
3034
3035 uint64_t Val = static_cast<uint64_t>(Literal);
3036 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
3037 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
3038 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
3039 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
3040 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
3041 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
3042 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
3043 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
3044 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
3045 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3046}
3047
3048bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3050 return true;
3051
3052 // The actual type of the operand does not seem to matter as long
3053 // as the bits match one of the inline immediate values. For example:
3054 //
3055 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3056 // so it is a legal inline immediate.
3057 //
3058 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3059 // floating-point, so it is a legal inline immediate.
3060
3061 uint32_t Val = static_cast<uint32_t>(Literal);
3062 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
3063 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
3064 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
3065 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
3066 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
3067 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
3068 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
3069 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
3070 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
3071 (Val == 0x3e22f983 && HasInv2Pi);
3072}
3073
3074bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3075 if (!HasInv2Pi)
3076 return false;
3078 return true;
3079 uint16_t Val = static_cast<uint16_t>(Literal);
3080 return Val == 0x3F00 || // 0.5
3081 Val == 0xBF00 || // -0.5
3082 Val == 0x3F80 || // 1.0
3083 Val == 0xBF80 || // -1.0
3084 Val == 0x4000 || // 2.0
3085 Val == 0xC000 || // -2.0
3086 Val == 0x4080 || // 4.0
3087 Val == 0xC080 || // -4.0
3088 Val == 0x3E22; // 1.0 / (2.0 * pi)
3089}
3090
3091bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3092 return isInlinableLiteral32(Literal, HasInv2Pi);
3093}
3094
3095bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3096 if (!HasInv2Pi)
3097 return false;
3099 return true;
3100 uint16_t Val = static_cast<uint16_t>(Literal);
3101 return Val == 0x3C00 || // 1.0
3102 Val == 0xBC00 || // -1.0
3103 Val == 0x3800 || // 0.5
3104 Val == 0xB800 || // -0.5
3105 Val == 0x4000 || // 2.0
3106 Val == 0xC000 || // -2.0
3107 Val == 0x4400 || // 4.0
3108 Val == 0xC400 || // -4.0
3109 Val == 0x3118; // 1/2pi
3110}
3111
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// operand.
///
/// \param IsFloat Selects which float bit patterns are recognised: half
///                precision values in the low 16 bits when true, single
///                precision values when false.
/// \returns 128..192 for the integers 0..64, 193..208 for -1..-16, 240..248
///          for the float constants, or std::nullopt if \p Literal has no
///          inline encoding.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  const int32_t AsSigned = static_cast<int32_t>(Literal);
  if (AsSigned >= 0 && AsSigned <= 64)
    return 128 + AsSigned;

  if (AsSigned >= -16 && AsSigned <= -1)
    return 192 - AsSigned;

  // Bit patterns in encoding order 240..248:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi)
  constexpr unsigned NumFloatConsts = 9;
  // clang-format off
  static constexpr uint32_t HalfBits[NumFloatConsts] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SingleBits[NumFloatConsts] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};
  // clang-format on

  const uint32_t *Patterns = IsFloat ? HalfBits : SingleBits;
  for (unsigned I = 0; I != NumFloatConsts; ++I)
    if (Patterns[I] == Literal)
      return 240 + I;

  return std::nullopt;
}
3164
3165// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3166// or nullopt.
3167std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3168 return getInlineEncodingV216(false, Literal);
3169}
3170
// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt. Integers 0..64 map to 128..192, -1..-16 map to 193..208, and the
// bfloat16 constants in the low 16 bits map to 240..248.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t AsSigned = static_cast<int32_t>(Literal);
  if (AsSigned >= 0 && AsSigned <= 64)
    return 128 + AsSigned;

  if (AsSigned >= -16 && AsSigned <= -1)
    return 192 - AsSigned;

  // Bit patterns in encoding order 240..248:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi)
  constexpr unsigned NumFloatConsts = 9;
  // clang-format off
  static constexpr uint32_t BF16Bits[NumFloatConsts] = {
      0x3F00, 0xBF00, 0x3F80, 0xBF80, 0x4000, 0xC000, 0x4080, 0xC080, 0x3E22};
  // clang-format on

  for (unsigned I = 0; I != NumFloatConsts; ++I)
    if (BF16Bits[I] == Literal)
      return 240 + I;

  return std::nullopt;
}
3198
3199// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3200// or nullopt.
3201std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3202 return getInlineEncodingV216(true, Literal);
3203}
3204
3205// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3206// or nullopt. This accounts for different inline constant behavior:
3207// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3208// - GFX11+: fp16 inline constants are duplicated into both halves
3210 bool IsGFX11Plus) {
3211 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3212 if (!IsGFX11Plus)
3213 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3214
3215 // GFX11+ behavior: f16 duplicated in both halves
3216 // First, check for sign-extended integer inline constants (-16 to 64)
3217 // These work the same across all generations
3218 int32_t Signed = static_cast<int32_t>(Literal);
3219 if (Signed >= 0 && Signed <= 64)
3220 return 128 + Signed;
3221
3222 if (Signed >= -16 && Signed <= -1)
3223 return 192 + std::abs(Signed);
3224
3225 // For float inline constants on GFX11+, both halves must be equal
3226 uint16_t Lo = static_cast<uint16_t>(Literal);
3227 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3228 if (Lo != Hi)
3229 return std::nullopt;
3230 return getInlineEncodingV216(/*IsFloat=*/true, Lo);
3231}
3232
3233// Whether the given literal can be inlined for a V_PK_* instruction.
3235 switch (OpType) {
3238 return getInlineEncodingV216(false, Literal).has_value();
3241 return getInlineEncodingV216(true, Literal).has_value();
3243 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3248 return false;
3249 default:
3250 llvm_unreachable("bad packed operand type");
3251 }
3252}
3253
3254// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3258
3259// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3263
3264// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3268
3269// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
3271 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3272}
3273
3274bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3275 if (IsFP64)
3276 return !Lo_32(Val);
3277
3278 return isUInt<32>(Val) || isInt<32>(Val);
3279}
3280
3281int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
3282 switch (Type) {
3283 default:
3284 break;
3289 return Imm & 0xffff;
3303 return Lo_32(Imm);
3306 return IsLit ? Imm : Hi_32(Imm);
3307 }
3308 return Imm;
3309}
3310
3312 const Function *F = A->getParent();
3313
3314 // Arguments to compute shaders are never a source of divergence.
3315 CallingConv::ID CC = F->getCallingConv();
3316 switch (CC) {
3319 return true;
3330 // For non-compute shaders, SGPR inputs are marked with either inreg or
3331 // byval. Everything else is in VGPRs.
3332 return A->hasAttribute(Attribute::InReg) ||
3333 A->hasAttribute(Attribute::ByVal);
3334 default:
3335 // TODO: treat i1 as divergent?
3336 return A->hasAttribute(Attribute::InReg);
3337 }
3338}
3339
3340bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3341 // Arguments to compute shaders are never a source of divergence.
3343 switch (CC) {
3346 return true;
3357 // For non-compute shaders, SGPR inputs are marked with either inreg or
3358 // byval. Everything else is in VGPRs.
3359 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3360 CB->paramHasAttr(ArgNo, Attribute::ByVal);
3361 default:
3362 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3363 }
3364}
3365
3366static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3367 return isGCN3Encoding(ST) || isGFX10Plus(ST);
3368}
3369
3371 int64_t EncodedOffset) {
3372 if (isGFX12Plus(ST))
3373 return isUInt<23>(EncodedOffset);
3374
3375 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3376 : isUInt<8>(EncodedOffset);
3377}
3378
3380 int64_t EncodedOffset, bool IsBuffer) {
3381 if (isGFX12Plus(ST)) {
3382 if (IsBuffer && EncodedOffset < 0)
3383 return false;
3384 return isInt<24>(EncodedOffset);
3385 }
3386
3387 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3388}
3389
/// \returns true when \p ByteOffset is a multiple of four bytes (one dword).
static bool isDwordAligned(uint64_t ByteOffset) {
  constexpr uint64_t BytesPerDword = 4;
  return ByteOffset % BytesPerDword == 0;
}
3393
3395 uint64_t ByteOffset) {
3396 if (hasSMEMByteOffset(ST))
3397 return ByteOffset;
3398
3399 assert(isDwordAligned(ByteOffset));
3400 return ByteOffset >> 2;
3401}
3402
3403std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3404 int64_t ByteOffset, bool IsBuffer,
3405 bool HasSOffset) {
3406 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3407 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
3408 // Handle case where SOffset is not present.
3409 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3410 return std::nullopt;
3411
3412 if (isGFX12Plus(ST)) // 24 bit signed offsets
3413 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3414 : std::nullopt;
3415
3416 // The signed version is always a byte offset.
3417 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3419 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3420 : std::nullopt;
3421 }
3422
3423 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3424 return std::nullopt;
3425
3426 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3427 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3428 ? std::optional<int64_t>(EncodedOffset)
3429 : std::nullopt;
3430}
3431
3432std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3433 int64_t ByteOffset) {
3434 if (!isCI(ST) || !isDwordAligned(ByteOffset))
3435 return std::nullopt;
3436
3437 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3438 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3439 : std::nullopt;
3440}
3441
3443 if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
3444 return 12;
3445 if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
3446 return 24;
3447 return 13;
3448}
3449
// File-local declarations consumed by the generated searchable-table include
// at the bottom of this namespace.
3450namespace {
3451
// One table record: holds a single intrinsic ID.
// NOTE(review): presumably the field name must match what the generated
// table code expects -- confirm before renaming.
3452struct SourceOfDivergence {
3453 unsigned Intr;
3454};
// Lookup by intrinsic ID. Only declared here; the definition is presumably
// supplied by the generated include below (TODO confirm). The result is used
// as a "found / not found" flag by isIntrinsicSourceOfDivergence().
3455const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3456
// One table record: holds a single intrinsic ID.
3457struct AlwaysUniform {
3458 unsigned Intr;
3459};
// Lookup by intrinsic ID; the result is used as a "found / not found" flag by
// isIntrinsicAlwaysUniform().
3460const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3461
// NOTE(review): these macros presumably select which table implementations
// the generated include emits -- verify against the TableGen output.
3462#define GET_SourcesOfDivergence_IMPL
3463#define GET_UniformIntrinsics_IMPL
3464#define GET_Gfx9BufferFormat_IMPL
3465#define GET_Gfx10BufferFormat_IMPL
3466#define GET_Gfx11PlusBufferFormat_IMPL
3467
3468#include "AMDGPUGenSearchableTables.inc"
3469
3470} // end anonymous namespace
3471
3472bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3473 return lookupSourceOfDivergence(IntrID);
3474}
3475
3476bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3477 return lookupAlwaysUniform(IntrID);
3478}
3479
3481 uint8_t NumComponents,
3482 uint8_t NumFormat,
3483 const MCSubtargetInfo &STI) {
3484 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3485 BitsPerComp, NumComponents, NumFormat)
3486 : isGFX10(STI)
3487 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3488 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3489}
3490
3492 const MCSubtargetInfo &STI) {
3493 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3494 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3495 : getGfx9BufferFormatInfo(Format);
3496}
3497
3499 const MCRegisterInfo &MRI) {
3500 const unsigned VGPRClasses[] = {
3501 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3502 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3503 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3504 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3505 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3506 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3507 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3508 AMDGPU::VReg_1024RegClassID};
3509
3510 for (unsigned RCID : VGPRClasses) {
3511 const MCRegisterClass &RC = MRI.getRegClass(RCID);
3512 if (RC.contains(Reg))
3513 return &RC;
3514 }
3515
3516 return nullptr;
3517}
3518
3520 unsigned Enc = MRI.getEncodingValue(Reg);
3521 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3522 return Idx >> 8;
3523}
3524
3526 const MCRegisterInfo &MRI) {
3527 unsigned Enc = MRI.getEncodingValue(Reg);
3528 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3529 if (Idx >= 0x100)
3530 return MCRegister();
3531
3532 const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
3533 if (!RC)
3534 return MCRegister();
3535
3536 Idx |= MSBs << 8;
3537 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3538 // This class has 2048 registers with interleaved lo16 and hi16.
3539 Idx *= 2;
3541 ++Idx;
3542 }
3543
3544 return RC->getRegister(Idx);
3545}
3546
3547static std::optional<unsigned>
3548convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
3549 bool HasSetregVGPRMSBFixup) {
3550 constexpr unsigned VGPRMSBShift =
3552
3553 auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
3554 if (HwRegId != Hwreg::ID_MODE ||
3555 (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
3556 return {};
3557 // If there is SetregVGPRMSBFixup then Offset is ignored.
3558 if (!HasSetregVGPRMSBFixup)
3559 Imm <<= Offset;
3560 Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
3561 if (!HasSetregVGPRMSBFixup)
3563 return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
3564}
3565
3566std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3567 bool HasSetregVGPRMSBFixup) {
3568 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3569 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3570 MI.getOperand(1).getImm(),
3571 HasSetregVGPRMSBFixup);
3572}
3573
3574std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3575 bool HasSetregVGPRMSBFixup) {
3576 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3577 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3578 MI.getOperand(1).getImm(),
3579 HasSetregVGPRMSBFixup);
3580}
3581
3582std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3584 static const AMDGPU::OpName VOPOps[4] = {
3585 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3586 AMDGPU::OpName::vdst};
3587 static const AMDGPU::OpName VDSOps[4] = {
3588 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3589 AMDGPU::OpName::vdst};
3590 static const AMDGPU::OpName FLATOps[4] = {
3591 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3592 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3593 static const AMDGPU::OpName BUFOps[4] = {
3594 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3595 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3596 static const AMDGPU::OpName VIMGOps[4] = {
3597 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3598 AMDGPU::OpName::vdata};
3599
3600 // For VOPD instructions MSB of a corresponding Y component operand VGPR
3601 // address is supposed to match X operand, otherwise VOPD shall not be
3602 // combined.
3603 static const AMDGPU::OpName VOPDOpsX[4] = {
3604 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3605 AMDGPU::OpName::vdstX};
3606 static const AMDGPU::OpName VOPDOpsY[4] = {
3607 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3608 AMDGPU::OpName::vdstY};
3609
3610 // VOP2 MADMK instructions use src0, imm, src1 scheme.
3611 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3612 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3613 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3614 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3615 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3616 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3617 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3618 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3619 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3620
3621 unsigned TSFlags = Desc.TSFlags;
3622
3623 if (TSFlags &
3626 switch (Desc.getOpcode()) {
3627 // LD_SCALE operands ignore MSB.
3628 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3629 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3630 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3631 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3632 return {};
3633 case AMDGPU::V_FMAMK_F16:
3634 case AMDGPU::V_FMAMK_F16_t16:
3635 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3636 case AMDGPU::V_FMAMK_F16_fake16:
3637 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3638 case AMDGPU::V_FMAMK_F32:
3639 case AMDGPU::V_FMAMK_F32_gfx12:
3640 case AMDGPU::V_FMAMK_F64:
3641 case AMDGPU::V_FMAMK_F64_gfx1250:
3642 return {VOP2MADMKOps, nullptr};
3643 default:
3644 break;
3645 }
3646 return {VOPOps, nullptr};
3647 }
3648
3649 if (TSFlags & SIInstrFlags::DS)
3650 return {VDSOps, nullptr};
3651
3652 if (TSFlags & SIInstrFlags::FLAT)
3653 return {FLATOps, nullptr};
3654
3655 if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
3656 return {BUFOps, nullptr};
3657
3658 if (TSFlags & SIInstrFlags::VIMAGE)
3659 return {VIMGOps, nullptr};
3660
3661 if (AMDGPU::isVOPD(Desc.getOpcode())) {
3662 auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
3663 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3664 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3665 }
3666
3667 assert(!(TSFlags & SIInstrFlags::MIMG));
3668
3669 if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
3670 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3671 " these instructions are not expected on gfx1250");
3672
3673 return {};
3674}
3675
3676bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3677 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3678
3679 if (TSFlags & SIInstrFlags::SMRD)
3680 return !getSMEMIsBuffer(Opcode);
3681 if (!(TSFlags & SIInstrFlags::FLAT))
3682 return false;
3683
3684 // Only SV and SVS modes are supported.
3685 if (TSFlags & SIInstrFlags::FlatScratch)
3686 return hasNamedOperand(Opcode, OpName::vaddr);
3687
3688 // Only GVS mode is supported.
3689 return hasNamedOperand(Opcode, OpName::vaddr) &&
3690 hasNamedOperand(Opcode, OpName::saddr);
3691
3692 return false;
3693}
3694
3695bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3696 const MCSubtargetInfo &ST) {
3697 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3698 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3699 if (Idx == -1)
3700 continue;
3701
3702 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3703 int16_t RegClass = MII.getOpRegClassID(
3704 OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3705 if (RegClass == AMDGPU::VReg_64RegClassID ||
3706 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3707 return true;
3708 }
3709
3710 return false;
3711}
3712
3713bool isDPALU_DPP32BitOpc(unsigned Opc) {
3714 switch (Opc) {
3715 case AMDGPU::V_MUL_LO_U32_e64:
3716 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3717 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3718 case AMDGPU::V_MUL_HI_U32_e64:
3719 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3720 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3721 case AMDGPU::V_MUL_HI_I32_e64:
3722 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3723 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3724 case AMDGPU::V_MAD_U32_e64:
3725 case AMDGPU::V_MAD_U32_e64_dpp:
3726 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3727 return true;
3728 default:
3729 return false;
3730 }
3731}
3732
3733bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3734 const MCSubtargetInfo &ST) {
3735 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3736 return false;
3737
3738 if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3739 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3740
3741 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3742}
3743
3745 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3746 return 64;
3747 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3748 return 128;
3749 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3750 return 320;
3751 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3752 return 512;
3753 return 64; // In sync with getAddressableLocalMemorySize
3754}
3755
3756bool isPackedFP32Inst(unsigned Opc) {
3757 switch (Opc) {
3758 case AMDGPU::V_PK_ADD_F32:
3759 case AMDGPU::V_PK_ADD_F32_gfx12:
3760 case AMDGPU::V_PK_MUL_F32:
3761 case AMDGPU::V_PK_MUL_F32_gfx12:
3762 case AMDGPU::V_PK_FMA_F32:
3763 case AMDGPU::V_PK_FMA_F32_gfx12:
3764 return true;
3765 default:
3766 return false;
3767 }
3768}
3769
3770bool isPacked64BitInst(unsigned Opc) {
3771 switch (Opc) {
3772 case AMDGPU::V_PK_ADD_F64:
3773 case AMDGPU::V_PK_ADD_F64_gfx1250:
3774 case AMDGPU::V_PK_MUL_F64:
3775 case AMDGPU::V_PK_MUL_F64_gfx1250:
3776 case AMDGPU::V_PK_FMA_F64:
3777 case AMDGPU::V_PK_FMA_F64_gfx1250:
3778 case AMDGPU::V_PK_MAX_NUM_F64:
3779 case AMDGPU::V_PK_MAX_NUM_F64_gfx1250:
3780 case AMDGPU::V_PK_MIN_NUM_F64:
3781 case AMDGPU::V_PK_MIN_NUM_F64_gfx1250:
3782 case AMDGPU::V_PK_ADD_NC_U64:
3783 case AMDGPU::V_PK_ADD_NC_U64_gfx1250:
3784 case AMDGPU::V_PK_SUB_NC_U64:
3785 case AMDGPU::V_PK_SUB_NC_U64_gfx1250:
3786 case AMDGPU::V_PK_LSHL_ADD_U64:
3787 case AMDGPU::V_PK_LSHL_ADD_U64_gfx1250:
3788 return true;
3789 default:
3790 return false;
3791 }
3792}
3793
3796}
3797
3798const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
3799 assert(isFixedDims() && "expect kind to be FixedDims");
3800 return Dims;
3801}
3802
3803std::string ClusterDimsAttr::to_string() const {
3804 SmallString<10> Buffer;
3805 raw_svector_ostream OS(Buffer);
3806
3807 switch (getKind()) {
3808 case Kind::Unknown:
3809 return "";
3810 case Kind::NoCluster: {
3811 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3812 return Buffer.c_str();
3813 }
3814 case Kind::VariableDims: {
3815 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3816 << EncoVariableDims;
3817 return Buffer.c_str();
3818 }
3819 case Kind::FixedDims: {
3820 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3821 return Buffer.c_str();
3822 }
3823 }
3824 llvm_unreachable("Unknown ClusterDimsAttr kind");
3825}
3826
3828 std::optional<SmallVector<unsigned>> Attr =
3829 getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
3831
3832 if (!Attr.has_value())
3833 AttrKind = Kind::Unknown;
3834 else if (all_of(*Attr, equal_to(EncoNoCluster)))
3835 AttrKind = Kind::NoCluster;
3836 else if (all_of(*Attr, equal_to(EncoVariableDims)))
3837 AttrKind = Kind::VariableDims;
3838
3839 ClusterDimsAttr A(AttrKind);
3840 if (AttrKind == Kind::FixedDims)
3841 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3842
3843 return A;
3844}
3845
3846} // namespace AMDGPU
3847
3849 switch (S) {
3850 case (AMDGPU::TargetIDSetting::Unsupported):
3851 OS << "Unsupported";
3852 break;
3853 case (AMDGPU::TargetIDSetting::Any):
3854 OS << "Any";
3855 break;
3856 case (AMDGPU::TargetIDSetting::Off):
3857 OS << "Off";
3858 break;
3859 case (AMDGPU::TargetIDSetting::On):
3860 OS << "On";
3861 break;
3862 }
3863 return OS;
3864}
3865
3866} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file contains the declarations for metadata subclasses.
#define T
uint64_t High
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1469
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1466
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1472
This file contains some functions that are useful when dealing with strings.
static const int BlockSize
Definition TarWriter.cpp:33
static ClusterDimsAttr get(const Function &F)
const std::array< unsigned, 3 > & getDims() const
void setSramEccSetting(TargetIDSetting NewSramEccSetting)
Sets sramecc setting to NewSramEccSetting.
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr unsigned id() const
Definition MCRegister.h:82
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1426
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1432
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
const char * c_str()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:888
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:736
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:434
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:425
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:903
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI)
bool isSGPROccupancyLimited(const MCSubtargetInfo &STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getEUsPerCU(const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
static unsigned getSGPRTrapHandlerReserve(const MCSubtargetInfo &STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU)
unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, bool Addressable)
unsigned getWavefrontSize(const MCSubtargetInfo &STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getInstCacheLineSize(const MCSubtargetInfo &STI)
static constexpr unsigned MaxDynamicVGPRBlocks
Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
static unsigned getSGPRBudgetPerWave(unsigned TotalNumSGPRs, unsigned WavesPerEU, unsigned TrapReserve, unsigned Granule)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, unsigned TotalNumSGPRs, unsigned Granule, unsigned TrapReserve)
unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMinWavesPerEU(const MCSubtargetInfo &STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo &STI)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool getHasMatrixScale(unsigned Opc)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
TargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, StringRef FeatureString)
Construct TargetID from MCSubtargetInfo.
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
bool isPackedFP32or64BitInst(unsigned Opc)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
static bool isValidRegPrefix(char C)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
AMDGPU::TargetID TargetID
bool isGlobalSegment(const GlobalValue *GV)
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:433
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:451
@ OPERAND_REG_INLINE_C_LAST
Definition SIDefines.h:474
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:426
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:444
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:428
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:423
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:418
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:425
@ OPERAND_REG_INLINE_AC_FIRST
Definition SIDefines.h:476
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:424
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:427
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:430
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:422
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:445
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:456
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:457
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:431
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:421
@ OPERAND_REG_INLINE_C_FIRST
Definition SIDefines.h:473
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:441
@ OPERAND_REG_INLINE_AC_LAST
Definition SIDefines.h:477
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:437
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:443
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:432
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:458
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:440
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:448
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
bool isPacked64BitInst(unsigned Opc)
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition ELF.h:384
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition ELF.h:385
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition ELF.h:386
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:573
constexpr T rotr(T V, int R)
Definition bit.h:399
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
std::string utostr(uint64_t X, bool isNeg=false)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
Op::Description Desc
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
To bit_cast(const From &from) noexcept
Definition bit.h:90
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:190
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ AlwaysUniform
The result value is always uniform.
Definition Uniformity.h:23
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
AMD Kernel Code Object (amd_kernel_code_t).
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
Instruction set architecture version.