LLVM 23.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
42namespace {
43
/// \returns Bit mask covering \p Width consecutive bits starting at bit
/// \p Shift. \p Width must be < 32 and the shifted mask must fit in 32 bits.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Use an unsigned literal: with the signed `1`, Width == 31 would overflow
  // int, which is undefined behavior. Result is unchanged for all valid input.
  return ((1u << Width) - 1u) << Shift;
}
48
/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  // Clear the target field in Dst, then insert the shifted Src bits.
  return (Dst & ~FieldMask) | ((Src << Shift) & FieldMask);
}
56
/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  const unsigned FieldMask = ((1 << Width) - 1) << Shift;
  return (Src & FieldMask) >> Shift;
}
63
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}

/// \returns Expcnt bit width (same on every generation).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits); the high field only exists on
/// major versions 9 and 10.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  if (VersionMajor == 9 || VersionMajor == 10)
    return 2;
  return 0;
}
99
/// \returns Loadcnt bit width; the counter only exists on major version 12+.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 6;
}

/// \returns Samplecnt bit width; the counter only exists on major version 12+.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 6;
}

/// \returns Bvhcnt bit width; the counter only exists on major version 12+.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 3;
}

/// \returns Dscnt bit width; the counter only exists on major version 12+.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 6;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 10)
    return 0;
  return 6;
}

/// \returns Kmcnt bit width; the counter only exists on major version 12+.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 5;
}

/// \returns Xcnt bit width; the counter only exists on version 12.5.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  if (VersionMajor == 12 && VersionMinor == 5)
    return 6;
  return 0;
}

/// \returns Asynccnt bit width; the counter only exists on version 12.5.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  if (VersionMajor == 12 && VersionMinor == 5)
    return 6;
  return 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  if (VersionMajor < 12)
    return 0;
  return 8;
}
147
// Bit geometry for the individual wait fields below (VaSdst, VmVsrc, VaVdst,
// VaVcc, SaSdst, VaSsrc). NOTE(review): these look like fields of the
// s_delay/depctr immediate — confirm against the ISA manual; SOURCE only
// shows the raw shift/width constants.

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }
183
/// \returns HoldCnt bit width: 1 on ISA version 10.3 and newer, where the
/// field exists, otherwise 0. (The previous comment said "bit shift", which
/// was a copy-paste error from getHoldCntBitShift.)
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  // Present starting with version 10.3; compare (major, minor)
  // lexicographically.
  return (VersionMajor > 10 || (VersionMajor == 10 && VersionMinor >= 3)) ? 1
                                                                          : 0;
}
192
/// \returns HoldCnt bit shift (position of the HoldCnt field).
inline unsigned getHoldCntBitShift() { return 7; }
195
196} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
205 return isGFX9Plus(ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
215 M.getModuleFlag("amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
220}
221
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
229 return 4;
231 return 5;
233 return 6;
234 default:
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
246 case 5:
248 case 6:
250 default:
251 report_fatal_error("Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
322 const MIMGInfo *Info = getMIMGInfo(Opc);
323 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
330 NewChannels, OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(AddrComponents, 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
372
381
386
391
395
399
403
410
418
423
424#define GET_FP4FP8DstByteSelTable_DECL
425#define GET_FP4FP8DstByteSelTable_IMPL
426
431
437
438#define GET_DPMACCInstructionTable_DECL
439#define GET_DPMACCInstructionTable_IMPL
440#define GET_MTBUFInfoTable_DECL
441#define GET_MTBUFInfoTable_IMPL
442#define GET_MUBUFInfoTable_DECL
443#define GET_MUBUFInfoTable_IMPL
444#define GET_SMInfoTable_DECL
445#define GET_SMInfoTable_IMPL
446#define GET_VOP1InfoTable_DECL
447#define GET_VOP1InfoTable_IMPL
448#define GET_VOP2InfoTable_DECL
449#define GET_VOP2InfoTable_IMPL
450#define GET_VOP3InfoTable_DECL
451#define GET_VOP3InfoTable_IMPL
452#define GET_VOPC64DPPTable_DECL
453#define GET_VOPC64DPPTable_IMPL
454#define GET_VOPC64DPP8Table_DECL
455#define GET_VOPC64DPP8Table_IMPL
456#define GET_VOPCAsmOnlyInfoTable_DECL
457#define GET_VOPCAsmOnlyInfoTable_IMPL
458#define GET_VOP3CAsmOnlyInfoTable_DECL
459#define GET_VOP3CAsmOnlyInfoTable_IMPL
460#define GET_VOPDComponentTable_DECL
461#define GET_VOPDComponentTable_IMPL
462#define GET_VOPDPairs_DECL
463#define GET_VOPDPairs_IMPL
464#define GET_VOPTrue16Table_DECL
465#define GET_VOPTrue16Table_IMPL
466#define GET_True16D16Table_IMPL
467#define GET_WMMAOpcode2AddrMappingTable_DECL
468#define GET_WMMAOpcode2AddrMappingTable_IMPL
469#define GET_WMMAOpcode3AddrMappingTable_DECL
470#define GET_WMMAOpcode3AddrMappingTable_IMPL
471#define GET_getMFMA_F8F6F4_WithSize_DECL
472#define GET_getMFMA_F8F6F4_WithSize_IMPL
473#define GET_isMFMA_F8F6F4Table_IMPL
474#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
475
476#include "AMDGPUGenSearchableTables.inc"
477
478int getMTBUFBaseOpcode(unsigned Opc) {
479 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
480 return Info ? Info->BaseOpcode : -1;
481}
482
483int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
484 const MTBUFInfo *Info =
485 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
486 return Info ? Info->Opcode : -1;
487}
488
489int getMTBUFElements(unsigned Opc) {
490 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
491 return Info ? Info->elements : 0;
492}
493
494bool getMTBUFHasVAddr(unsigned Opc) {
495 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
496 return Info && Info->has_vaddr;
497}
498
499bool getMTBUFHasSrsrc(unsigned Opc) {
500 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
501 return Info && Info->has_srsrc;
502}
503
504bool getMTBUFHasSoffset(unsigned Opc) {
505 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
506 return Info && Info->has_soffset;
507}
508
509int getMUBUFBaseOpcode(unsigned Opc) {
510 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
511 return Info ? Info->BaseOpcode : -1;
512}
513
514int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
515 const MUBUFInfo *Info =
516 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
517 return Info ? Info->Opcode : -1;
518}
519
520int getMUBUFElements(unsigned Opc) {
521 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
522 return Info ? Info->elements : 0;
523}
524
525bool getMUBUFHasVAddr(unsigned Opc) {
526 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
527 return Info && Info->has_vaddr;
528}
529
530bool getMUBUFHasSrsrc(unsigned Opc) {
531 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
532 return Info && Info->has_srsrc;
533}
534
535bool getMUBUFHasSoffset(unsigned Opc) {
536 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
537 return Info && Info->has_soffset;
538}
539
540bool getMUBUFIsBufferInv(unsigned Opc) {
541 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
542 return Info && Info->IsBufferInv;
543}
544
545bool getMUBUFTfe(unsigned Opc) {
546 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
547 return Info && Info->tfe;
548}
549
550bool getSMEMIsBuffer(unsigned Opc) {
551 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
552 return Info && Info->IsBuffer;
553}
554
555bool getVOP1IsSingle(unsigned Opc) {
556 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
557 return !Info || Info->IsSingle;
558}
559
560bool getVOP2IsSingle(unsigned Opc) {
561 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
562 return !Info || Info->IsSingle;
563}
564
565bool getVOP3IsSingle(unsigned Opc) {
566 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
567 return !Info || Info->IsSingle;
568}
569
570bool isVOPC64DPP(unsigned Opc) {
571 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
572}
573
574bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
575
576bool getMAIIsDGEMM(unsigned Opc) {
577 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
578 return Info && Info->is_dgemm;
579}
580
581bool getMAIIsGFX940XDL(unsigned Opc) {
582 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
583 return Info && Info->is_gfx940_xdl;
584}
585
586bool getWMMAIsXDL(unsigned Opc) {
587 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
588 return Info ? Info->is_wmma_xdl : false;
589}
590
592 switch (EncodingVal) {
595 return 6;
597 return 4;
600 default:
601 return 8;
602 }
603
604 llvm_unreachable("covered switch over mfma scale formats");
605}
606
608 unsigned BLGP,
609 unsigned F8F8Opcode) {
610 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
611 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
612 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
613}
614
616 switch (Fmt) {
619 return 16;
622 return 12;
624 return 8;
625 }
626
627 llvm_unreachable("covered switch over wmma scale formats");
628}
629
631 unsigned FmtB,
632 unsigned F8F8Opcode) {
633 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
634 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
635 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
636}
637
639 if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
641 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
643 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
645 if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
647 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
649 llvm_unreachable("Subtarget generation does not support VOPD!");
650}
651
/// \returns whether \p Opc can serve as the X and/or Y component of a
/// dual-issue (VOPD) pair in \p EncodingFamily. \p VOPD3 selects the
/// three-operand VOPD3 layout.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  // In VOPD3 form, the plain bitwise ops (see getBitOp2) are representable
  // through V_BITOP3_B32, so query the component table via that opcode.
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here, it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDX;
    if (VOPD3) {
      // VOPD3 X-component support varies by encoding family: probe the full
      // pair table with a known-good Y component (the MOV placeholder).
      CanBeVOPDX = getVOPDFull(AMDGPU::getVOPDOpcode(Opc, VOPD3), VOPDMov,
                               EncodingFamily, VOPD3) != -1;
    } else {
      // The list of VOPDX opcodes is currently the same in all encoding
      // families, so we do not need a family-specific check.
      CanBeVOPDX = Info->CanBeVOPDX;
    }
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {CanBeVOPDX, CanBeVOPDY};
  }

  // Opc has no VOPD component entry at all.
  return {false, false};
}
678
679unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
680 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
681 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
682 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
683 return Info ? Info->VOPDOp : ~0u;
684}
685
/// \returns true if \p Opc is a dual-issue (VOPD) instruction, identified by
/// the presence of the src0X component operand.
bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}
689
690bool isMAC(unsigned Opc) {
691 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
692 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
693 Opc == AMDGPU::V_MAC_F32_e64_vi ||
694 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
695 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
696 Opc == AMDGPU::V_MAC_F16_e64_vi ||
697 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
698 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
699 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
700 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
701 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
702 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
703 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
704 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
705 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
706 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
707 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
708 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
709 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
710 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
711 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
712 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
713 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
714 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
715 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
716 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
717 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
718 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
719}
720
721bool isPermlane16(unsigned Opc) {
722 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
723 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
724 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
725 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
726 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
727 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
728 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
729 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
730}
731
733 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
734 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
735 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
736 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
737 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
738 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
739 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
740 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
741 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
742 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
743}
744
745bool isGenericAtomic(unsigned Opc) {
746 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
747 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
748 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
749 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
750 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
751 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
752 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
753 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
754 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
755 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
756 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
757 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
758 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
759 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
760 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
761 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
762 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
763 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
764 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
765}
766
767bool isAsyncStore(unsigned Opc) {
768 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
769 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
770 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
771 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
772 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
773 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
774 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
775 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
776}
777
778bool isTensorStore(unsigned Opc) {
779 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
780 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
781}
782
783unsigned getTemporalHintType(const MCInstrDesc TID) {
786 unsigned Opc = TID.getOpcode();
787 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
788 if (TID.mayStore() &&
789 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
790 return CPol::TH_TYPE_STORE;
791
792 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
793 // MayLoad flag is present which is the case with instructions like
794 // image_get_resinfo.
795 return CPol::TH_TYPE_LOAD;
796}
797
798bool isTrue16Inst(unsigned Opc) {
799 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
800 return Info && Info->IsTrue16;
801}
802
804 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
805 if (!Info)
806 return FPType::None;
807 if (Info->HasFP8DstByteSel)
808 return FPType::FP8;
809 if (Info->HasFP4DstByteSel)
810 return FPType::FP4;
811
812 return FPType::None;
813}
814
815bool isDPMACCInstruction(unsigned Opc) {
816 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
817 return Info && Info->IsDPMACCInstruction;
818}
819
820unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
821 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
822 return Info ? Info->Opcode3Addr : ~0u;
823}
824
825unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
826 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
827 return Info ? Info->Opcode2Addr : ~0u;
828}
829
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
/// \returns the generation-specific MC opcode for \p Opcode, as produced by
/// the TableGen'd getMCOpcodeGen for subtarget generation \p Gen.
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
836
837unsigned getBitOp2(unsigned Opc) {
838 switch (Opc) {
839 default:
840 return 0;
841 case AMDGPU::V_AND_B32_e32:
842 return 0x40;
843 case AMDGPU::V_OR_B32_e32:
844 return 0x54;
845 case AMDGPU::V_XOR_B32_e32:
846 return 0x14;
847 case AMDGPU::V_XNOR_B32_e32:
848 return 0x41;
849 }
850}
851
852int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
853 bool VOPD3) {
854 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
855 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
856 const VOPDInfo *Info =
857 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
858 return Info ? Info->Opcode : -1;
859}
860
861std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
862 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
863 assert(Info);
864 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
865 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
866 assert(OpX && OpY);
867 return {OpX->BaseVOP, OpY->BaseVOP};
868}
869
870namespace VOPD {
871
872ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
874
877 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
878 assert(TiedIdx == -1 || TiedIdx == Component::DST);
879 HasSrc2Acc = TiedIdx != -1;
880 Opcode = OpDesc.getOpcode();
881
882 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
883 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
884 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
885 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
886 : 1;
887 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
888
889 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
890 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
891 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
892 // operands.
893 NumVOPD3Mods = 2;
894 if (IsVOP3)
895 SrcOperandsNum = 3;
896 } else if (isSISrcFPOperand(OpDesc,
897 getNamedOperandIdx(Opcode, OpName::src0))) {
898 // All FP VOPD instructions have Neg modifiers for all operands except
899 // for tied src2.
900 NumVOPD3Mods = SrcOperandsNum;
901 if (HasSrc2Acc)
902 --NumVOPD3Mods;
903 }
904
905 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
906 return;
907
908 auto OperandsNum = OpDesc.getNumOperands();
909 unsigned CompOprIdx;
910 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
911 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
912 MandatoryLiteralIdx = CompOprIdx;
913 break;
914 }
915 }
916}
917
919 return getNamedOperandIdx(Opcode, OpName::bitop3);
920}
921
922unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
923 assert(CompOprIdx < Component::MAX_OPR_NUM);
924
925 if (CompOprIdx == Component::DST)
927
928 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
929 if (CompSrcIdx < getCompParsedSrcOperandsNum())
930 return getIndexOfSrcInParsedOperands(CompSrcIdx);
931
932 // The specified operand does not exist.
933 return 0;
934}
935
937 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
938 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
939 bool VOPD3) const {
940
941 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
942 CompInfo[ComponentIndex::X].isVOP3());
943 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
944 CompInfo[ComponentIndex::Y].isVOP3());
945
946 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
947 unsigned BanksMask) -> bool {
948 MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
949 MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
950 if (!BaseX)
951 BaseX = X;
952 if (!BaseY)
953 BaseY = Y;
954 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
955 return true;
956 if (BaseX != X /* This is 64-bit register */ &&
957 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
958 return true;
959 if (BaseY != Y &&
960 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
961 return true;
962
963 // If both are 64-bit bank conflict will be detected yet while checking
964 // the first subreg.
965 return false;
966 };
967
968 unsigned CompOprIdx;
969 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
970 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
971 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
972 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
973 continue;
974
975 if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
976 getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
977 return CompOprIdx;
978
979 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
980 continue;
981
982 if (CompOprIdx < Component::DST_NUM) {
983 // Even if we do not check vdst parity, vdst operands still shall not
984 // overlap.
985 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
986 return CompOprIdx;
987 if (VOPD3) // No need to check dst parity.
988 continue;
989 }
990
991 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
992 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
993 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
994 return CompOprIdx;
995 }
996
997 return {};
998}
999
1000// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1001// by the specified component. If an operand is unused
1002// or is not a VGPR, the corresponding value is 0.
1003//
1004// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1005// for the specified component and MC operand. The callback must return 0
1006// if the operand is not a register or not a VGPR.
1008InstInfo::getRegIndices(unsigned CompIdx,
1009 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1010 bool VOPD3) const {
1011 assert(CompIdx < COMPONENTS_NUM);
1012
1013 const auto &Comp = CompInfo[CompIdx];
1015
1016 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1017
1018 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1019 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1020 RegIndices[CompOprIdx] =
1021 Comp.hasRegSrcOperand(CompSrcIdx)
1022 ? GetRegIdx(CompIdx,
1023 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1024 : MCRegister();
1025 }
1026 return RegIndices;
1027}
1028
1029} // namespace VOPD
1030
1032 return VOPD::InstInfo(OpX, OpY);
1033}
1034
1036 const MCInstrInfo *InstrInfo) {
1037 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1038 const auto &OpXDesc = InstrInfo->get(OpX);
1039 const auto &OpYDesc = InstrInfo->get(OpY);
1040 bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1042 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1043 return VOPD::InstInfo(OpXInfo, OpYInfo);
1044}
1045
1046namespace IsaInfo {
1047
1049 : STI(STI), XnackSetting(TargetIDSetting::Any),
1050 SramEccSetting(TargetIDSetting::Any) {
1051 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1052 XnackSetting = TargetIDSetting::Unsupported;
1053 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1054 SramEccSetting = TargetIDSetting::Unsupported;
1055}
1056
1058 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1059 // absence of the target features we assume we must generate code that can run
1060 // in any environment.
1061 SubtargetFeatures Features(FS);
1062 std::optional<bool> XnackRequested;
1063 std::optional<bool> SramEccRequested;
1064
1065 for (const std::string &Feature : Features.getFeatures()) {
1066 if (Feature == "+xnack")
1067 XnackRequested = true;
1068 else if (Feature == "-xnack")
1069 XnackRequested = false;
1070 else if (Feature == "+sramecc")
1071 SramEccRequested = true;
1072 else if (Feature == "-sramecc")
1073 SramEccRequested = false;
1074 }
1075
1076 bool XnackSupported = isXnackSupported();
1077 bool SramEccSupported = isSramEccSupported();
1078
1079 if (XnackRequested) {
1080 if (XnackSupported) {
1081 XnackSetting =
1082 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1083 } else {
1084 // If a specific xnack setting was requested and this GPU does not support
1085 // xnack emit a warning. Setting will remain set to "Unsupported".
1086 if (*XnackRequested) {
1087 errs() << "warning: xnack 'On' was requested for a processor that does "
1088 "not support it!\n";
1089 } else {
1090 errs() << "warning: xnack 'Off' was requested for a processor that "
1091 "does not support it!\n";
1092 }
1093 }
1094 }
1095
1096 if (SramEccRequested) {
1097 if (SramEccSupported) {
1098 SramEccSetting =
1099 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1100 } else {
1101 // If a specific sramecc setting was requested and this GPU does not
1102 // support sramecc emit a warning. Setting will remain set to
1103 // "Unsupported".
1104 if (*SramEccRequested) {
1105 errs() << "warning: sramecc 'On' was requested for a processor that "
1106 "does not support it!\n";
1107 } else {
1108 errs() << "warning: sramecc 'Off' was requested for a processor that "
1109 "does not support it!\n";
1110 }
1111 }
1112 }
1113}
1114
1115static TargetIDSetting
1117 if (FeatureString.ends_with("-"))
1118 return TargetIDSetting::Off;
1119 if (FeatureString.ends_with("+"))
1120 return TargetIDSetting::On;
1121
1122 llvm_unreachable("Malformed feature string");
1123}
1124
1126 SmallVector<StringRef, 3> TargetIDSplit;
1127 TargetID.split(TargetIDSplit, ':');
1128
1129 for (const auto &FeatureString : TargetIDSplit) {
1130 if (FeatureString.starts_with("xnack"))
1131 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
1132 if (FeatureString.starts_with("sramecc"))
1133 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
1134 }
1135}
1136
1137void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
1138 const Triple &TargetTriple = STI.getTargetTriple();
1139 auto Version = getIsaVersion(STI.getCPU());
1140
1141 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1142 << '-' << TargetTriple.getOSName() << '-'
1143 << TargetTriple.getEnvironmentName() << '-';
1144
1145 std::string Processor;
1146 // TODO: Following else statement is present here because we used various
1147 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
1148 // Remove once all aliases are removed from GCNProcessors.td.
1149 if (Version.Major >= 9)
1150 Processor = STI.getCPU().str();
1151 else
1152 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
1153 Twine(Version.Stepping))
1154 .str();
1155
1156 std::string Features;
1157 if (TargetTriple.getOS() == Triple::AMDHSA) {
1158 // sramecc.
1160 Features += ":sramecc-";
1162 Features += ":sramecc+";
1163 // xnack.
1165 Features += ":xnack-";
1167 Features += ":xnack+";
1168 }
1169
1170 StreamRep << Processor << Features;
1171}
1172
1173std::string AMDGPUTargetID::toString() const {
1174 std::string Str;
1175 raw_string_ostream OS(Str);
1176 OS << *this;
1177 return Str;
1178}
1179
1180unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
1181 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
1182 return 16;
1183 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
1184 return 32;
1185
1186 return 64;
1187}
1188
1190 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1191
1192 // "Per CU" really means "per whatever functional block the waves of a
1193 // workgroup must share". So the effective local memory size is doubled in
1194 // WGP mode on gfx10.
1195 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
1196 BytesPerCU *= 2;
1197
1198 return BytesPerCU;
1199}
1200
1202 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1203 return 32768;
1204 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1205 return 65536;
1206 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1207 return 163840;
1208 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1209 return 327680;
1210 return 32768;
1211}
1212
1213unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
1214 // "Per CU" really means "per whatever functional block the waves of a
1215 // workgroup must share".
1216
1217 // GFX12.5 only supports CU mode, which contains four SIMDs.
1218 if (isGFX1250(*STI)) {
1219 assert(STI->getFeatureBits().test(FeatureCuMode));
1220 return 4;
1221 }
1222
1223 // For gfx10 in CU mode the functional block is the CU, which contains
1224 // two SIMDs.
1225 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
1226 return 2;
1227
1228 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1229 // contains two CUs, so a total of four SIMDs.
1230 return 4;
1231}
1232
1234 unsigned FlatWorkGroupSize) {
1235 assert(FlatWorkGroupSize != 0);
1236 if (!STI->getTargetTriple().isAMDGCN())
1237 return 8;
1238 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1239 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1240 if (N == 1) {
1241 // Single-wave workgroups don't consume barrier resources.
1242 return MaxWaves;
1243 }
1244
1245 unsigned MaxBarriers = 16;
1246 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
1247 MaxBarriers = 32;
1248
1249 return std::min(MaxWaves / N, MaxBarriers);
1250}
1251
1252unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }
1253
1254unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
1255 // FIXME: Need to take scratch memory into account.
1256 if (isGFX90A(*STI))
1257 return 8;
1258 if (!isGFX10Plus(*STI))
1259 return 10;
1260 return hasGFX10_3Insts(*STI) ? 16 : 20;
1261}
1262
1264 unsigned FlatWorkGroupSize) {
1265 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1266 getEUsPerCU(STI));
1267}
1268
1269unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }
1270
1272 unsigned FlatWorkGroupSize) {
1273 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1274}
1275
1278 if (Version.Major >= 10)
1279 return getAddressableNumSGPRs(STI);
1280 if (Version.Major >= 8)
1281 return 16;
1282 return 8;
1283}
1284
1285unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }
1286
1287unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1289 if (Version.Major >= 8)
1290 return 800;
1291 return 512;
1292}
1293
1295 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1297
1299 if (Version.Major >= 10)
1300 return 106;
1301 if (Version.Major >= 8)
1302 return 102;
1303 return 104;
1304}
1305
1306unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1307 assert(WavesPerEU != 0);
1308
1310 if (Version.Major >= 10)
1311 return 0;
1312
1313 if (WavesPerEU >= getMaxWavesPerEU(STI))
1314 return 0;
1315
1316 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1317 if (STI->getFeatureBits().test(FeatureTrapHandler))
1318 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1319 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1320 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1321}
1322
1323unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1324 bool Addressable) {
1325 assert(WavesPerEU != 0);
1326
1327 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1329 if (Version.Major >= 10)
1330 return Addressable ? AddressableNumSGPRs : 108;
1331 if (Version.Major >= 8 && !Addressable)
1332 AddressableNumSGPRs = 112;
1333 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1334 if (STI->getFeatureBits().test(FeatureTrapHandler))
1335 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1336 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1337 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1338}
1339
1340unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1341 bool FlatScrUsed, bool XNACKUsed) {
1342 unsigned ExtraSGPRs = 0;
1343 if (VCCUsed)
1344 ExtraSGPRs = 2;
1345
1347 if (Version.Major >= 10)
1348 return ExtraSGPRs;
1349
1350 if (Version.Major < 8) {
1351 if (FlatScrUsed)
1352 ExtraSGPRs = 4;
1353 } else {
1354 if (XNACKUsed)
1355 ExtraSGPRs = 4;
1356
1357 if (FlatScrUsed ||
1358 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1359 ExtraSGPRs = 6;
1360 }
1361
1362 return ExtraSGPRs;
1363}
1364
1365unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1366 bool FlatScrUsed) {
1367 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1368 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1369}
1370
1371static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1372 unsigned Granule) {
1373 return divideCeil(std::max(1u, NumRegs), Granule);
1374}
1375
1376unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1377 // SGPRBlocks is actual number of SGPR blocks minus 1.
1379 1;
1380}
1381
1383 unsigned DynamicVGPRBlockSize,
1384 std::optional<bool> EnableWavefrontSize32) {
1385 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1386 return 8;
1387
1388 if (DynamicVGPRBlockSize != 0)
1389 return DynamicVGPRBlockSize;
1390
1391 bool IsWave32 = EnableWavefrontSize32
1392 ? *EnableWavefrontSize32
1393 : STI->getFeatureBits().test(FeatureWavefrontSize32);
1394
1395 if (STI->getFeatureBits().test(Feature1536VGPRs))
1396 return IsWave32 ? 24 : 12;
1397
1398 if (hasGFX10_3Insts(*STI))
1399 return IsWave32 ? 16 : 8;
1400
1401 return IsWave32 ? 8 : 4;
1402}
1403
1405 std::optional<bool> EnableWavefrontSize32) {
1406 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1407 return 8;
1408
1409 bool IsWave32 = EnableWavefrontSize32
1410 ? *EnableWavefrontSize32
1411 : STI->getFeatureBits().test(FeatureWavefrontSize32);
1412
1413 if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
1414 return IsWave32 ? 16 : 8;
1415
1416 return IsWave32 ? 8 : 4;
1417}
1418
1419unsigned getArchVGPRAllocGranule() { return 4; }
1420
1421unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1422 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1423 return 512;
1424 if (!isGFX10Plus(*STI))
1425 return 256;
1426 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1427 if (STI->getFeatureBits().test(Feature1536VGPRs))
1428 return IsWave32 ? 1536 : 768;
1429 return IsWave32 ? 1024 : 512;
1430}
1431
1433 const auto &Features = STI->getFeatureBits();
1434 if (Features.test(Feature1024AddressableVGPRs))
1435 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1436 return 256;
1437}
1438
1440 unsigned DynamicVGPRBlockSize) {
1441 const auto &Features = STI->getFeatureBits();
1442 if (Features.test(FeatureGFX90AInsts))
1443 return 512;
1444
1445 if (DynamicVGPRBlockSize != 0)
1446 // On GFX12 we can allocate at most 8 blocks of VGPRs.
1447 return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1448 return getAddressableNumArchVGPRs(STI);
1449}
1450
1452 unsigned NumVGPRs,
1453 unsigned DynamicVGPRBlockSize) {
1455 NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1457}
1458
1459unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1460 unsigned MaxWaves,
1461 unsigned TotalNumVGPRs) {
1462 if (NumVGPRs < Granule)
1463 return MaxWaves;
1464 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1465 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1466}
1467
1468unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1470 if (Gen >= AMDGPUSubtarget::GFX10)
1471 return MaxWaves;
1472
1474 if (SGPRs <= 80)
1475 return 10;
1476 if (SGPRs <= 88)
1477 return 9;
1478 if (SGPRs <= 100)
1479 return 8;
1480 return 7;
1481 }
1482 if (SGPRs <= 48)
1483 return 10;
1484 if (SGPRs <= 56)
1485 return 9;
1486 if (SGPRs <= 64)
1487 return 8;
1488 if (SGPRs <= 72)
1489 return 7;
1490 if (SGPRs <= 80)
1491 return 6;
1492 return 5;
1493}
1494
/// Smallest nonzero VGPR usage that still sustains \p WavesPerEU occupancy,
/// or 0 when no nonzero usage constrains occupancy (dynamic-VGPR mode, or
/// \p WavesPerEU already at/above the hardware maximum).
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
  // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
  // zero because there is no nonzero VGPR usage N where going below N
  // achieves higher (static) occupancy.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  if (DynamicVGPREnabled)
    return 0;

  // Requesting no more than the device maximum imposes no lower bound.
  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  // Largest granule-aligned allocation that still fits WavesPerEU waves.
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  // If the cap at WavesPerEU equals the cap at maximum occupancy, VGPR usage
  // cannot be the limiting factor, so there is no meaningful minimum.
  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  // If using every addressable VGPR would still allow more waves than
  // WavesPerEU, re-ask the question at that achievable occupancy instead.
  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  // One register beyond the largest allocation that would permit
  // WavesPerEU + 1 waves (or one granule below our own cap, if smaller).
  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
1529
1530unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1531 unsigned DynamicVGPRBlockSize) {
1532 assert(WavesPerEU != 0);
1533
1534 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1535 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1536 unsigned MaxNumVGPRs =
1537 DynamicVGPREnabled
1538 ? getTotalNumVGPRs(STI)
1539 : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1540 getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1541 unsigned AddressableNumVGPRs =
1542 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1543 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1544}
1545
1546unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1547 std::optional<bool> EnableWavefrontSize32) {
1549 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1550 1;
1551}
1552
1554 unsigned NumVGPRs,
1555 unsigned DynamicVGPRBlockSize,
1556 std::optional<bool> EnableWavefrontSize32) {
1558 NumVGPRs,
1559 getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1560}
1561} // end namespace IsaInfo
1562
1564 const MCSubtargetInfo *STI) {
1566 KernelCode.amd_kernel_code_version_major = 1;
1567 KernelCode.amd_kernel_code_version_minor = 2;
1568 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1569 KernelCode.amd_machine_version_major = Version.Major;
1570 KernelCode.amd_machine_version_minor = Version.Minor;
1571 KernelCode.amd_machine_version_stepping = Version.Stepping;
1573 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1574 KernelCode.wavefront_size = 5;
1576 } else {
1577 KernelCode.wavefront_size = 6;
1578 }
1579
1580 // If the code object does not support indirect functions, then the value must
1581 // be 0xffffffff.
1582 KernelCode.call_convention = -1;
1583
1584 // These alignment values are specified in powers of two, so alignment =
1585 // 2^n. The minimum alignment is 2^4 = 16.
1586 KernelCode.kernarg_segment_alignment = 4;
1587 KernelCode.group_segment_alignment = 4;
1588 KernelCode.private_segment_alignment = 4;
1589
1590 if (Version.Major >= 10) {
1591 KernelCode.compute_pgm_resource_registers |=
1592 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1594 }
1595}
1596
1599}
1600
1603}
1604
1606 unsigned AS = GV->getAddressSpace();
1607 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1609}
1610
1612 return TT.getArch() == Triple::r600;
1613}
1614
// True for the single-character prefixes that begin a physical register
// name in assembly: 'v' (VGPR), 's' (SGPR) and 'a' (AGPR).
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1618
1619std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1620 char Kind = RegName.front();
1621 if (!isValidRegPrefix(Kind))
1622 return {};
1623
1624 RegName = RegName.drop_front();
1625 if (RegName.consume_front("[")) {
1626 unsigned Idx, End;
1627 bool Failed = RegName.consumeInteger(10, Idx);
1628 Failed |= !RegName.consume_front(":");
1629 Failed |= RegName.consumeInteger(10, End);
1630 Failed |= !RegName.consume_back("]");
1631 if (!Failed) {
1632 unsigned NumRegs = End - Idx + 1;
1633 if (NumRegs > 1)
1634 return {Kind, Idx, NumRegs};
1635 }
1636 } else {
1637 unsigned Idx;
1638 bool Failed = RegName.getAsInteger(10, Idx);
1639 if (!Failed)
1640 return {Kind, Idx, 1};
1641 }
1642
1643 return {};
1644}
1645
1646std::tuple<char, unsigned, unsigned>
1648 StringRef RegName = Constraint;
1649 if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1650 return {};
1652}
1653
1654std::pair<unsigned, unsigned>
1656 std::pair<unsigned, unsigned> Default,
1657 bool OnlyFirstRequired) {
1658 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1659 return {Attr->first, Attr->second.value_or(Default.second)};
1660 return Default;
1661}
1662
1663std::optional<std::pair<unsigned, std::optional<unsigned>>>
1665 bool OnlyFirstRequired) {
1666 Attribute A = F.getFnAttribute(Name);
1667 if (!A.isStringAttribute())
1668 return std::nullopt;
1669
1670 LLVMContext &Ctx = F.getContext();
1671 std::pair<unsigned, std::optional<unsigned>> Ints;
1672 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1673 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1674 Ctx.emitError("can't parse first integer attribute " + Name);
1675 return std::nullopt;
1676 }
1677 unsigned Second = 0;
1678 if (Strs.second.trim().getAsInteger(0, Second)) {
1679 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1680 Ctx.emitError("can't parse second integer attribute " + Name);
1681 return std::nullopt;
1682 }
1683 } else {
1684 Ints.second = Second;
1685 }
1686
1687 return Ints;
1688}
1689
1691 unsigned Size,
1692 unsigned DefaultVal) {
1693 std::optional<SmallVector<unsigned>> R =
1695 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1696}
1697
1698std::optional<SmallVector<unsigned>>
1700 assert(Size > 2);
1701 LLVMContext &Ctx = F.getContext();
1702
1703 Attribute A = F.getFnAttribute(Name);
1704 if (!A.isValid())
1705 return std::nullopt;
1706 if (!A.isStringAttribute()) {
1707 Ctx.emitError(Name + " is not a string attribute");
1708 return std::nullopt;
1709 }
1710
1712
1713 StringRef S = A.getValueAsString();
1714 unsigned i = 0;
1715 for (; !S.empty() && i < Size; i++) {
1716 std::pair<StringRef, StringRef> Strs = S.split(',');
1717 unsigned IntVal;
1718 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1719 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1720 Name);
1721 return std::nullopt;
1722 }
1723 Vals[i] = IntVal;
1724 S = Strs.second;
1725 }
1726
1727 if (!S.empty() || i < Size) {
1728 Ctx.emitError("attribute " + Name +
1729 " has incorrect number of integers; expected " +
1731 return std::nullopt;
1732 }
1733 return Vals;
1734}
1735
1736bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1737 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1738 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1739 auto Low =
1740 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1741 auto High =
1742 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1743 // There are two types of [A; B) ranges:
1744 // A < B, e.g. [4; 5) which is a range that only includes 4.
1745 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1746 // everything except 4.
1747 if (Low.ult(High)) {
1748 if (Low.ule(Val) && High.ugt(Val))
1749 return true;
1750 } else {
1751 if (Low.uge(Val) && High.ult(Val))
1752 return true;
1753 }
1754 }
1755
1756 return false;
1757}
1758
1760 return (1 << (getVmcntBitWidthLo(Version.Major) +
1761 getVmcntBitWidthHi(Version.Major))) -
1762 1;
1763}
1764
1766 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1767}
1768
1770 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1771}
1772
1774 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1775}
1776
1778 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1779}
1780
1782 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1783}
1784
1786 return (1 << getDscntBitWidth(Version.Major)) - 1;
1787}
1788
1790 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1791}
1792
1794 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1795}
1796
1798 return (1 << getAsynccntBitWidth(Version.Major, Version.Minor)) - 1;
1799}
1800
1802 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1803}
1804
1806 bool HasExtendedWaitCounts = IV.Major >= 12;
1807 if (HasExtendedWaitCounts) {
1810 } else {
1813 }
1823}
1824
1826 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1827 getVmcntBitWidthLo(Version.Major));
1828 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1829 getExpcntBitWidth(Version.Major));
1830 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1831 getLgkmcntBitWidth(Version.Major));
1832 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1833 getVmcntBitWidthHi(Version.Major));
1834 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1835}
1836
1837unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1838 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1839 getVmcntBitWidthLo(Version.Major));
1840 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1841 getVmcntBitWidthHi(Version.Major));
1842 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1843}
1844
1845unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1846 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1847 getExpcntBitWidth(Version.Major));
1848}
1849
1850unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1851 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1852 getLgkmcntBitWidth(Version.Major));
1853}
1854
1855unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
1856 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1857 getLoadcntBitWidth(Version.Major));
1858}
1859
1860unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
1861 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1862 getStorecntBitWidth(Version.Major));
1863}
1864
1865unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
1866 return unpackBits(Waitcnt, getDscntBitShift(Version.Major),
1867 getDscntBitWidth(Version.Major));
1868}
1869
/// Splits the packed s_waitcnt immediate \p Waitcnt into its three
/// component counters \p Vmcnt, \p Expcnt and \p Lgkmcnt.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
1876
1877unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1878 unsigned Vmcnt) {
1879 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1880 getVmcntBitWidthLo(Version.Major));
1881 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1882 getVmcntBitShiftHi(Version.Major),
1883 getVmcntBitWidthHi(Version.Major));
1884}
1885
1886unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1887 unsigned Expcnt) {
1888 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1889 getExpcntBitWidth(Version.Major));
1890}
1891
1892unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1893 unsigned Lgkmcnt) {
1894 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1895 getLgkmcntBitWidth(Version.Major));
1896}
1897
1898unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1899 unsigned Expcnt, unsigned Lgkmcnt) {
1900 unsigned Waitcnt = getWaitcntBitMask(Version);
1902 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1903 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1904 return Waitcnt;
1905}
1906
1908 bool IsStore) {
1909 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1910 getDscntBitWidth(Version.Major));
1911 if (IsStore) {
1912 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1913 getStorecntBitWidth(Version.Major));
1914 return Dscnt | Storecnt;
1915 }
1916 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1917 getLoadcntBitWidth(Version.Major));
1918 return Dscnt | Loadcnt;
1919}
1920
1921static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1922 unsigned Loadcnt) {
1923 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1924 getLoadcntBitWidth(Version.Major));
1925}
1926
1927static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1928 unsigned Storecnt) {
1929 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1930 getStorecntBitWidth(Version.Major));
1931}
1932
1933static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1934 unsigned Dscnt) {
1935 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1936 getDscntBitWidth(Version.Major));
1937}
1938
1939unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1940 unsigned Dscnt) {
1941 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1942 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1944 return Waitcnt;
1945}
1946
1947unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
1948 unsigned Dscnt) {
1949 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1950 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1952 return Waitcnt;
1953}
1954
1955//===----------------------------------------------------------------------===//
1956// Custom Operand Values
1957//===----------------------------------------------------------------------===//
1958
1960 int Size,
1961 const MCSubtargetInfo &STI) {
1962 unsigned Enc = 0;
1963 for (int Idx = 0; Idx < Size; ++Idx) {
1964 const auto &Op = Opr[Idx];
1965 if (Op.isSupported(STI))
1966 Enc |= Op.encode(Op.Default);
1967 }
1968 return Enc;
1969}
1970
1972 int Size, unsigned Code,
1973 bool &HasNonDefaultVal,
1974 const MCSubtargetInfo &STI) {
1975 unsigned UsedOprMask = 0;
1976 HasNonDefaultVal = false;
1977 for (int Idx = 0; Idx < Size; ++Idx) {
1978 const auto &Op = Opr[Idx];
1979 if (!Op.isSupported(STI))
1980 continue;
1981 UsedOprMask |= Op.getMask();
1982 unsigned Val = Op.decode(Code);
1983 if (!Op.isValid(Val))
1984 return false;
1985 HasNonDefaultVal |= (Val != Op.Default);
1986 }
1987 return (Code & ~UsedOprMask) == 0;
1988}
1989
1990static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1991 unsigned Code, int &Idx, StringRef &Name,
1992 unsigned &Val, bool &IsDefault,
1993 const MCSubtargetInfo &STI) {
1994 while (Idx < Size) {
1995 const auto &Op = Opr[Idx++];
1996 if (Op.isSupported(STI)) {
1997 Name = Op.Name;
1998 Val = Op.decode(Code);
1999 IsDefault = (Val == Op.Default);
2000 return true;
2001 }
2002 }
2003
2004 return false;
2005}
2006
2008 int64_t InputVal) {
2009 if (InputVal < 0 || InputVal > Op.Max)
2010 return OPR_VAL_INVALID;
2011 return Op.encode(InputVal);
2012}
2013
2014static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2015 const StringRef Name, int64_t InputVal,
2016 unsigned &UsedOprMask,
2017 const MCSubtargetInfo &STI) {
2018 int InvalidId = OPR_ID_UNKNOWN;
2019 for (int Idx = 0; Idx < Size; ++Idx) {
2020 const auto &Op = Opr[Idx];
2021 if (Op.Name == Name) {
2022 if (!Op.isSupported(STI)) {
2023 InvalidId = OPR_ID_UNSUPPORTED;
2024 continue;
2025 }
2026 auto OprMask = Op.getMask();
2027 if (OprMask & UsedOprMask)
2028 return OPR_ID_DUPLICATE;
2029 UsedOprMask |= OprMask;
2030 return encodeCustomOperandVal(Op, InputVal);
2031 }
2032 }
2033 return InvalidId;
2034}
2035
2036//===----------------------------------------------------------------------===//
2037// DepCtr
2038//===----------------------------------------------------------------------===//
2039
2040namespace DepCtr {
2041
2043 static int Default = -1;
2044 if (Default == -1)
2046 return Default;
2047}
2048
2049bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2050 const MCSubtargetInfo &STI) {
2052 HasNonDefaultVal, STI);
2053}
2054
/// Decodes the next supported field of the depctr operand \p Code,
/// resuming at table index \p Id; see decodeCustomOperand for the
/// iteration protocol.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}
2060
/// Encodes the named depctr field \p Name with value \p Val, accumulating
/// already-emitted fields in \p UsedOprMask for duplicate detection; see
/// encodeCustomOperand for the error codes returned on failure.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}
2066
2067unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2068
2069unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2070
2071unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2072
2074 return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1;
2075}
2076
2077unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2078
2079unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2080
2081unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2082
2083unsigned decodeFieldVmVsrc(unsigned Encoded) {
2084 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2085}
2086
2087unsigned decodeFieldVaVdst(unsigned Encoded) {
2088 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2089}
2090
2091unsigned decodeFieldSaSdst(unsigned Encoded) {
2092 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2093}
2094
2095unsigned decodeFieldVaSdst(unsigned Encoded) {
2096 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2097}
2098
2099unsigned decodeFieldVaVcc(unsigned Encoded) {
2100 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2101}
2102
2103unsigned decodeFieldVaSsrc(unsigned Encoded) {
2104 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2105}
2106
2107unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2108 return unpackBits(Encoded, getHoldCntBitShift(),
2109 getHoldCntWidth(Version.Major, Version.Minor));
2110}
2111
2112unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2113 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2114}
2115
2116unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2117 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2118 return encodeFieldVmVsrc(Encoded, VmVsrc);
2119}
2120
2121unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2122 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2123}
2124
2125unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2126 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2127 return encodeFieldVaVdst(Encoded, VaVdst);
2128}
2129
2130unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2131 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2132}
2133
2134unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2135 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2136 return encodeFieldSaSdst(Encoded, SaSdst);
2137}
2138
2139unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2140 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2141}
2142
2143unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2144 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2145 return encodeFieldVaSdst(Encoded, VaSdst);
2146}
2147
2148unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2149 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2150}
2151
2152unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2153 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2154 return encodeFieldVaVcc(Encoded, VaVcc);
2155}
2156
2157unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2158 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2159}
2160
2161unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2162 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2163 return encodeFieldVaSsrc(Encoded, VaSsrc);
2164}
2165
/// Packs \p HoldCnt into the HOLD_CNT field of the depctr immediate
/// \p Encoded. Unlike the other fields, the HOLD_CNT width depends on the
/// ISA \p Version.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
                            const IsaVersion &Version) {
  return packBits(HoldCnt, Encoded, getHoldCntBitShift(),
                  getHoldCntWidth(Version.Major, Version.Minor));
}

/// \returns the default depctr encoding for \p STI with the HOLD_CNT field
/// replaced by \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU()));
}
2176
2177} // namespace DepCtr
2178
2179//===----------------------------------------------------------------------===//
2180// exp tgt
2181//===----------------------------------------------------------------------===//
2182
2183namespace Exp {
2184
2185struct ExpTgt {
2187 unsigned Tgt;
2188 unsigned MaxIndex;
2189};
2190
// clang-format off
// Export target name table. An entry with MaxIndex == 0 is a single target;
// an entry with MaxIndex > 0 is a family ("mrt0", "mrt1", ...) whose HW id
// is Tgt + index.
static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
  {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
  {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
  {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
  {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
  {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
  {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};
// clang-format on
2202
2203bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2204 for (const ExpTgt &Val : ExpTgtInfo) {
2205 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2206 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2207 Name = Val.Name;
2208 return true;
2209 }
2210 }
2211 return false;
2212}
2213
2214unsigned getTgtId(const StringRef Name) {
2215
2216 for (const ExpTgt &Val : ExpTgtInfo) {
2217 if (Val.MaxIndex == 0 && Name == Val.Name)
2218 return Val.Tgt;
2219
2220 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2221 StringRef Suffix = Name.drop_front(Val.Name.size());
2222
2223 unsigned Id;
2224 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2225 return ET_INVALID;
2226
2227 // Disable leading zeroes
2228 if (Suffix.size() > 1 && Suffix[0] == '0')
2229 return ET_INVALID;
2230
2231 return Val.Tgt + Id;
2232 }
2233 }
2234 return ET_INVALID;
2235}
2236
/// \returns true if export target \p Id exists on subtarget \p STI.
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    // The null export target was removed in GFX11.
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    // pos4 and prim exports were introduced with GFX10.
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    // Dual-source blend exports exist only on GFX11 and later.
    return isGFX11Plus(STI);
  default:
    // Param exports are unavailable on GFX11/GFX12 but valid again on GFX13+.
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI) || isGFX13Plus(STI);
    return true;
  }
}
2253
2254} // namespace Exp
2255
2256//===----------------------------------------------------------------------===//
2257// MTBUF Format
2258//===----------------------------------------------------------------------===//
2259
2260namespace MTBUFFormat {
2261
2262int64_t getDfmt(const StringRef Name) {
2263 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2264 if (Name == DfmtSymbolic[Id])
2265 return Id;
2266 }
2267 return DFMT_UNDEF;
2268}
2269
2271 assert(Id <= DFMT_MAX);
2272 return DfmtSymbolic[Id];
2273}
2274
2276 if (isSI(STI) || isCI(STI))
2277 return NfmtSymbolicSICI;
2278 if (isVI(STI) || isGFX9(STI))
2279 return NfmtSymbolicVI;
2280 return NfmtSymbolicGFX10;
2281}
2282
2283int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2284 const auto *lookupTable = getNfmtLookupTable(STI);
2285 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2286 if (Name == lookupTable[Id])
2287 return Id;
2288 }
2289 return NFMT_UNDEF;
2290}
2291
/// \returns the symbolic name of numeric format \p Id on subtarget \p STI.
/// May be empty for ids that are unused on that subtarget.
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}
2296
/// \returns true if the combined dfmt/nfmt encoding \p Id carries a numeric
/// format valid on \p STI. Note the dfmt half is decoded but not checked:
/// every dfmt value is accepted.
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

/// \returns true if \p Id is a numeric format known on \p STI, i.e. it has a
/// non-empty symbolic name there.
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}
2307
/// Combines \p Dfmt and \p Nfmt into a single MTBUF format encoding.
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

/// Splits the combined MTBUF \p Format encoding into its \p Dfmt and \p Nfmt
/// components (inverse of encodeDfmtNfmt).
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
2316
/// \returns the unified format id for symbolic name \p Name on subtarget
/// \p STI, or UFMT_UNDEF if the name is unknown. GFX11+ and GFX10 use
/// different name tables.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}
2331
2333 if (isValidUnifiedFormat(Id, STI))
2334 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2335 return "";
2336}
2337
/// \returns true if \p Id is within the unified-format range of subtarget
/// \p STI (the GFX10 and GFX11 ranges differ).
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}
2341
/// Converts a (\p Dfmt, \p Nfmt) pair into the equivalent unified format id
/// for \p STI by searching the per-generation conversion table.
/// \returns UFMT_UNDEF if no unified format encodes that combination.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}
2358
/// \returns true if \p Val is a representable format encoding for \p STI:
/// a unified format on GFX10+, a combined dfmt/nfmt value otherwise.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}
2362
2364 if (isGFX10Plus(STI))
2365 return UFMT_DEFAULT;
2366 return DFMT_NFMT_DEFAULT;
2367}
2368
2369} // namespace MTBUFFormat
2370
2371//===----------------------------------------------------------------------===//
2372// SendMsg
2373//===----------------------------------------------------------------------===//
2374
2375namespace SendMsg {
2376
2380
/// \returns true if \p MsgId fits entirely within the message-id mask for
/// \p STI (no bits set outside the field).
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}
2384
/// Validates operation \p OpId for message \p MsgId on \p STI.
/// In non-strict mode only the field width is checked. In strict mode,
/// messages that require an operation must use one with a known symbolic
/// name, and all other messages must use OP_NONE_.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  // Relaxed validation: anything that fits in the OP field is acceptable.
  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (msgRequiresOp(MsgId, STI)) {
    // Pre-GFX11 GS messages have no NOP operation.
    if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
      return false;

    // Otherwise the op is valid iff it has a symbolic name.
    return !getMsgOpName(MsgId, OpId, STI).empty();
  }

  return OpId == OP_NONE_;
}
2401
2402bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2403 const MCSubtargetInfo &STI, bool Strict) {
2404 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2405
2406 if (!Strict)
2408
2409 if (!isGFX11Plus(STI)) {
2410 switch (MsgId) {
2411 case ID_GS_PreGFX11:
2414 return (OpId == OP_GS_NOP)
2417 }
2418 }
2419 return StreamId == STREAM_ID_NONE_;
2420}
2421
2422bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2423 return MsgId == ID_SYSMSG ||
2424 (!isGFX11Plus(STI) &&
2425 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2426}
2427
2428bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2429 const MCSubtargetInfo &STI) {
2430 return !isGFX11Plus(STI) &&
2431 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2432 OpId != OP_GS_NOP;
2433}
2434
2435void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2436 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2437 MsgId = Val & getMsgIdMask(STI);
2438 if (isGFX11Plus(STI)) {
2439 OpId = 0;
2440 StreamId = 0;
2441 } else {
2442 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2444 }
2445}
2446
2448 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2449}
2450
2451bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2452 // Explicitly list message types that are known to not use m0.
2453 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2454 // types are added in the future that do use m0.
2455 if (isGFX11Plus(STI)) {
2456 switch (MsgId) {
2458 return true;
2459 default:
2460 break;
2461 }
2462 }
2463 switch (MsgId) {
2464 case ID_SAVEWAVE:
2465 case ID_STALL_WAVE_GEN:
2466 case ID_HALT_WAVES:
2467 case ID_ORDERED_PS_DONE:
2469 case ID_GET_DOORBELL:
2470 case ID_GET_DDID:
2471 case ID_SYSMSG:
2472 return true;
2473 default:
2474 return false;
2475 }
2476}
2477
2478} // namespace SendMsg
2479
2480//===----------------------------------------------------------------------===//
2481//
2482//===----------------------------------------------------------------------===//
2483
2485 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2486}
2487
2489 // As a safe default always respond as if PS has color exports.
2490 return F.getFnAttributeAsParsedInteger(
2491 "amdgpu-color-export",
2492 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2493}
2494
2496 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2497}
2498
2500 unsigned BlockSize =
2501 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2502
2503 if (BlockSize == 16 || BlockSize == 32)
2504 return BlockSize;
2505
2506 return 0;
2507}
2508
/// \returns true if the subtarget has the XNACK feature enabled.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureXNACK);
}

/// \returns true if the subtarget has the SRAMECC feature enabled.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSRAMECC);
}
2516
2518 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2519 !STI.hasFeature(AMDGPU::FeatureR128A16);
2520}
2521
/// \returns true if the subtarget supports 16-bit image addresses (A16).
bool hasA16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureA16);
}

/// \returns true if the subtarget supports 16-bit image gradients (G16).
bool hasG16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureG16);
}
2529
2531 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2532 !isSI(STI);
2533}
2534
/// \returns true if the subtarget has GDS (global data share).
bool hasGDS(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGDS);
}
2538
2539unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2540 auto Version = getIsaVersion(STI.getCPU());
2541 if (Version.Major == 10)
2542 return Version.Minor >= 3 ? 13 : 5;
2543 if (Version.Major == 11)
2544 return 5;
2545 if (Version.Major >= 12)
2546 return HasSampler ? 4 : 5;
2547 return 0;
2548}
2549
2551 if (isGFX1250Plus(STI))
2552 return 32;
2553 return 16;
2554}
2555
/// \returns true for Southern Islands targets.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}

/// \returns true for Sea Islands targets.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSeaIslands);
}

/// \returns true for Volcanic Islands targets.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

/// \returns true for GFX9 targets.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX9);
}
2571
2573 return isGFX9(STI) || isGFX10(STI);
2574}
2575
2577 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2578}
2579
2581 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2582}
2583
/// \returns true for Volcanic Islands (GFX8) and all later targets.
bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}

/// \returns true for GFX9 and all later targets.
bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

/// \returns true for targets older than GFX9.
bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2593
/// \returns true for GFX10 targets.
bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10);
}
2597
2599 return isGFX10(STI) || isGFX11(STI);
2600}
2601
2603 return isGFX10(STI) || isGFX11Plus(STI);
2604}
2605
/// \returns true for GFX11 targets.
bool isGFX11(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}
2609
2611 return isGFX11(STI) || isGFX12Plus(STI);
2612}
2613
/// \returns true for GFX12 targets.
bool isGFX12(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
}
2617
2619 return isGFX12(STI) || isGFX13Plus(STI);
2620}
2621
/// \returns true for targets older than GFX12.
bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }

/// \returns true for GFX1250 proper: targets with the GFX1250 instruction
/// set that are not GFX13.
bool isGFX1250(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
}
2627
2629 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2630}
2631
/// \returns true for GFX13 targets.
bool isGFX13(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
}

/// \returns true for GFX13 and later targets (currently GFX13 is the newest
/// generation, so this is the same as isGFX13).
bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2637
2639 if (isGFX1250(STI))
2640 return false;
2641 return isGFX10Plus(STI);
2642}
2643
/// \returns true for targets older than GFX11.
bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2645
2647 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2648}
2649
2651 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2652}
2653
2655 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2656}
2657
2659 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2660}
2661
2663 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2664}
2665
2667 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2668}
2669
2671 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2672}
2673
/// \returns true if the subtarget implements the GFX90A instruction set.
bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

/// \returns true if the subtarget implements the GFX940 instruction set.
bool isGFX940(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
}
2681
2683 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2684}
2685
2687 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2688}
2689
/// \returns true if the subtarget supports dual-issue VOPD instructions.
bool hasVOPD(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVOPDInsts);
}
2693
2695 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2696}
2697
2699 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2700}
2701
2702int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2703 int32_t ArgNumVGPR) {
2704 if (has90AInsts && ArgNumAGPR)
2705 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2706 return std::max(ArgNumVGPR, ArgNumAGPR);
2707}
2708
2710 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2711 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2712 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2713 Reg == AMDGPU::SCC;
2714}
2715
2719
2720#define MAP_REG2REG \
2721 using namespace AMDGPU; \
2722 switch (Reg.id()) { \
2723 default: \
2724 return Reg; \
2725 CASE_CI_VI(FLAT_SCR) \
2726 CASE_CI_VI(FLAT_SCR_LO) \
2727 CASE_CI_VI(FLAT_SCR_HI) \
2728 CASE_VI_GFX9PLUS(TTMP0) \
2729 CASE_VI_GFX9PLUS(TTMP1) \
2730 CASE_VI_GFX9PLUS(TTMP2) \
2731 CASE_VI_GFX9PLUS(TTMP3) \
2732 CASE_VI_GFX9PLUS(TTMP4) \
2733 CASE_VI_GFX9PLUS(TTMP5) \
2734 CASE_VI_GFX9PLUS(TTMP6) \
2735 CASE_VI_GFX9PLUS(TTMP7) \
2736 CASE_VI_GFX9PLUS(TTMP8) \
2737 CASE_VI_GFX9PLUS(TTMP9) \
2738 CASE_VI_GFX9PLUS(TTMP10) \
2739 CASE_VI_GFX9PLUS(TTMP11) \
2740 CASE_VI_GFX9PLUS(TTMP12) \
2741 CASE_VI_GFX9PLUS(TTMP13) \
2742 CASE_VI_GFX9PLUS(TTMP14) \
2743 CASE_VI_GFX9PLUS(TTMP15) \
2744 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2745 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2746 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2747 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2748 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2749 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2750 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2751 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2752 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2753 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2754 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2755 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2756 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2757 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2758 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2759 CASE_VI_GFX9PLUS( \
2760 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2761 CASE_GFXPRE11_GFX11PLUS(M0) \
2762 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2763 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2764 }
2765
2766#define CASE_CI_VI(node) \
2767 assert(!isSI(STI)); \
2768 case node: \
2769 return isCI(STI) ? node##_ci : node##_vi;
2770
2771#define CASE_VI_GFX9PLUS(node) \
2772 case node: \
2773 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2774
2775#define CASE_GFXPRE11_GFX11PLUS(node) \
2776 case node: \
2777 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2778
2779#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2780 case node: \
2781 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2782
2784 if (STI.getTargetTriple().getArch() == Triple::r600)
2785 return Reg;
2787}
2788
2789#undef CASE_CI_VI
2790#undef CASE_VI_GFX9PLUS
2791#undef CASE_GFXPRE11_GFX11PLUS
2792#undef CASE_GFXPRE11_GFX11PLUS_TO
2793
2794#define CASE_CI_VI(node) \
2795 case node##_ci: \
2796 case node##_vi: \
2797 return node;
2798#define CASE_VI_GFX9PLUS(node) \
2799 case node##_vi: \
2800 case node##_gfx9plus: \
2801 return node;
2802#define CASE_GFXPRE11_GFX11PLUS(node) \
2803 case node##_gfx11plus: \
2804 case node##_gfxpre11: \
2805 return node;
2806#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2807
2809
2811 switch (Reg.id()) {
2812 case AMDGPU::SRC_SHARED_BASE_LO:
2813 case AMDGPU::SRC_SHARED_BASE:
2814 case AMDGPU::SRC_SHARED_LIMIT_LO:
2815 case AMDGPU::SRC_SHARED_LIMIT:
2816 case AMDGPU::SRC_PRIVATE_BASE_LO:
2817 case AMDGPU::SRC_PRIVATE_BASE:
2818 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2819 case AMDGPU::SRC_PRIVATE_LIMIT:
2820 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2821 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2822 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2823 return true;
2824 case AMDGPU::SRC_VCCZ:
2825 case AMDGPU::SRC_EXECZ:
2826 case AMDGPU::SRC_SCC:
2827 return true;
2828 case AMDGPU::SGPR_NULL:
2829 return true;
2830 default:
2831 return false;
2832 }
2833}
2834
2835#undef CASE_CI_VI
2836#undef CASE_VI_GFX9PLUS
2837#undef CASE_GFXPRE11_GFX11PLUS
2838#undef CASE_GFXPRE11_GFX11PLUS_TO
2839#undef MAP_REG2REG
2840
/// \returns true if operand \p OpNo of \p Desc is a KIMM-class immediate
/// operand (its OperandType lies in the KIMM range).
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
         OpType <= AMDGPU::OPERAND_KIMM_LAST;
}
2847
2848bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2849 assert(OpNo < Desc.NumOperands);
2850 unsigned OpType = Desc.operands()[OpNo].OperandType;
2851 switch (OpType) {
2865 return true;
2866 default:
2867 return false;
2868 }
2869}
2870
2871bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2872 assert(OpNo < Desc.NumOperands);
2873 unsigned OpType = Desc.operands()[OpNo].OperandType;
2874 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2878}
2879
2880// Avoid using MCRegisterClass::getSize, since that function will go away
2881// (move from MC* level to Target* level). Return size in bits.
/// \returns the width in bits of registers in register class \p RCID.
/// Aborts on register classes not listed here.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_16RegClassID:
  case AMDGPU::VGPR_16_Lo128RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_32_Lo256RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
  case AMDGPU::VReg_64_Lo256_Align2RegClassID:
  case AMDGPU::VS_64_Lo256RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
  case AMDGPU::VReg_96_Lo256_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
  case AMDGPU::VReg_128_Lo256_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
  case AMDGPU::VReg_160_Lo256_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
  case AMDGPU::VReg_192_Lo256_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
  case AMDGPU::VReg_224_Lo256_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
  case AMDGPU::VReg_256_Lo256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
  case AMDGPU::VReg_288_Lo256_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
  case AMDGPU::VReg_320_Lo256_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
  case AMDGPU::VReg_352_Lo256_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
  case AMDGPU::VReg_384_Lo256_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
  case AMDGPU::VReg_512_Lo256_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
  case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
3039
/// \returns the width in bits of registers in register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
3043
3044bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3046 return true;
3047
3048 uint64_t Val = static_cast<uint64_t>(Literal);
3049 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
3050 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
3051 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
3052 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
3053 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
3054 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
3055 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
3056 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
3057 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
3058 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3059}
3060
3061bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3063 return true;
3064
3065 // The actual type of the operand does not seem to matter as long
3066 // as the bits match one of the inline immediate values. For example:
3067 //
3068 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3069 // so it is a legal inline immediate.
3070 //
3071 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3072 // floating-point, so it is a legal inline immediate.
3073
3074 uint32_t Val = static_cast<uint32_t>(Literal);
3075 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
3076 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
3077 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
3078 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
3079 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
3080 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
3081 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
3082 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
3083 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
3084 (Val == 0x3e22f983 && HasInv2Pi);
3085}
3086
3087bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3088 if (!HasInv2Pi)
3089 return false;
3091 return true;
3092 uint16_t Val = static_cast<uint16_t>(Literal);
3093 return Val == 0x3F00 || // 0.5
3094 Val == 0xBF00 || // -0.5
3095 Val == 0x3F80 || // 1.0
3096 Val == 0xBF80 || // -1.0
3097 Val == 0x4000 || // 2.0
3098 Val == 0xC000 || // -2.0
3099 Val == 0x4080 || // 4.0
3100 Val == 0xC080 || // -4.0
3101 Val == 0x3E22; // 1.0 / (2.0 * pi)
3102}
3103
/// \returns true if \p Literal is inlinable as an i16 operand. I16 operands
/// share the 32-bit inline constant rules, so defer to the 32-bit check.
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
  return isInlinableLiteral32(Literal, HasInv2Pi);
}
3107
3108bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3109 if (!HasInv2Pi)
3110 return false;
3112 return true;
3113 uint16_t Val = static_cast<uint16_t>(Literal);
3114 return Val == 0x3C00 || // 1.0
3115 Val == 0xBC00 || // -1.0
3116 Val == 0x3800 || // 0.5
3117 Val == 0xB800 || // -0.5
3118 Val == 0x4000 || // 2.0
3119 Val == 0xC000 || // -2.0
3120 Val == 0x4400 || // 4.0
3121 Val == 0xC400 || // -4.0
3122 Val == 0x3118; // 1/2pi
3123}
3124
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // The ISA reference guide is misleading about inline operands of (packed)
  // 16-bit instructions. The actual HW behavior is:
  //
  // - integer encodings (-16 .. 64) are always produced as sign-extended
  //   32-bit values
  // - float encodings are produced as:
  //   - for F16 instructions: corresponding half-precision float values in
  //     the LSBs, 0 in the MSBs
  //   - for UI16 instructions: corresponding single-precision float value

  // Integer inline constants: 128..192 encode 0..64, 193..208 encode -1..-16.
  int32_t SignedVal = static_cast<int32_t>(Literal);
  if (SignedVal >= 0 && SignedVal <= 64)
    return 128 + SignedVal;
  if (SignedVal >= -16 && SignedVal <= -1)
    return 192 - SignedVal;

  // Float inline constants 240..248: 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0,
  // -4.0, 1/(2*pi) — as fp16 bit patterns for F16, fp32 patterns for UI16.
  struct FPEnc {
    uint32_t Bits;
    unsigned Enc;
  };
  static constexpr FPEnc F16Table[] = {
      {0x3800, 240}, {0xB800, 241}, {0x3C00, 242},
      {0xBC00, 243}, {0x4000, 244}, {0xC000, 245},
      {0x4400, 246}, {0xC400, 247}, {0x3118, 248}};
  static constexpr FPEnc F32Table[] = {
      {0x3F000000, 240}, {0xBF000000, 241}, {0x3F800000, 242},
      {0xBF800000, 243}, {0x40000000, 244}, {0xC0000000, 245},
      {0x40800000, 246}, {0xC0800000, 247}, {0x3E22F983, 248}};

  const FPEnc *Table = IsFloat ? F16Table : F32Table;
  for (size_t I = 0; I != std::size(F16Table); ++I)
    if (Table[I].Bits == Literal)
      return Table[I].Enc;

  return std::nullopt;
}
3177
// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt. Packed i16 uses the non-float (UI16) table of
// getInlineEncodingV216.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  return getInlineEncodingV216(false, Literal);
}
3183
3184// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
3185// or nullopt.
// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  // Integer inline constants: 128..192 encode 0..64, 193..208 encode -1..-16.
  int32_t SignedVal = static_cast<int32_t>(Literal);
  if (SignedVal >= 0 && SignedVal <= 64)
    return 128 + SignedVal;
  if (SignedVal >= -16 && SignedVal <= -1)
    return 192 - SignedVal;

  // bf16 bit patterns for 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1.0/(2.0*pi), encoded consecutively starting at 240.
  static constexpr uint32_t BF16Values[] = {0x3F00, 0xBF00, 0x3F80,
                                            0xBF80, 0x4000, 0xC000,
                                            0x4080, 0xC080, 0x3E22};
  for (size_t I = 0; I != std::size(BF16Values); ++I)
    if (Literal == BF16Values[I])
      return 240 + I;

  return std::nullopt;
}
3211
// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt. Packed f16 uses the float (F16) table of
// getInlineEncodingV216.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(true, Literal);
}
3217
3218// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3219// or nullopt. This accounts for different inline constant behavior:
3220// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3221// - GFX11+: fp16 inline constants are duplicated into both halves
3223 bool IsGFX11Plus) {
3224 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3225 if (!IsGFX11Plus)
3226 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3227
3228 // GFX11+ behavior: f16 duplicated in both halves
3229 // First, check for sign-extended integer inline constants (-16 to 64)
3230 // These work the same across all generations
3231 int32_t Signed = static_cast<int32_t>(Literal);
3232 if (Signed >= 0 && Signed <= 64)
3233 return 128 + Signed;
3234
3235 if (Signed >= -16 && Signed <= -1)
3236 return 192 + std::abs(Signed);
3237
3238 // For float inline constants on GFX11+, both halves must be equal
3239 uint16_t Lo = static_cast<uint16_t>(Literal);
3240 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3241 if (Lo != Hi)
3242 return std::nullopt;
3243 return getInlineEncodingV216(/*IsFloat=*/true, Lo);
3244}
3245
// Whether the given literal can be inlined for a V_PK_* instruction.
  switch (OpType) {
  // Packed-integer operand kinds use the integer (IsFloat=false) encoding.
    return getInlineEncodingV216(false, Literal).has_value();
  // Packed-fp16 operand kinds use the float (IsFloat=true) encoding.
    return getInlineEncodingV216(true, Literal).has_value();
    llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
    return false;
  default:
    llvm_unreachable("bad packed operand type");
  }
}
3266
3267// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3271
3272// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3276
3277// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3281
3282// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
3284 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3285}
3286
3287bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3288 if (IsFP64)
3289 return !Lo_32(Val);
3290
3291 return isUInt<32>(Val) || isInt<32>(Val);
3292}
3293
// Returns the bit pattern emitted for a 32-bit literal operand of the given
// operand Type; IsLit selects the raw-literal form for the 64-bit case.
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
  switch (Type) {
  default:
    break;
  // 16-bit operand kinds keep only the low halfword.
    return Imm & 0xffff;
  // 32-bit operand kinds keep the low word.
    return Lo_32(Imm);
  // 64-bit kinds: a true literal passes through; otherwise only the high
  // word is encoded.
    return IsLit ? Imm : Hi_32(Imm);
  }
  return Imm;
}
3322
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  // Kernel-style conventions: all arguments are passed in SGPRs.
    return true;
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}
3351
// Call-site variant: whether argument ArgNo of call CB is passed in an SGPR.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  switch (CC) {
  // Kernel-style conventions: all arguments are passed in SGPRs.
    return true;
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}
3377
3378static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3379 return isGCN3Encoding(ST) || isGFX10Plus(ST);
3380}
3381
3383 int64_t EncodedOffset) {
3384 if (isGFX12Plus(ST))
3385 return isUInt<23>(EncodedOffset);
3386
3387 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3388 : isUInt<8>(EncodedOffset);
3389}
3390
3392 int64_t EncodedOffset, bool IsBuffer) {
3393 if (isGFX12Plus(ST)) {
3394 if (IsBuffer && EncodedOffset < 0)
3395 return false;
3396 return isInt<24>(EncodedOffset);
3397 }
3398
3399 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3400}
3401
// A dword is four bytes; the offset is aligned iff it is a multiple of 4.
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
3405
3407 uint64_t ByteOffset) {
3408 if (hasSMEMByteOffset(ST))
3409 return ByteOffset;
3410
3411 assert(isDwordAligned(ByteOffset));
3412 return ByteOffset >> 2;
3413}
3414
// Returns the encoded SMRD immediate offset for ByteOffset, or nullopt if it
// cannot be encoded on this subtarget.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
  // Handle case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  // Dword-unit targets cannot encode an unaligned byte offset at all.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
3443
3444std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3445 int64_t ByteOffset) {
3446 if (!isCI(ST) || !isDwordAligned(ByteOffset))
3447 return std::nullopt;
3448
3449 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3450 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3451 : std::nullopt;
3452}
3453
3455 if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
3456 return 12;
3457 if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
3458 return 24;
3459 return 13;
3460}
3461
namespace {

// Row type of the TableGen'erated table queried by lookupSourceOfDivergence;
// keyed by intrinsic ID.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// Row type of the TableGen'erated table queried by lookupAlwaysUniform;
// keyed by intrinsic ID.
struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

// Pull in the generated table implementations for the lookups above and for
// the per-generation buffer-format tables used below.
#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
3483
3484bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3485 return lookupSourceOfDivergence(IntrID);
3486}
3487
3488bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3489 return lookupAlwaysUniform(IntrID);
3490}
3491
                                  uint8_t NumComponents,
                                  uint8_t NumFormat,
                                  const MCSubtargetInfo &STI) {
  // Dispatch to the generation-specific TableGen'erated format table.
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
                                BitsPerComp, NumComponents, NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}
3502
                                  const MCSubtargetInfo &STI) {
  // Same dispatch as above, keyed by the unified format value.
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}
3509
                                      const MCRegisterInfo &MRI) {
  // Candidate VGPR register classes, from narrowest to widest tuple.
  const unsigned VGPRClasses[] = {
      AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
      AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
      AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
      AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
      AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
      AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
      AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
      AMDGPU::VReg_1024RegClassID};

  // Return the first (narrowest) class containing Reg, or null if Reg is
  // not in any VGPR class.
  for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
    if (RC.contains(Reg))
      return &RC;
  }

  return nullptr;
}
3530
  // Mask the hardware register index out of the encoding and return the bits
  // above the low 8 index bits.
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  return Idx >> 8;
}
3536
                                 const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  // Index already carries MSBs: nothing to combine.
  if (Idx >= 0x100)
    return MCRegister();

  // Only VGPRs can take MSBs; bail out for anything else.
  const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
  if (!RC)
    return MCRegister();

  // Splice the MSBs in above the low 8 index bits.
  Idx |= MSBs << 8;
  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
    // This class has 2048 registers with interleaved lo16 and hi16.
    Idx *= 2;
    ++Idx;
  }

  return RC->getRegister(Idx);
}
3558
// Extracts the VGPR-MSB field from an s_setreg immediate (Imm) and selector
// (Simm16), or returns nullopt if the write does not touch that field.
static std::optional<unsigned>
convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
                           bool HasSetregVGPRMSBFixup) {
  constexpr unsigned VGPRMSBShift =

  // Decode the selector into hwreg id, field offset and field size.
  auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
  if (HwRegId != Hwreg::ID_MODE ||
      (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
    return {};
  // If there is SetregVGPRMSBFixup then Offset is ignored.
  if (!HasSetregVGPRMSBFixup)
    Imm <<= Offset;
  Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
  if (!HasSetregVGPRMSBFixup)
  return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
}
3577
3578std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3579 bool HasSetregVGPRMSBFixup) {
3580 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3581 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3582 MI.getOperand(1).getImm(),
3583 HasSetregVGPRMSBFixup);
3584}
3585
3586std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3587 bool HasSetregVGPRMSBFixup) {
3588 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3589 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3590 MI.getOperand(1).getImm(),
3591 HasSetregVGPRMSBFixup);
3592}
3593
// Returns the per-encoding tables of up-to-four VGPR operand slots (three
// sources and a destination) used for VGPR MSB lowering; the second table is
// only non-null for VOPD (the Y component). NUM_OPERAND_NAMES marks an
// unused slot.
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
  static const AMDGPU::OpName VOPOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VDSOps[4] = {
      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName FLATOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName BUFOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
  static const AMDGPU::OpName VIMGOps[4] = {
      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
      AMDGPU::OpName::vdata};

  // For VOPD instructions MSB of a corresponding Y component operand VGPR
  // address is supposed to match X operand, otherwise VOPD shall not be
  // combined.
  static const AMDGPU::OpName VOPDOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
      AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
      AMDGPU::OpName::vdstY};

  // VOP2 MADMK instructions use src0, imm, src1 scheme.
  static const AMDGPU::OpName VOP2MADMKOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

  unsigned TSFlags = Desc.TSFlags;

  if (TSFlags &
    switch (Desc.getOpcode()) {
    // LD_SCALE operands ignore MSB.
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
      return {};
    // FMAMK carries an inline immediate in the middle source slot.
    case AMDGPU::V_FMAMK_F16:
    case AMDGPU::V_FMAMK_F16_t16:
    case AMDGPU::V_FMAMK_F16_t16_gfx12:
    case AMDGPU::V_FMAMK_F16_fake16:
    case AMDGPU::V_FMAMK_F16_fake16_gfx12:
    case AMDGPU::V_FMAMK_F32:
    case AMDGPU::V_FMAMK_F32_gfx12:
    case AMDGPU::V_FMAMK_F64:
    case AMDGPU::V_FMAMK_F64_gfx1250:
      return {VOP2MADMKOps, nullptr};
    default:
      break;
    }
    return {VOPOps, nullptr};
  }

  if (TSFlags & SIInstrFlags::DS)
    return {VDSOps, nullptr};

  if (TSFlags & SIInstrFlags::FLAT)
    return {FLATOps, nullptr};

  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
    return {BUFOps, nullptr};

  if (TSFlags & SIInstrFlags::VIMAGE)
    return {VIMGOps, nullptr};

  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
  }

  assert(!(TSFlags & SIInstrFlags::MIMG));

  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
                     " these instructions are not expected on gfx1250");

  return {};
}
3687
3688bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3689 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3690
3691 if (TSFlags & SIInstrFlags::SMRD)
3692 return !getSMEMIsBuffer(Opcode);
3693 if (!(TSFlags & SIInstrFlags::FLAT))
3694 return false;
3695
3696 // Only SV and SVS modes are supported.
3697 if (TSFlags & SIInstrFlags::FlatScratch)
3698 return hasNamedOperand(Opcode, OpName::vaddr);
3699
3700 // Only GVS mode is supported.
3701 return hasNamedOperand(Opcode, OpName::vaddr) &&
3702 hasNamedOperand(Opcode, OpName::saddr);
3703
3704 return false;
3705}
3706
3707bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3708 const MCSubtargetInfo &ST) {
3709 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3710 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3711 if (Idx == -1)
3712 continue;
3713
3714 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3715 int16_t RegClass = MII.getOpRegClassID(
3716 OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3717 if (RegClass == AMDGPU::VReg_64RegClassID ||
3718 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3719 return true;
3720 }
3721
3722 return false;
3723}
3724
3725bool isDPALU_DPP32BitOpc(unsigned Opc) {
3726 switch (Opc) {
3727 case AMDGPU::V_MUL_LO_U32_e64:
3728 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3729 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3730 case AMDGPU::V_MUL_HI_U32_e64:
3731 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3732 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3733 case AMDGPU::V_MUL_HI_I32_e64:
3734 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3735 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3736 case AMDGPU::V_MAD_U32_e64:
3737 case AMDGPU::V_MAD_U32_e64_dpp:
3738 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3739 return true;
3740 default:
3741 return false;
3742 }
3743}
3744
3745bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3746 const MCSubtargetInfo &ST) {
3747 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3748 return false;
3749
3750 if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3751 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3752
3753 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3754}
3755
  // Select by the subtarget's addressable-LDS-size feature. This mapping must
  // stay in sync with getAddressableLocalMemorySize (see the default below).
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
  return 64; // In sync with getAddressableLocalMemorySize
}
3767
3768bool isPackedFP32Inst(unsigned Opc) {
3769 switch (Opc) {
3770 case AMDGPU::V_PK_ADD_F32:
3771 case AMDGPU::V_PK_ADD_F32_gfx12:
3772 case AMDGPU::V_PK_MUL_F32:
3773 case AMDGPU::V_PK_MUL_F32_gfx12:
3774 case AMDGPU::V_PK_FMA_F32:
3775 case AMDGPU::V_PK_FMA_F32_gfx12:
3776 return true;
3777 default:
3778 return false;
3779 }
3780}
3781
// Accessor for the fixed cluster dimensions; only valid when the attribute
// kind is FixedDims.
const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
  assert(isFixedDims() && "expect kind to be FixedDims");
  return Dims;
}
3786
3787std::string ClusterDimsAttr::to_string() const {
3788 SmallString<10> Buffer;
3789 raw_svector_ostream OS(Buffer);
3790
3791 switch (getKind()) {
3792 case Kind::Unknown:
3793 return "";
3794 case Kind::NoCluster: {
3795 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3796 return Buffer.c_str();
3797 }
3798 case Kind::VariableDims: {
3799 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3800 << EncoVariableDims;
3801 return Buffer.c_str();
3802 }
3803 case Kind::FixedDims: {
3804 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3805 return Buffer.c_str();
3806 }
3807 }
3808 llvm_unreachable("Unknown ClusterDimsAttr kind");
3809}
3810
  std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);

  // Classify the parsed attribute: absent -> Unknown; an all-sentinel vector
  // selects NoCluster or VariableDims respectively.
  if (!Attr.has_value())
    AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
    AttrKind = Kind::VariableDims;

  // Only FixedDims carries the actual dimension values.
  ClusterDimsAttr A(AttrKind);
  if (AttrKind == Kind::FixedDims)
    A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

  return A;
}
3829
3830} // namespace AMDGPU
3831
  // Print the symbolic name of the TargetIDSetting and return the stream.
  switch (S) {
    OS << "Unsupported";
    break;
    OS << "Any";
    break;
    OS << "Off";
    break;
    OS << "On";
    break;
  }
  return OS;
}
3850
3851} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file contains the declarations for metadata subclasses.
#define T
uint64_t High
if(PassOpts->AAPipeline)
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1248
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1245
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1251
This file contains some functions that are useful when dealing with strings.
static const int BlockSize
Definition TarWriter.cpp:33
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
const std::array< unsigned, 3 > & getDims() const
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr unsigned id() const
Definition MCRegister.h:82
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
const char * c_str()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
LLVM_ABI StringRef getVendorName() const
Get the vendor (second) component of the triple.
Definition Triple.cpp:1430
LLVM_ABI StringRef getOSName() const
Get the operating system (third) component of the triple.
Definition Triple.cpp:1435
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:436
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:427
LLVM_ABI StringRef getEnvironmentName() const
Get the optional environment (fourth) component of the triple, or "" if empty.
Definition Triple.cpp:1441
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:954
LLVM_ABI StringRef getArchName() const
Get the architecture (first) component of the triple.
Definition Triple.cpp:1426
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
static bool isValidRegPrefix(char C)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition SIDefines.h:257
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition SIDefines.h:259
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FIRST
Definition SIDefines.h:256
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_AC_LAST
Definition SIDefines.h:260
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition ELF.h:384
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition ELF.h:385
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition ELF.h:386
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
constexpr T rotr(T V, int R)
Definition bit.h:397
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
std::string utostr(uint64_t X, bool isNeg=false)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
To bit_cast(const From &from) noexcept
Definition bit.h:90
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:188
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ AlwaysUniform
The result value is always uniform.
Definition Uniformity.h:23
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
AMD Kernel Code Object (amd_kernel_code_t).
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
Instruction set architecture version.