//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
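
// For example, getBitMask(4, 3) yields 0x70 (bits [6:4]); packBits(5, 0xffff,
// 4, 3) reinserts the value 5 into that field, giving 0xffdf; and
// unpackBits(0xffdf, 4, 3) recovers 5. (Illustrative values only.)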

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
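
// For example, a 2D sample with an LOD clamp has two coordinate components
// plus one clamp component: with A16 the three 16-bit components pack into
// divideCeil(3, 2) == 2 address dwords, while the 32-bit form needs three.
// (Illustrative case based on the rules above.)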

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->tfe : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) {
  return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}
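
// For example, on a gfx10.3-like wave32 target the allocation granule is 16
// and the total budget is 1024 VGPRs, so a kernel using 96 VGPRs gets
// min(1024 / 96, 16) = 10 waves per EU. (Illustrative numbers derived from
// the helpers above.)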

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
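
// For example, a string attribute such as "amdgpu-flat-work-group-size"="1,256"
// parses to the pair {1, 256}; a malformed value emits an error on the
// LLVMContext and falls back to Default.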

SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size) {
  assert(Size > 2);
  SmallVector<unsigned> Default(Size, 0);

  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  SmallVector<unsigned> Vals(Size, 0);

  LLVMContext &Ctx = F.getContext();

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return Default;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return Default;
  }
  return Vals;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
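
// For example, on GFX9 vmcnt occupies bits [3:0] and [15:14], expcnt bits
// [6:4] and lgkmcnt bits [11:8], so encodeWaitcnt(V, /*Vmcnt=*/3,
// /*Expcnt=*/1, /*Lgkmcnt=*/2) yields 0x213; decodeWaitcnt inverts the
// packing. (Worked example based on the bit widths defined above.)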

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  } else {
    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                  getLoadcntBitWidth(Version.Major));
    return Dscnt | Loadcnt;
  }
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names first,
// followed by aliases if any. It is not required but recommended to arrange
// operands so that the operand encoding matches the operand position in the
// table. This makes disassembly a bit more efficient. Unused slots in the
// table shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
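
// For example, encodeFieldVmVsrc(2) starts from the all-ones pattern 0xffff
// and packs 2 into the 3-bit vm_vsrc field at bit 2, giving 0xffeb, while the
// remaining depctr fields keep their "no wait" defaults. (Worked example
// based on the shifts and widths defined above.)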

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
    {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
    {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
    {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
    {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
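
// For example, getTgtId("pos3") matches the "pos" prefix and resolves to
// ET_POS0 + 3, while "pos03" is rejected for its leading zero and "pos5"
// because it exceeds the maximum index for "pos" targets.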

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
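
// For example, with DFMT_SHIFT == 0 and NFMT_SHIFT == 4, encodeDfmtNfmt(14, 7)
// produces 0x7e, and decodeDfmtNfmt(0x7e, Dfmt, Nfmt) recovers Dfmt == 14 and
// Nfmt == 7. (Illustrative values assuming those shift definitions.)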
1857
1859 if (isGFX11Plus(STI)) {
1860 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1861 if (Name == UfmtSymbolicGFX11[Id])
1862 return Id;
1863 }
1864 } else {
1865 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1866 if (Name == UfmtSymbolicGFX10[Id])
1867 return Id;
1868 }
1869 }
1870 return UFMT_UNDEF;
1871}
1872
1874 if(isValidUnifiedFormat(Id, STI))
1875 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1876 return "";
1877}
1878
1879bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1880 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1881}
1882
1883int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1884 const MCSubtargetInfo &STI) {
1885 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1886 if (isGFX11Plus(STI)) {
1887 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1888 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1889 return Id;
1890 }
1891 } else {
1892 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1893 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1894 return Id;
1895 }
1896 }
1897 return UFMT_UNDEF;
1898}
1899
1900bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1901 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1902}
1903
1904 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1905 if (isGFX10Plus(STI))
1906 return UFMT_DEFAULT;
1907 return DFMT_NFMT_DEFAULT;
1908}
1909
1910} // namespace MTBUFFormat
1911
1912//===----------------------------------------------------------------------===//
1913// SendMsg
1914//===----------------------------------------------------------------------===//
1915
1916namespace SendMsg {
1917
1918 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1919 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1920 }
1921
1922int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1923 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1924 return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1925}
1926
1927bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1928 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1929}
1930
1931StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1932 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1933 return (Idx < 0) ? "" : Msg[Idx].Name;
1934}
1935
1936int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1937 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1938 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1939 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1940 for (int i = F; i < L; ++i) {
1941 if (Name == S[i]) {
1942 return i;
1943 }
1944 }
1945 return OP_UNKNOWN_;
1946}
1947
1948bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1949 bool Strict) {
1950 assert(isValidMsgId(MsgId, STI));
1951
1952 if (!Strict)
1953 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1954
1955 if (MsgId == ID_SYSMSG)
1956 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1957 if (!isGFX11Plus(STI)) {
1958 switch (MsgId) {
1959 case ID_GS_PreGFX11:
1960 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1961 case ID_GS_DONE_PreGFX11:
1962 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1963 }
1964 }
1965 return OpId == OP_NONE_;
1966}
1967
1968StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1969 const MCSubtargetInfo &STI) {
1970 assert(msgRequiresOp(MsgId, STI));
1971 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1972}
1973
1974bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1975 const MCSubtargetInfo &STI, bool Strict) {
1976 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1977
1978 if (!Strict)
1979 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1980
1981 if (!isGFX11Plus(STI)) {
1982 switch (MsgId) {
1983 case ID_GS_PreGFX11:
1984 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1985 case ID_GS_DONE_PreGFX11:
1986 return (OpId == OP_GS_NOP) ?
1987 (StreamId == STREAM_ID_NONE_) :
1988 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1989 }
1990 }
1991 return StreamId == STREAM_ID_NONE_;
1992}
1993
1994bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1995 return MsgId == ID_SYSMSG ||
1996 (!isGFX11Plus(STI) &&
1997 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1998}
1999
2000bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2001 const MCSubtargetInfo &STI) {
2002 return !isGFX11Plus(STI) &&
2003 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2004 OpId != OP_GS_NOP;
2005}
2006
2007void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2008 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2009 MsgId = Val & getMsgIdMask(STI);
2010 if (isGFX11Plus(STI)) {
2011 OpId = 0;
2012 StreamId = 0;
2013 } else {
2014 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2015 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2016 }
2017}
2018
2019 uint64_t encodeMsg(uint64_t MsgId,
2020 uint64_t OpId,
2021 uint64_t StreamId) {
2022 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2023}
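// Round-trip sketch (illustrative, not part of the original file; assumes a
// pre-GFX11 subtarget STI, where the op and stream fields are still encoded):
//   uint16_t MsgId = 0, OpId = 0, StreamId = 0;
//   uint64_t Val = encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, 1);
//   decodeMsg(Val, MsgId, OpId, StreamId, STI); // recovers all three fields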
2024
2025} // namespace SendMsg
2026
2027//===----------------------------------------------------------------------===//
2028//
2029//===----------------------------------------------------------------------===//
2030
2031 unsigned getInitialPSInputAddr(const Function &F) {
2032 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2033}
2034
2035 bool getHasColorExport(const Function &F) {
2036 // As a safe default always respond as if PS has color exports.
2037 return F.getFnAttributeAsParsedInteger(
2038 "amdgpu-color-export",
2039 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2040}
2041
2042 bool getHasDepthExport(const Function &F) {
2043 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2044}
2045
2046 bool isShader(CallingConv::ID cc) {
2047 switch (cc) {
2048 case CallingConv::AMDGPU_VS:
2049 case CallingConv::AMDGPU_LS:
2050 case CallingConv::AMDGPU_HS:
2051 case CallingConv::AMDGPU_ES:
2052 case CallingConv::AMDGPU_GS:
2053 case CallingConv::AMDGPU_PS:
2054 case CallingConv::AMDGPU_CS_Chain:
2055 case CallingConv::AMDGPU_CS_ChainPreserve:
2056 case CallingConv::AMDGPU_CS:
2057 return true;
2058 default:
2059 return false;
2060 }
2061}
2062
2063 bool isGraphics(CallingConv::ID cc) {
2064 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2065}
2066
2067 bool isCompute(CallingConv::ID cc) {
2068 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2069}
2070
2071 bool isEntryFunctionCC(CallingConv::ID CC) {
2072 switch (CC) {
2073 case CallingConv::AMDGPU_KERNEL:
2074 case CallingConv::SPIR_KERNEL:
2075 case CallingConv::AMDGPU_VS:
2076 case CallingConv::AMDGPU_GS:
2077 case CallingConv::AMDGPU_PS:
2078 case CallingConv::AMDGPU_CS:
2079 case CallingConv::AMDGPU_ES:
2080 case CallingConv::AMDGPU_HS:
2081 case CallingConv::AMDGPU_LS:
2082 return true;
2083 default:
2084 return false;
2085 }
2086}
2087
2088 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2089 switch (CC) {
2090 case CallingConv::AMDGPU_Gfx:
2091 return true;
2092 default:
2093 return isEntryFunctionCC(CC) || isChainCC(CC);
2094 }
2095}
2096
2097 bool isChainCC(CallingConv::ID CC) {
2098 switch (CC) {
2099 case CallingConv::AMDGPU_CS_Chain:
2100 case CallingConv::AMDGPU_CS_ChainPreserve:
2101 return true;
2102 default:
2103 return false;
2104 }
2105}
2106
2107bool isKernelCC(const Function *Func) {
2108 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2109}
2110
2111bool hasXNACK(const MCSubtargetInfo &STI) {
2112 return STI.hasFeature(AMDGPU::FeatureXNACK);
2113}
2114
2115bool hasSRAMECC(const MCSubtargetInfo &STI) {
2116 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2117}
2118
2119 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2120 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2121}
2122
2123bool hasA16(const MCSubtargetInfo &STI) {
2124 return STI.hasFeature(AMDGPU::FeatureA16);
2125}
2126
2127bool hasG16(const MCSubtargetInfo &STI) {
2128 return STI.hasFeature(AMDGPU::FeatureG16);
2129}
2130
2131 bool hasPackedD16(const MCSubtargetInfo &STI) {
2132 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2133 !isSI(STI);
2134}
2135
2136bool hasGDS(const MCSubtargetInfo &STI) {
2137 return STI.hasFeature(AMDGPU::FeatureGDS);
2138}
2139
2140unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2141 auto Version = getIsaVersion(STI.getCPU());
2142 if (Version.Major == 10)
2143 return Version.Minor >= 3 ? 13 : 5;
2144 if (Version.Major == 11)
2145 return 5;
2146 if (Version.Major >= 12)
2147 return HasSampler ? 4 : 5;
2148 return 0;
2149}
2150
2151unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2152
2153bool isSI(const MCSubtargetInfo &STI) {
2154 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2155}
2156
2157bool isCI(const MCSubtargetInfo &STI) {
2158 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2159}
2160
2161bool isVI(const MCSubtargetInfo &STI) {
2162 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2163}
2164
2165bool isGFX9(const MCSubtargetInfo &STI) {
2166 return STI.hasFeature(AMDGPU::FeatureGFX9);
2167}
2168
2169 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2170 return isGFX9(STI) || isGFX10(STI);
2171}
2172
2173 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2174 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2175}
2176
2177 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2178 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2179}
2180
2181bool isGFX8Plus(const MCSubtargetInfo &STI) {
2182 return isVI(STI) || isGFX9Plus(STI);
2183}
2184
2185bool isGFX9Plus(const MCSubtargetInfo &STI) {
2186 return isGFX9(STI) || isGFX10Plus(STI);
2187}
2188
2189bool isGFX10(const MCSubtargetInfo &STI) {
2190 return STI.hasFeature(AMDGPU::FeatureGFX10);
2191}
2192
2193 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2194 return isGFX10(STI) || isGFX11(STI);
2195}
2196
2197 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2198 return isGFX10(STI) || isGFX11Plus(STI);
2199}
2200
2201bool isGFX11(const MCSubtargetInfo &STI) {
2202 return STI.hasFeature(AMDGPU::FeatureGFX11);
2203}
2204
2205 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2206 return isGFX11(STI) || isGFX12Plus(STI);
2207}
2208
2209bool isGFX12(const MCSubtargetInfo &STI) {
2210 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2211}
2212
2213bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2214
2215bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2216
2217 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2218 return !isGFX11Plus(STI);
2219}
2220
2221 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2222 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2223}
2224
2225 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2226 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2227}
2228
2229 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2230 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2231}
2232
2233 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2234 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2235}
2236
2237 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2238 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2239}
2240
2241 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2242 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2243}
2244
2245 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2246 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2247}
2248
2249bool isGFX90A(const MCSubtargetInfo &STI) {
2250 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2251}
2252
2253bool isGFX940(const MCSubtargetInfo &STI) {
2254 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2255}
2256
2257 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2258 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2259}
2260
2261 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2262 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2263}
2264
2265bool hasVOPD(const MCSubtargetInfo &STI) {
2266 return STI.hasFeature(AMDGPU::FeatureVOPD);
2267}
2268
2269 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2270 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2271}
2272
2273 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2274 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2275}
2276
2277int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2278 int32_t ArgNumVGPR) {
2279 if (has90AInsts && ArgNumAGPR)
2280 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2281 return std::max(ArgNumVGPR, ArgNumAGPR);
2282}
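// Worked example for the arithmetic above: with gfx90a-style unified
// allocation (has90AInsts) and ArgNumVGPR = 5, ArgNumAGPR = 3, the AGPR block
// starts at the next multiple of 4, so the total is alignTo(5, 4) + 3 = 11;
// otherwise the two counts share the budget and the result is max(5, 3) = 5.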
2283
2284bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2285 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2286 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2287 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2288 Reg == AMDGPU::SCC;
2289}
2290
2291bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2292 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2293}
2294
2295#define MAP_REG2REG \
2296 using namespace AMDGPU; \
2297 switch(Reg) { \
2298 default: return Reg; \
2299 CASE_CI_VI(FLAT_SCR) \
2300 CASE_CI_VI(FLAT_SCR_LO) \
2301 CASE_CI_VI(FLAT_SCR_HI) \
2302 CASE_VI_GFX9PLUS(TTMP0) \
2303 CASE_VI_GFX9PLUS(TTMP1) \
2304 CASE_VI_GFX9PLUS(TTMP2) \
2305 CASE_VI_GFX9PLUS(TTMP3) \
2306 CASE_VI_GFX9PLUS(TTMP4) \
2307 CASE_VI_GFX9PLUS(TTMP5) \
2308 CASE_VI_GFX9PLUS(TTMP6) \
2309 CASE_VI_GFX9PLUS(TTMP7) \
2310 CASE_VI_GFX9PLUS(TTMP8) \
2311 CASE_VI_GFX9PLUS(TTMP9) \
2312 CASE_VI_GFX9PLUS(TTMP10) \
2313 CASE_VI_GFX9PLUS(TTMP11) \
2314 CASE_VI_GFX9PLUS(TTMP12) \
2315 CASE_VI_GFX9PLUS(TTMP13) \
2316 CASE_VI_GFX9PLUS(TTMP14) \
2317 CASE_VI_GFX9PLUS(TTMP15) \
2318 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2319 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2320 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2321 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2322 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2323 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2324 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2325 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2326 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2327 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2328 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2329 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2330 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2331 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2332 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2333 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2334 CASE_GFXPRE11_GFX11PLUS(M0) \
2335 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2336 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2337 }
2338
2339#define CASE_CI_VI(node) \
2340 assert(!isSI(STI)); \
2341 case node: return isCI(STI) ? node##_ci : node##_vi;
2342
2343#define CASE_VI_GFX9PLUS(node) \
2344 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2345
2346#define CASE_GFXPRE11_GFX11PLUS(node) \
2347 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2348
2349#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2350 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2351
2352unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2353 if (STI.getTargetTriple().getArch() == Triple::r600)
2354 return Reg;
2355 MAP_REG2REG
2356 }
2357
2358#undef CASE_CI_VI
2359#undef CASE_VI_GFX9PLUS
2360#undef CASE_GFXPRE11_GFX11PLUS
2361#undef CASE_GFXPRE11_GFX11PLUS_TO
2362
2363#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2364#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2365#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2366#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2367
2368unsigned mc2PseudoReg(unsigned Reg) {
2369 MAP_REG2REG
2370 }
2371
2372bool isInlineValue(unsigned Reg) {
2373 switch (Reg) {
2374 case AMDGPU::SRC_SHARED_BASE_LO:
2375 case AMDGPU::SRC_SHARED_BASE:
2376 case AMDGPU::SRC_SHARED_LIMIT_LO:
2377 case AMDGPU::SRC_SHARED_LIMIT:
2378 case AMDGPU::SRC_PRIVATE_BASE_LO:
2379 case AMDGPU::SRC_PRIVATE_BASE:
2380 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2381 case AMDGPU::SRC_PRIVATE_LIMIT:
2382 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2383 return true;
2384 case AMDGPU::SRC_VCCZ:
2385 case AMDGPU::SRC_EXECZ:
2386 case AMDGPU::SRC_SCC:
2387 return true;
2388 case AMDGPU::SGPR_NULL:
2389 return true;
2390 default:
2391 return false;
2392 }
2393}
2394
2395#undef CASE_CI_VI
2396#undef CASE_VI_GFX9PLUS
2397#undef CASE_GFXPRE11_GFX11PLUS
2398#undef CASE_GFXPRE11_GFX11PLUS_TO
2399#undef MAP_REG2REG
2400
2401bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2402 assert(OpNo < Desc.NumOperands);
2403 unsigned OpType = Desc.operands()[OpNo].OperandType;
2404 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2405 OpType <= AMDGPU::OPERAND_SRC_LAST;
2406}
2407
2408bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2409 assert(OpNo < Desc.NumOperands);
2410 unsigned OpType = Desc.operands()[OpNo].OperandType;
2411 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2412 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2413}
2414
2415bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2416 assert(OpNo < Desc.NumOperands);
2417 unsigned OpType = Desc.operands()[OpNo].OperandType;
2418 switch (OpType) {
2419 case AMDGPU::OPERAND_REG_IMM_FP32:
2420 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2421 case AMDGPU::OPERAND_REG_IMM_FP64:
2422 case AMDGPU::OPERAND_REG_IMM_FP16:
2423 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2424 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2425 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2426 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2427 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2428 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2429 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2430 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2431 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2432 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2433 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2434 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2435 return true;
2436 default:
2437 return false;
2438 }
2439}
2440
2441bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2442 assert(OpNo < Desc.NumOperands);
2443 unsigned OpType = Desc.operands()[OpNo].OperandType;
2444 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2445 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2446 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2447 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2448 }
2449
2450// Avoid using MCRegisterClass::getSize, since that function will go away
2451// (move from MC* level to Target* level). Return size in bits.
2452unsigned getRegBitWidth(unsigned RCID) {
2453 switch (RCID) {
2454 case AMDGPU::SGPR_LO16RegClassID:
2455 case AMDGPU::AGPR_LO16RegClassID:
2456 return 16;
2457 case AMDGPU::SGPR_32RegClassID:
2458 case AMDGPU::VGPR_32RegClassID:
2459 case AMDGPU::VRegOrLds_32RegClassID:
2460 case AMDGPU::AGPR_32RegClassID:
2461 case AMDGPU::VS_32RegClassID:
2462 case AMDGPU::AV_32RegClassID:
2463 case AMDGPU::SReg_32RegClassID:
2464 case AMDGPU::SReg_32_XM0RegClassID:
2465 case AMDGPU::SRegOrLds_32RegClassID:
2466 return 32;
2467 case AMDGPU::SGPR_64RegClassID:
2468 case AMDGPU::VS_64RegClassID:
2469 case AMDGPU::SReg_64RegClassID:
2470 case AMDGPU::VReg_64RegClassID:
2471 case AMDGPU::AReg_64RegClassID:
2472 case AMDGPU::SReg_64_XEXECRegClassID:
2473 case AMDGPU::VReg_64_Align2RegClassID:
2474 case AMDGPU::AReg_64_Align2RegClassID:
2475 case AMDGPU::AV_64RegClassID:
2476 case AMDGPU::AV_64_Align2RegClassID:
2477 return 64;
2478 case AMDGPU::SGPR_96RegClassID:
2479 case AMDGPU::SReg_96RegClassID:
2480 case AMDGPU::VReg_96RegClassID:
2481 case AMDGPU::AReg_96RegClassID:
2482 case AMDGPU::VReg_96_Align2RegClassID:
2483 case AMDGPU::AReg_96_Align2RegClassID:
2484 case AMDGPU::AV_96RegClassID:
2485 case AMDGPU::AV_96_Align2RegClassID:
2486 return 96;
2487 case AMDGPU::SGPR_128RegClassID:
2488 case AMDGPU::SReg_128RegClassID:
2489 case AMDGPU::VReg_128RegClassID:
2490 case AMDGPU::AReg_128RegClassID:
2491 case AMDGPU::VReg_128_Align2RegClassID:
2492 case AMDGPU::AReg_128_Align2RegClassID:
2493 case AMDGPU::AV_128RegClassID:
2494 case AMDGPU::AV_128_Align2RegClassID:
2495 return 128;
2496 case AMDGPU::SGPR_160RegClassID:
2497 case AMDGPU::SReg_160RegClassID:
2498 case AMDGPU::VReg_160RegClassID:
2499 case AMDGPU::AReg_160RegClassID:
2500 case AMDGPU::VReg_160_Align2RegClassID:
2501 case AMDGPU::AReg_160_Align2RegClassID:
2502 case AMDGPU::AV_160RegClassID:
2503 case AMDGPU::AV_160_Align2RegClassID:
2504 return 160;
2505 case AMDGPU::SGPR_192RegClassID:
2506 case AMDGPU::SReg_192RegClassID:
2507 case AMDGPU::VReg_192RegClassID:
2508 case AMDGPU::AReg_192RegClassID:
2509 case AMDGPU::VReg_192_Align2RegClassID:
2510 case AMDGPU::AReg_192_Align2RegClassID:
2511 case AMDGPU::AV_192RegClassID:
2512 case AMDGPU::AV_192_Align2RegClassID:
2513 return 192;
2514 case AMDGPU::SGPR_224RegClassID:
2515 case AMDGPU::SReg_224RegClassID:
2516 case AMDGPU::VReg_224RegClassID:
2517 case AMDGPU::AReg_224RegClassID:
2518 case AMDGPU::VReg_224_Align2RegClassID:
2519 case AMDGPU::AReg_224_Align2RegClassID:
2520 case AMDGPU::AV_224RegClassID:
2521 case AMDGPU::AV_224_Align2RegClassID:
2522 return 224;
2523 case AMDGPU::SGPR_256RegClassID:
2524 case AMDGPU::SReg_256RegClassID:
2525 case AMDGPU::VReg_256RegClassID:
2526 case AMDGPU::AReg_256RegClassID:
2527 case AMDGPU::VReg_256_Align2RegClassID:
2528 case AMDGPU::AReg_256_Align2RegClassID:
2529 case AMDGPU::AV_256RegClassID:
2530 case AMDGPU::AV_256_Align2RegClassID:
2531 return 256;
2532 case AMDGPU::SGPR_288RegClassID:
2533 case AMDGPU::SReg_288RegClassID:
2534 case AMDGPU::VReg_288RegClassID:
2535 case AMDGPU::AReg_288RegClassID:
2536 case AMDGPU::VReg_288_Align2RegClassID:
2537 case AMDGPU::AReg_288_Align2RegClassID:
2538 case AMDGPU::AV_288RegClassID:
2539 case AMDGPU::AV_288_Align2RegClassID:
2540 return 288;
2541 case AMDGPU::SGPR_320RegClassID:
2542 case AMDGPU::SReg_320RegClassID:
2543 case AMDGPU::VReg_320RegClassID:
2544 case AMDGPU::AReg_320RegClassID:
2545 case AMDGPU::VReg_320_Align2RegClassID:
2546 case AMDGPU::AReg_320_Align2RegClassID:
2547 case AMDGPU::AV_320RegClassID:
2548 case AMDGPU::AV_320_Align2RegClassID:
2549 return 320;
2550 case AMDGPU::SGPR_352RegClassID:
2551 case AMDGPU::SReg_352RegClassID:
2552 case AMDGPU::VReg_352RegClassID:
2553 case AMDGPU::AReg_352RegClassID:
2554 case AMDGPU::VReg_352_Align2RegClassID:
2555 case AMDGPU::AReg_352_Align2RegClassID:
2556 case AMDGPU::AV_352RegClassID:
2557 case AMDGPU::AV_352_Align2RegClassID:
2558 return 352;
2559 case AMDGPU::SGPR_384RegClassID:
2560 case AMDGPU::SReg_384RegClassID:
2561 case AMDGPU::VReg_384RegClassID:
2562 case AMDGPU::AReg_384RegClassID:
2563 case AMDGPU::VReg_384_Align2RegClassID:
2564 case AMDGPU::AReg_384_Align2RegClassID:
2565 case AMDGPU::AV_384RegClassID:
2566 case AMDGPU::AV_384_Align2RegClassID:
2567 return 384;
2568 case AMDGPU::SGPR_512RegClassID:
2569 case AMDGPU::SReg_512RegClassID:
2570 case AMDGPU::VReg_512RegClassID:
2571 case AMDGPU::AReg_512RegClassID:
2572 case AMDGPU::VReg_512_Align2RegClassID:
2573 case AMDGPU::AReg_512_Align2RegClassID:
2574 case AMDGPU::AV_512RegClassID:
2575 case AMDGPU::AV_512_Align2RegClassID:
2576 return 512;
2577 case AMDGPU::SGPR_1024RegClassID:
2578 case AMDGPU::SReg_1024RegClassID:
2579 case AMDGPU::VReg_1024RegClassID:
2580 case AMDGPU::AReg_1024RegClassID:
2581 case AMDGPU::VReg_1024_Align2RegClassID:
2582 case AMDGPU::AReg_1024_Align2RegClassID:
2583 case AMDGPU::AV_1024RegClassID:
2584 case AMDGPU::AV_1024_Align2RegClassID:
2585 return 1024;
2586 default:
2587 llvm_unreachable("Unexpected register class");
2588 }
2589}
2590
2591unsigned getRegBitWidth(const MCRegisterClass &RC) {
2592 return getRegBitWidth(RC.getID());
2593}
2594
2595 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2596 unsigned OpNo) {
2597 assert(OpNo < Desc.NumOperands);
2598 unsigned RCID = Desc.operands()[OpNo].RegClass;
2599 return getRegBitWidth(RCID) / 8;
2600}
2601
2602bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2603 if (isInlinableIntLiteral(Literal))
2604 return true;
2605
2606 uint64_t Val = static_cast<uint64_t>(Literal);
2607 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2608 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2609 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2610 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2611 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2612 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2613 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2614 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2615 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2616 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2617}
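// Examples (illustrative, not part of the original file): the bit pattern of
// double 1.0 is inlinable, a pattern one ULP away is not, and 1/(2*pi) is
// accepted only when HasInv2Pi is set:
//   assert(isInlinableLiteral64(0x3FF0000000000000, /*HasInv2Pi=*/true));
//   assert(!isInlinableLiteral64(0x3FF0000000000001, /*HasInv2Pi=*/true));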
2618
2619bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2620 if (isInlinableIntLiteral(Literal))
2621 return true;
2622
2623 // The actual type of the operand does not seem to matter as long
2624 // as the bits match one of the inline immediate values. For example:
2625 //
2626 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2627 // so it is a legal inline immediate.
2628 //
2629 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2630 // floating-point, so it is a legal inline immediate.
2631
2632 uint32_t Val = static_cast<uint32_t>(Literal);
2633 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2634 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2635 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2636 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2637 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2638 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2639 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2640 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2641 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2642 (Val == 0x3e22f983 && HasInv2Pi);
2643}
2644
2645bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2646 if (!HasInv2Pi)
2647 return false;
2648 if (isInlinableIntLiteral(Literal))
2649 return true;
2650 uint16_t Val = static_cast<uint16_t>(Literal);
2651 return Val == 0x3F00 || // 0.5
2652 Val == 0xBF00 || // -0.5
2653 Val == 0x3F80 || // 1.0
2654 Val == 0xBF80 || // -1.0
2655 Val == 0x4000 || // 2.0
2656 Val == 0xC000 || // -2.0
2657 Val == 0x4080 || // 4.0
2658 Val == 0xC080 || // -4.0
2659 Val == 0x3E22; // 1.0 / (2.0 * pi)
2660}
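// Note for the two half-width tables: bfloat16 keeps the high half of the
// f32 pattern, so 1.0 is 0x3F80 here but 0x3C00 in the IEEE fp16 list below:
//   assert(isInlinableLiteralBF16(0x3F80, /*HasInv2Pi=*/true)); // bf16 1.0
//   assert(isInlinableLiteralFP16(0x3C00, /*HasInv2Pi=*/true)); // fp16 1.0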
2661
2662bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2663 return isInlinableLiteral32(Literal, HasInv2Pi);
2664}
2665
2666bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2667 if (!HasInv2Pi)
2668 return false;
2669 if (isInlinableIntLiteral(Literal))
2670 return true;
2671 uint16_t Val = static_cast<uint16_t>(Literal);
2672 return Val == 0x3C00 || // 1.0
2673 Val == 0xBC00 || // -1.0
2674 Val == 0x3800 || // 0.5
2675 Val == 0xB800 || // -0.5
2676 Val == 0x4000 || // 2.0
2677 Val == 0xC000 || // -2.0
2678 Val == 0x4400 || // 4.0
2679 Val == 0xC400 || // -4.0
2680 Val == 0x3118; // 1/2pi
2681}
2682
2683std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2684 // Unfortunately, the Instruction Set Architecture Reference Guide is
2685 // misleading about how the inline operands work for (packed) 16-bit
2686 // instructions. In a nutshell, the actual HW behavior is:
2687 //
2688 // - integer encodings (-16 .. 64) are always produced as sign-extended
2689 // 32-bit values
2690 // - float encodings are produced as:
2691 // - for F16 instructions: corresponding half-precision float values in
2692 // the LSBs, 0 in the MSBs
2693 // - for UI16 instructions: corresponding single-precision float value
2694 int32_t Signed = static_cast<int32_t>(Literal);
2695 if (Signed >= 0 && Signed <= 64)
2696 return 128 + Signed;
2697
2698 if (Signed >= -16 && Signed <= -1)
2699 return 192 + std::abs(Signed);
2700
2701 if (IsFloat) {
2702 // clang-format off
2703 switch (Literal) {
2704 case 0x3800: return 240; // 0.5
2705 case 0xB800: return 241; // -0.5
2706 case 0x3C00: return 242; // 1.0
2707 case 0xBC00: return 243; // -1.0
2708 case 0x4000: return 244; // 2.0
2709 case 0xC000: return 245; // -2.0
2710 case 0x4400: return 246; // 4.0
2711 case 0xC400: return 247; // -4.0
2712 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2713 default: break;
2714 }
2715 // clang-format on
2716 } else {
2717 // clang-format off
2718 switch (Literal) {
2719 case 0x3F000000: return 240; // 0.5
2720 case 0xBF000000: return 241; // -0.5
2721 case 0x3F800000: return 242; // 1.0
2722 case 0xBF800000: return 243; // -1.0
2723 case 0x40000000: return 244; // 2.0
2724 case 0xC0000000: return 245; // -2.0
2725 case 0x40800000: return 246; // 4.0
2726 case 0xC0800000: return 247; // -4.0
2727 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2728 default: break;
2729 }
2730 // clang-format on
2731 }
2732
2733 return {};
2734}
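// Examples (illustrative, not part of the original file):
//   assert(getInlineEncodingV216(true, 0x3C00) == 242); // fp16 1.0 in LSBs
//   assert(getInlineEncodingV216(false, 32) == 160);    // integer: 128 + 32
// A fully packed pattern such as 0x3C003C00 matches no entry and yields
// std::nullopt, so it has to be emitted as a real literal instead.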
2735
2736// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2737// or nullopt.
2738std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2739 return getInlineEncodingV216(false, Literal);
2740}
2741
2742// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2743// or nullopt.
2744std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2745 int32_t Signed = static_cast<int32_t>(Literal);
2746 if (Signed >= 0 && Signed <= 64)
2747 return 128 + Signed;
2748
2749 if (Signed >= -16 && Signed <= -1)
2750 return 192 + std::abs(Signed);
2751
2752 // clang-format off
2753 switch (Literal) {
2754 case 0x3F00: return 240; // 0.5
2755 case 0xBF00: return 241; // -0.5
2756 case 0x3F80: return 242; // 1.0
2757 case 0xBF80: return 243; // -1.0
2758 case 0x4000: return 244; // 2.0
2759 case 0xC000: return 245; // -2.0
2760 case 0x4080: return 246; // 4.0
2761 case 0xC080: return 247; // -4.0
2762 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2763 default: break;
2764 }
2765 // clang-format on
2766
2767 return std::nullopt;
2768}
2769
2770// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2771// or nullopt.
2772std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2773 return getInlineEncodingV216(true, Literal);
2774}
2775
2776// Whether the given literal can be inlined for a V_PK_* instruction.
2777 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2778 switch (OpType) {
2779 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2780 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2781 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2782 return getInlineEncodingV216(false, Literal).has_value();
2783 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2784 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2785 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2786 return getInlineEncodingV216(true, Literal).has_value();
2787 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2788 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2789 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2790 return isInlinableLiteralV2BF16(Literal);
2791 default:
2792 llvm_unreachable("bad packed operand type");
2793 }
2794}
2795
2796// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2797 bool isInlinableLiteralV2I16(uint32_t Literal) {
2798 return getInlineEncodingV2I16(Literal).has_value();
2799}
2800
2801// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2802 bool isInlinableLiteralV2BF16(uint32_t Literal) {
2803 return getInlineEncodingV2BF16(Literal).has_value();
2804}
2805
2806// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2807 bool isInlinableLiteralV2F16(uint32_t Literal) {
2808 return getInlineEncodingV2F16(Literal).has_value();
2809}
2810
2811bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2812 if (IsFP64)
2813 return !(Val & 0xffffffffu);
2814
2815 return isUInt<32>(Val) || isInt<32>(Val);
2816}
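// Examples (illustrative, not part of the original file): an FP64 immediate
// can only encode its high word, so the low 32 bits must be zero:
//   assert(isValid32BitLiteral(0x4000000000000000, /*IsFP64=*/true));  // 2.0
//   assert(!isValid32BitLiteral(0x3FC45F306DC9C882, /*IsFP64=*/true)); // 1/(2*pi)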
2817
2818 bool isArgPassedInSGPR(const Argument *A) {
2819 const Function *F = A->getParent();
2820
2821 // Arguments to compute shaders are never a source of divergence.
2822 CallingConv::ID CC = F->getCallingConv();
2823 switch (CC) {
2824 case CallingConv::AMDGPU_KERNEL:
2825 case CallingConv::SPIR_KERNEL:
2826 return true;
2827 case CallingConv::AMDGPU_VS:
2828 case CallingConv::AMDGPU_LS:
2829 case CallingConv::AMDGPU_HS:
2830 case CallingConv::AMDGPU_ES:
2831 case CallingConv::AMDGPU_GS:
2832 case CallingConv::AMDGPU_PS:
2833 case CallingConv::AMDGPU_CS:
2834 case CallingConv::AMDGPU_Gfx:
2835 case CallingConv::AMDGPU_CS_Chain:
2836 case CallingConv::AMDGPU_CS_ChainPreserve:
2837 // For non-compute shaders, SGPR inputs are marked with either inreg or
2838 // byval. Everything else is in VGPRs.
2839 return A->hasAttribute(Attribute::InReg) ||
2840 A->hasAttribute(Attribute::ByVal);
2841 default:
2842 // TODO: treat i1 as divergent?
2843 return A->hasAttribute(Attribute::InReg);
2844 }
2845}
2846
2847bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2848 // Arguments to compute shaders are never a source of divergence.
2849 CallingConv::ID CC = CB->getCallingConv();
2850 switch (CC) {
2851 case CallingConv::AMDGPU_KERNEL:
2852 case CallingConv::SPIR_KERNEL:
2853 return true;
2854 case CallingConv::AMDGPU_VS:
2855 case CallingConv::AMDGPU_LS:
2856 case CallingConv::AMDGPU_HS:
2857 case CallingConv::AMDGPU_ES:
2858 case CallingConv::AMDGPU_GS:
2859 case CallingConv::AMDGPU_PS:
2860 case CallingConv::AMDGPU_CS:
2861 case CallingConv::AMDGPU_Gfx:
2862 case CallingConv::AMDGPU_CS_Chain:
2863 case CallingConv::AMDGPU_CS_ChainPreserve:
2864 // For non-compute shaders, SGPR inputs are marked with either inreg or
2865 // byval. Everything else is in VGPRs.
2866 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2867 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2868 default:
2869 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2870 }
2871}
2872
2873static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2874 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2875}
2876
2877 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2878 return isGFX9Plus(ST);
2879}
2880
2881 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2882 int64_t EncodedOffset) {
2883 if (isGFX12Plus(ST))
2884 return isUInt<23>(EncodedOffset);
2885
2886 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2887 : isUInt<8>(EncodedOffset);
2888}
2889
2890 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2891 int64_t EncodedOffset,
2892 bool IsBuffer) {
2893 if (isGFX12Plus(ST))
2894 return isInt<24>(EncodedOffset);
2895
2896 return !IsBuffer &&
2897 hasSMRDSignedImmOffset(ST) &&
2898 isInt<21>(EncodedOffset);
2899}
2900
2901static bool isDwordAligned(uint64_t ByteOffset) {
2902 return (ByteOffset & 3) == 0;
2903}
2904
2905 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2906 uint64_t ByteOffset) {
2907 if (hasSMEMByteOffset(ST))
2908 return ByteOffset;
2909
2910 assert(isDwordAligned(ByteOffset));
2911 return ByteOffset >> 2;
2912}
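// Sketch of the unit change (illustrative): a byte offset of 16 encodes as 4
// on SI/CI (dword units) but stays 16 once hasSMEMByteOffset(ST) holds
// (GCN3 encoding and GFX10+).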
2913
2914std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2915 int64_t ByteOffset, bool IsBuffer) {
2916 if (isGFX12Plus(ST)) // 24 bit signed offsets
2917 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2918 : std::nullopt;
2919
2920 // The signed version is always a byte offset.
2921 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2922 assert(hasSMEMByteOffset(ST));
2923 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2924 : std::nullopt;
2925 }
2926
2927 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2928 return std::nullopt;
2929
2930 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2931 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2932 ? std::optional<int64_t>(EncodedOffset)
2933 : std::nullopt;
2934}
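// Typical use (a sketch, not from the original file): callers fold a byte
// offset into the instruction only when an encodable value comes back.
//   if (std::optional<int64_t> Enc =
//           getSMRDEncodedOffset(ST, ByteOffset, /*IsBuffer=*/false))
//     ; // emit *Enc in the offset field; otherwise materialize the address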
2935
2936std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2937 int64_t ByteOffset) {
2938 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2939 return std::nullopt;
2940
2941 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2942 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2943 : std::nullopt;
2944}
2945
2946 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2947 if (AMDGPU::isGFX10(ST))
2948 return 12;
2949
2950 if (AMDGPU::isGFX12(ST))
2951 return 24;
2952 return 13;
2953}
2954
2955namespace {
2956
2957struct SourceOfDivergence {
2958 unsigned Intr;
2959};
2960const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2961
2962struct AlwaysUniform {
2963 unsigned Intr;
2964};
2965const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2966
2967#define GET_SourcesOfDivergence_IMPL
2968#define GET_UniformIntrinsics_IMPL
2969#define GET_Gfx9BufferFormat_IMPL
2970#define GET_Gfx10BufferFormat_IMPL
2971#define GET_Gfx11PlusBufferFormat_IMPL
2972#include "AMDGPUGenSearchableTables.inc"
2973
2974} // end anonymous namespace
2975
2976bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2977 return lookupSourceOfDivergence(IntrID);
2978}
2979
2980bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2981 return lookupAlwaysUniform(IntrID);
2982}
2983
2984 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2985 uint8_t NumComponents,
2986 uint8_t NumFormat,
2987 const MCSubtargetInfo &STI) {
2988 return isGFX11Plus(STI)
2989 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2990 NumFormat)
2991 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2992 NumComponents, NumFormat)
2993 : getGfx9BufferFormatInfo(BitsPerComp,
2994 NumComponents, NumFormat);
2995}
2996
2997 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2998 const MCSubtargetInfo &STI) {
2999 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3000 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3001 : getGfx9BufferFormatInfo(Format);
3002}
3003
3004 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3005 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
3006 OpName::src2 }) {
3007 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3008 if (Idx == -1)
3009 continue;
3010
3011 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3012 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3013 return true;
3014 }
3015
3016 return false;
3017}
3018
3019bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3020 return hasAny64BitVGPROperands(OpDesc);
3021}
3022
3023 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3024 // Currently this is 128 for all subtargets
3025 return 128;
3026}
3027
3028} // namespace AMDGPU
3029
3030 raw_ostream &operator<<(raw_ostream &OS,
3031 const AMDGPU::IsaInfo::TargetIDSetting S) {
3032 switch (S) {
3033 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3034 OS << "Unsupported";
3035 break;
3036 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3037 OS << "Any";
3038 break;
3039 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3040 OS << "Off";
3041 break;
3042 case (AMDGPU::IsaInfo::TargetIDSetting::On):
3043 OS << "On";
3044 break;
3045 }
3046 return OS;
3047}
3048
3049} // namespace llvm
unsigned const MachineRegisterInfo * MRI
#define MAP_REG2REG
unsigned Intr
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
unsigned Reg
LLVMContext & Context
const SmallVectorImpl< MachineOperand > & Cond
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1153
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1150
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
void setTargetIDFromFeaturesString(StringRef FS)
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1461
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1767
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
This class represents an Operation in the Expression.
Encoding
Size and signedness of expression operations' operands.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:849
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:271
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:370
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMajor
HSA metadata major version.
const CustomOperand< const MCSubtargetInfo & > Opr[]
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
unsigned getVOPDOpcode(unsigned Opc)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isChainCC(CallingConv::ID CC)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
bool isTrue16Inst(unsigned Opc)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getKmcntBitMask(const IsaVersion &Version)
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGenericAtomic(unsigned Opc)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM_LAST
Definition: SIDefines.h:269
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition: SIDefines.h:260
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_KIMM_FIRST
Definition: SIDefines.h:268
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_REG_INLINE_C_FIRST
Definition: SIDefines.h:259
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Returns true if this literal is inlinable.
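The rule this predicate encodes can be sketched as follows. This is a minimal illustration of the AMDGPU inline-constant convention (small integers plus a fixed set of floating-point bit patterns), not a quote of the in-tree implementation; the 1/(2*pi) bit pattern is an assumption and the helper name is hypothetical. Requires C++20 for std::bit_cast.

#include <bit>
#include <cstdint>

static bool isInlinableLiteral64Sketch(int64_t Literal, bool HasInv2Pi) {
  // Small integers are always inline constants.
  if (Literal >= -16 && Literal <= 64)
    return true;
  // Otherwise the 64-bit pattern must match one of a fixed set of doubles.
  const double FPTable[] = {0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0};
  for (double D : FPTable)
    if (std::bit_cast<uint64_t>(D) == static_cast<uint64_t>(Literal))
      return true;
  // 0x3fc45f306dc9c882 is the double bit pattern of 1/(2*pi); accepted only
  // when the subtarget supports it (assumed constant, for illustration).
  return HasInv2Pi &&
         static_cast<uint64_t>(Literal) == 0x3fc45f306dc9c882ULL;
}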
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
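A standalone illustration of that description, using hypothetical enum and function names rather than LLVM's: smear the high-order bit of the enum's largest value downward so every lower bit position is set.

#include <cstdint>
#include <cstdio>

enum Flags : uint32_t { FlagA = 1, FlagB = 2, FlagC = 4, LargestFlag = FlagC };

constexpr uint32_t maskUpTo(uint32_t Largest) {
  // Propagate the highest set bit into all lower positions.
  uint32_t M = Largest;
  M |= M >> 1; M |= M >> 2; M |= M >> 4; M |= M >> 8; M |= M >> 16;
  return M; // e.g. maskUpTo(4) == 0x7
}

int main() { std::printf("0x%x\n", maskUpTo(LargestFlag)); } // prints 0x7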
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
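Predicates such as isGraphics(cc) above classify these identifiers. Here is a hedged sketch of how such a check can be written over these enumerators; it mirrors the stated intent (all shader-stage conventions plus AMDGPU_Gfx count as graphics), not necessarily the exact in-tree predicate.

#include "llvm/IR/CallingConv.h"

static bool isGraphicsSketch(llvm::CallingConv::ID CC) {
  switch (CC) {
  // Every AMDGPU shader-stage calling convention is a graphics convention.
  case llvm::CallingConv::AMDGPU_VS:
  case llvm::CallingConv::AMDGPU_LS:
  case llvm::CallingConv::AMDGPU_HS:
  case llvm::CallingConv::AMDGPU_ES:
  case llvm::CallingConv::AMDGPU_GS:
  case llvm::CallingConv::AMDGPU_PS:
  case llvm::CallingConv::AMDGPU_CS:
  case llvm::CallingConv::AMDGPU_CS_Chain:
  case llvm::CallingConv::AMDGPU_CS_ChainPreserve:
  // AMDGPU_Gfx is also used for AMD graphics targets.
  case llvm::CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return false;
  }
}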
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition: ELF.h:378
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition: ELF.h:379
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition: ELF.h:380
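These constants are what getELFABIVersion(T, CodeObjectVersion) above produces. A hedged sketch of the likely mapping follows; the AMDHSA-only guard and the default case are assumptions, and the sketch is illustrative rather than a quote of the in-tree function.

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>

static uint8_t elfABIVersionSketch(const llvm::Triple &T,
                                   unsigned CodeObjectVersion) {
  // Only AMDHSA triples carry an HSA ABI version byte (assumption).
  if (T.getOS() != llvm::Triple::AMDHSA)
    return 0;
  switch (CodeObjectVersion) {
  case 4: return llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5: return llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6: return llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default: return llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V4; // assumed fallback
  }
}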
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
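One common way to meet this contract, assuming Numerator + Denominator - 1 does not overflow uint64_t (the in-tree version may avoid that differently); the helper name is hypothetical.

#include <cassert>
#include <cstdint>

static uint64_t divideCeilSketch(uint64_t Numerator, uint64_t Denominator) {
  // Integer division rounded toward positive infinity.
  return (Numerator + Denominator - 1) / Denominator;
}

int main() {
  assert(divideCeilSketch(10, 4) == 3); // ceil(10/4) == 3
  assert(divideCeilSketch(8, 4) == 2);  // exact division stays exact
}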
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
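A minimal usage sketch of this overload; the wrapper name and message are illustrative, and the call never returns.

#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"

[[noreturn]] static void failWith(const char *Msg) {
  // Wrap a plain message into an llvm::Error, then abort through the
  // installed fatal-error handler without generating crash diagnostics.
  llvm::report_fatal_error(
      llvm::createStringError(llvm::inconvertibleErrorCode(), Msg),
      /*gen_crash_diag=*/false);
}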
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns the smallest multiple of A that is greater than or equal to Size.
Definition: Alignment.h:155
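For a power-of-two alignment A (which llvm::Align guarantees), that contract reduces to one expression; this sketch uses a raw uint64_t alignment and a hypothetical name.

#include <cassert>
#include <cstdint>

static uint64_t alignToSketch(uint64_t Size, uint64_t A) {
  // Round Size up to the next multiple of A; A must be a power of two here.
  return (Size + A - 1) & ~(A - 1);
}

int main() {
  assert(alignToSketch(10, 8) == 16);
  assert(alignToSketch(16, 8) == 16); // already-aligned values are unchanged
}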
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:428
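A direct transcription of that contract into code, with a hypothetical name; note the skewed case in the second assertion.

#include <cassert>
#include <cstdint>

static uint64_t alignDownSketch(uint64_t Value, uint64_t Align,
                                uint64_t Skew = 0) {
  Skew %= Align;
  // Largest V <= Value with V % Align == Skew.
  return (Value - Skew) / Align * Align + Skew;
}

int main() {
  assert(alignDownSketch(17, 8) == 16);    // plain round-down
  assert(alignDownSketch(17, 8, 3) == 11); // 3, 11, 19, ... are 3 (mod 8)
}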
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
#define N
AMD Kernel Code Object (amd_kernel_code_t).
bool (*Cond)(T Context)
Instruction set architecture version.
Definition: TargetParser.h:125
Represents the counter values to wait for in an s_waitcnt instruction.
Description of the encoding of one expression Op.