//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
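
// Illustrative example (not part of the build): packing a value into a bit
// field and reading it back. With Shift = 4 and Width = 3 the mask is 0x70,
// so:
//   unsigned Enc = packBits(/*Src=*/5, /*Dst=*/0x8F, /*Shift=*/4, /*Width=*/3);
//   // Enc == 0xDF: bits [6:4] now hold 5; all other bits of Dst survive.
//   unsigned Val = unpackBits(Enc, /*Shift=*/4, /*Width=*/3); // Val == 5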

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
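
// Illustrative example (not part of the build): a 2D sample with gradients
// on a target where A16 is enabled but G16 is unsupported. With
// Dim->NumCoords == 2 and Dim->NumGradients == 4, the two packed 16-bit
// coordinates take divideCeil(2, 2) == 1 dword and the 16-bit gradients take
// alignTo<2>(4 / 2) == 2 dwords, so AddrWords grows by 3 on top of
// BaseOpcode->NumExtraArgs (arguments such as offset or bias).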

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) {
  return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}
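
// A sketch of a conforming GetRegIdx callback (illustrative only; Inst and
// getVGPRIdx are hypothetical stand-ins for whatever the caller uses to map
// an MC operand to a VGPR index, returning 0 for non-VGPR operands):
//   auto GetRegIdx = [&](unsigned CompIdx, unsigned MCOprIdx) -> unsigned {
//     return getVGPRIdx(Inst, CompIdx, MCOprIdx); // 0 if not a VGPR
//   };
//   auto BadOprIdx = InstInfo.getInvalidCompOperandIndex(GetRegIdx);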

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else statement is present because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

static unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                             unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  if (Version.Major >= 12) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
  } else {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
  }
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

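// Illustrative example (not part of the build): on gfx9 (Version.Major == 9)
// vmcnt is 6 bits split across the encoding, bits [3:0] (lo) and [15:14]
// (hi). Decoding Waitcnt = 0xC00F therefore yields VmcntLo = 0xF and
// VmcntHi = 0x3, so decodeVmcnt(...) == (0x3 << 4) | 0xF == 0x3F.
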
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}

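// Illustrative round trip (not part of the build; assumes a gfx9 target):
//   IsaVersion V = getIsaVersion("gfx900");
//   unsigned Enc = encodeWaitcnt(V, /*Vmcnt=*/0, /*Expcnt=*/7, /*Lgkmcnt=*/15);
//   // Bits of counters not being waited on stay at their all-ones default.
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(V, Enc, Vm, Exp, Lgkm); // Vm == 0, Exp == 7, Lgkm == 15
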
static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  } else {
    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                  getLoadcntBitWidth(Version.Major));
    return Dscnt | Loadcnt;
  }
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names first,
// followed by aliases if any. It is not required but recommended to arrange
// operands so that operand encodings match operand positions in the table.
// This will make disassembly a bit more efficient. Unused slots in the table
// shall have an empty name.
//
//===----------------------------------------------------------------------===//

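// For illustration, a table following these conventions might look like this
// (hypothetical names and encodings, not entries from the real tables):
//   static const CustomOperand<const MCSubtargetInfo &> ExampleOpr[] = {
//     {{""}},                       // slot 0: unused, empty name
//     {{"op_one"}, 1},              // slot 1: primary name, encoding == 1
//     {{"op_two"}, 2, isGFX10Plus}, // slot 2: only valid on some targets
//     {{"op_1"}, 1},                // alias for "op_one", listed after it
//   };
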
template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
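
// Illustrative example (not part of the build): building a depctr value that
// waits for va_vdst == 0 while leaving every other field at its "no wait"
// encoding, then tightening vm_vsrc on the same value:
//   unsigned Enc = encodeFieldVaVdst(0);   // single-arg form starts from 0xffff
//   Enc = encodeFieldVmVsrc(Enc, 0);       // now also waits for vm_vsrc == 0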

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}
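
// Illustrative round trip (not part of the build). Note that the width is
// stored biased: the encoded field holds Width - 1.
//   uint64_t Enc = encodeHwreg(/*Id=*/1, /*Offset=*/8, /*Width=*/16);
//   unsigned Id, Offset, Width;
//   decodeHwreg(Enc, Id, Offset, Width); // Id == 1, Offset == 8, Width == 16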

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
    {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
    {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
    {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
    {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
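
// Illustrative examples (not part of the build): getTgtId("pos3") returns
// ET_POS0 + 3 and getTgtId("prim") returns ET_PRIM, while "pos03" is
// rejected because leading zeroes in the index are disallowed.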

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
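
// Illustrative round trip (not part of the build):
//   int64_t Fmt = encodeDfmtNfmt(/*Dfmt=*/10, /*Nfmt=*/4);
//   unsigned Dfmt, Nfmt;
//   decodeDfmtNfmt(Fmt, Dfmt, Nfmt); // Dfmt == 10, Nfmt == 4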
1867
1869 if (isGFX11Plus(STI)) {
1870 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1871 if (Name == UfmtSymbolicGFX11[Id])
1872 return Id;
1873 }
1874 } else {
1875 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1876 if (Name == UfmtSymbolicGFX10[Id])
1877 return Id;
1878 }
1879 }
1880 return UFMT_UNDEF;
1881}
1882
1883StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1884 if (isValidUnifiedFormat(Id, STI))
1885 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1886 return "";
1887}
1888
1889bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1890 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1891}
1892
1893int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1894 const MCSubtargetInfo &STI) {
1895 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1896 if (isGFX11Plus(STI)) {
1897 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1898 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1899 return Id;
1900 }
1901 } else {
1902 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1903 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1904 return Id;
1905 }
1906 }
1907 return UFMT_UNDEF;
1908}
1909
1910bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1911 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1912}
1913
1914unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1915 if (isGFX10Plus(STI))
1916 return UFMT_DEFAULT;
1917 return DFMT_NFMT_DEFAULT;
1918}
1919
1920} // namespace MTBUFFormat
1921
1922//===----------------------------------------------------------------------===//
1923// SendMsg
1924//===----------------------------------------------------------------------===//
1925
1926namespace SendMsg {
1927
1928static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1929 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1930}
1931
1932int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1933 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1934 return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1935}
1936
1937bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1938 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1939}
1940
1941StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1942 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1943 return (Idx < 0) ? "" : Msg[Idx].Name;
1944}
1945
1946int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1947 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1948 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1949 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1950 for (int i = F; i < L; ++i) {
1951 if (Name == S[i]) {
1952 return i;
1953 }
1954 }
1955 return OP_UNKNOWN_;
1956}
1957
1958bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1959 bool Strict) {
1960 assert(isValidMsgId(MsgId, STI));
1961
1962 if (!Strict)
1963 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1964
1965 if (MsgId == ID_SYSMSG)
1966 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1967 if (!isGFX11Plus(STI)) {
1968 switch (MsgId) {
1969 case ID_GS_PreGFX11:
1970 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1971 case ID_GS_DONE_PreGFX11:
1972 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1973 }
1974 }
1975 return OpId == OP_NONE_;
1976}
1977
1978StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1979 const MCSubtargetInfo &STI) {
1980 assert(msgRequiresOp(MsgId, STI));
1981 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1982}
1983
1984bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1985 const MCSubtargetInfo &STI, bool Strict) {
1986 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1987
1988 if (!Strict)
1989 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1990
1991 if (!isGFX11Plus(STI)) {
1992 switch (MsgId) {
1993 case ID_GS_PreGFX11:
1994 case ID_GS_DONE_PreGFX11:
1996 return (OpId == OP_GS_NOP) ?
1997 (StreamId == STREAM_ID_NONE_) :
1998 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1999 }
2000 }
2001 return StreamId == STREAM_ID_NONE_;
2002}
2003
2004bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2005 return MsgId == ID_SYSMSG ||
2006 (!isGFX11Plus(STI) &&
2007 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2008}
2009
2010bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2011 const MCSubtargetInfo &STI) {
2012 return !isGFX11Plus(STI) &&
2013 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2014 OpId != OP_GS_NOP;
2015}
2016
2017void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2018 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2019 MsgId = Val & getMsgIdMask(STI);
2020 if (isGFX11Plus(STI)) {
2021 OpId = 0;
2022 StreamId = 0;
2023 } else {
2024 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2025 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2026 }
2027}
2028
2029uint64_t encodeMsg(uint64_t MsgId,
2030 uint64_t OpId,
2031 uint64_t StreamId) {
2032 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2033}
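// encodeMsg is the inverse of decodeMsg for in-range fields: on pre-GFX11
// targets the OpId and StreamId fields round-trip through OP_SHIFT_ and
// STREAM_ID_SHIFT_; on GFX11+ only the MsgId field is meaningful.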
2034
2035} // namespace SendMsg
2036
2037//===----------------------------------------------------------------------===//
2038//
2039//===----------------------------------------------------------------------===//
2040
2042 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2043}
2044
2045bool getHasColorExport(const Function &F) {
2046 // As a safe default always respond as if PS has color exports.
2047 return F.getFnAttributeAsParsedInteger(
2048 "amdgpu-color-export",
2049 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2050}
2051
2053 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2054}
2055
2056bool isShader(CallingConv::ID cc) {
2057 switch(cc) {
2058 case CallingConv::AMDGPU_VS:
2059 case CallingConv::AMDGPU_LS:
2060 case CallingConv::AMDGPU_HS:
2061 case CallingConv::AMDGPU_ES:
2062 case CallingConv::AMDGPU_GS:
2063 case CallingConv::AMDGPU_PS:
2064 case CallingConv::AMDGPU_CS_Chain:
2065 case CallingConv::AMDGPU_CS_ChainPreserve:
2066 case CallingConv::AMDGPU_CS:
2067 return true;
2068 default:
2069 return false;
2070 }
2071}
2072
2073bool isGraphics(CallingConv::ID cc) {
2074 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2075}
2076
2077bool isCompute(CallingConv::ID cc) {
2078 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2079}
2080
2081bool isEntryFunctionCC(CallingConv::ID CC) {
2082 switch (CC) {
2083 case CallingConv::AMDGPU_KERNEL:
2084 case CallingConv::SPIR_KERNEL:
2085 case CallingConv::AMDGPU_VS:
2086 case CallingConv::AMDGPU_GS:
2087 case CallingConv::AMDGPU_PS:
2088 case CallingConv::AMDGPU_CS:
2089 case CallingConv::AMDGPU_ES:
2090 case CallingConv::AMDGPU_HS:
2091 case CallingConv::AMDGPU_LS:
2092 return true;
2093 default:
2094 return false;
2095 }
2096}
2097
2098bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2099 switch (CC) {
2100 case CallingConv::AMDGPU_Gfx:
2101 return true;
2102 default:
2103 return isEntryFunctionCC(CC) || isChainCC(CC);
2104 }
2105}
2106
2107bool isChainCC(CallingConv::ID CC) {
2108 switch (CC) {
2109 case CallingConv::AMDGPU_CS_Chain:
2110 case CallingConv::AMDGPU_CS_ChainPreserve:
2111 return true;
2112 default:
2113 return false;
2114 }
2115}
2116
2117bool isKernelCC(const Function *Func) {
2118 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2119}
2120
2121bool hasXNACK(const MCSubtargetInfo &STI) {
2122 return STI.hasFeature(AMDGPU::FeatureXNACK);
2123}
2124
2125bool hasSRAMECC(const MCSubtargetInfo &STI) {
2126 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2127}
2128
2129bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2130 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2131}
2132
2133bool hasA16(const MCSubtargetInfo &STI) {
2134 return STI.hasFeature(AMDGPU::FeatureA16);
2135}
2136
2137bool hasG16(const MCSubtargetInfo &STI) {
2138 return STI.hasFeature(AMDGPU::FeatureG16);
2139}
2140
2141bool hasPackedD16(const MCSubtargetInfo &STI) {
2142 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2143 !isSI(STI);
2144}
2145
2146bool hasGDS(const MCSubtargetInfo &STI) {
2147 return STI.hasFeature(AMDGPU::FeatureGDS);
2148}
2149
2150unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2151 auto Version = getIsaVersion(STI.getCPU());
2152 if (Version.Major == 10)
2153 return Version.Minor >= 3 ? 13 : 5;
2154 if (Version.Major == 11)
2155 return 5;
2156 if (Version.Major >= 12)
2157 return HasSampler ? 4 : 5;
2158 return 0;
2159}
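// Illustrative values: gfx1010 allows 5 NSA address registers, gfx1030
// allows 13, gfx11 allows 5, and gfx12 allows 4 with a sampler (5 without).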
2160
2161unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2162
2163bool isSI(const MCSubtargetInfo &STI) {
2164 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2165}
2166
2167bool isCI(const MCSubtargetInfo &STI) {
2168 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2169}
2170
2171bool isVI(const MCSubtargetInfo &STI) {
2172 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2173}
2174
2175bool isGFX9(const MCSubtargetInfo &STI) {
2176 return STI.hasFeature(AMDGPU::FeatureGFX9);
2177}
2178
2179bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2180 return isGFX9(STI) || isGFX10(STI);
2181}
2182
2183bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2184 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2185}
2186
2187bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2188 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2189}
2190
2191bool isGFX8Plus(const MCSubtargetInfo &STI) {
2192 return isVI(STI) || isGFX9Plus(STI);
2193}
2194
2195bool isGFX9Plus(const MCSubtargetInfo &STI) {
2196 return isGFX9(STI) || isGFX10Plus(STI);
2197}
2198
2199bool isGFX10(const MCSubtargetInfo &STI) {
2200 return STI.hasFeature(AMDGPU::FeatureGFX10);
2201}
2202
2203bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2204 return isGFX10(STI) || isGFX11(STI);
2205}
2206
2207bool isGFX10Plus(const MCSubtargetInfo &STI) {
2208 return isGFX10(STI) || isGFX11Plus(STI);
2209}
2210
2211bool isGFX11(const MCSubtargetInfo &STI) {
2212 return STI.hasFeature(AMDGPU::FeatureGFX11);
2213}
2214
2215bool isGFX11Plus(const MCSubtargetInfo &STI) {
2216 return isGFX11(STI) || isGFX12Plus(STI);
2217}
2218
2219bool isGFX12(const MCSubtargetInfo &STI) {
2220 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2221}
2222
2223bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2224
2225bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2226
2227bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2228 return !isGFX11Plus(STI);
2229}
2230
2231bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2232 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2233}
2234
2235bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2236 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2237}
2238
2239bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2240 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2241}
2242
2243bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2244 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2245}
2246
2247bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2248 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2249}
2250
2251bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2252 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2253}
2254
2255bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2256 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2257}
2258
2259bool isGFX90A(const MCSubtargetInfo &STI) {
2260 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2261}
2262
2263bool isGFX940(const MCSubtargetInfo &STI) {
2264 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2265}
2266
2267bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2268 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2269}
2270
2271bool hasMAIInsts(const MCSubtargetInfo &STI) {
2272 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2273}
2274
2275bool hasVOPD(const MCSubtargetInfo &STI) {
2276 return STI.hasFeature(AMDGPU::FeatureVOPD);
2277}
2278
2279bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2280 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2281}
2282
2283unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2284 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2285}
2286
2287int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2288 int32_t ArgNumVGPR) {
2289 if (has90AInsts && ArgNumAGPR)
2290 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2291 return std::max(ArgNumVGPR, ArgNumAGPR);
2292}
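// Example: getTotalNumVGPRs(true, 5, 6) == alignTo(6, 4) + 5 == 13, since
// gfx90a allocates AGPRs after the VGPRs at a 4-register granularity;
// without 90A insts the result is simply max(ArgNumVGPR, ArgNumAGPR).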
2293
2294bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2295 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2296 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2297 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2298 Reg == AMDGPU::SCC;
2299}
2300
2301bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2302 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2303}
2304
2305#define MAP_REG2REG \
2306 using namespace AMDGPU; \
2307 switch(Reg) { \
2308 default: return Reg; \
2309 CASE_CI_VI(FLAT_SCR) \
2310 CASE_CI_VI(FLAT_SCR_LO) \
2311 CASE_CI_VI(FLAT_SCR_HI) \
2312 CASE_VI_GFX9PLUS(TTMP0) \
2313 CASE_VI_GFX9PLUS(TTMP1) \
2314 CASE_VI_GFX9PLUS(TTMP2) \
2315 CASE_VI_GFX9PLUS(TTMP3) \
2316 CASE_VI_GFX9PLUS(TTMP4) \
2317 CASE_VI_GFX9PLUS(TTMP5) \
2318 CASE_VI_GFX9PLUS(TTMP6) \
2319 CASE_VI_GFX9PLUS(TTMP7) \
2320 CASE_VI_GFX9PLUS(TTMP8) \
2321 CASE_VI_GFX9PLUS(TTMP9) \
2322 CASE_VI_GFX9PLUS(TTMP10) \
2323 CASE_VI_GFX9PLUS(TTMP11) \
2324 CASE_VI_GFX9PLUS(TTMP12) \
2325 CASE_VI_GFX9PLUS(TTMP13) \
2326 CASE_VI_GFX9PLUS(TTMP14) \
2327 CASE_VI_GFX9PLUS(TTMP15) \
2328 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2329 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2330 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2331 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2332 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2333 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2334 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2335 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2336 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2337 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2338 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2339 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2340 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2341 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2342 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2343 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2344 CASE_GFXPRE11_GFX11PLUS(M0) \
2345 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2346 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2347 }
2348
2349#define CASE_CI_VI(node) \
2350 assert(!isSI(STI)); \
2351 case node: return isCI(STI) ? node##_ci : node##_vi;
2352
2353#define CASE_VI_GFX9PLUS(node) \
2354 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2355
2356#define CASE_GFXPRE11_GFX11PLUS(node) \
2357 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2358
2359#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2360 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2361
2362unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2363 if (STI.getTargetTriple().getArch() == Triple::r600)
2364 return Reg;
2365 MAP_REG2REG
2366}
2367
2368#undef CASE_CI_VI
2369#undef CASE_VI_GFX9PLUS
2370#undef CASE_GFXPRE11_GFX11PLUS
2371#undef CASE_GFXPRE11_GFX11PLUS_TO
2372
2373#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2374#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2375#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2376#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2377
2378unsigned mc2PseudoReg(unsigned Reg) {
2379 MAP_REG2REG
2380}
2381
2382bool isInlineValue(unsigned Reg) {
2383 switch (Reg) {
2384 case AMDGPU::SRC_SHARED_BASE_LO:
2385 case AMDGPU::SRC_SHARED_BASE:
2386 case AMDGPU::SRC_SHARED_LIMIT_LO:
2387 case AMDGPU::SRC_SHARED_LIMIT:
2388 case AMDGPU::SRC_PRIVATE_BASE_LO:
2389 case AMDGPU::SRC_PRIVATE_BASE:
2390 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2391 case AMDGPU::SRC_PRIVATE_LIMIT:
2392 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2393 return true;
2394 case AMDGPU::SRC_VCCZ:
2395 case AMDGPU::SRC_EXECZ:
2396 case AMDGPU::SRC_SCC:
2397 return true;
2398 case AMDGPU::SGPR_NULL:
2399 return true;
2400 default:
2401 return false;
2402 }
2403}
2404
2405#undef CASE_CI_VI
2406#undef CASE_VI_GFX9PLUS
2407#undef CASE_GFXPRE11_GFX11PLUS
2408#undef CASE_GFXPRE11_GFX11PLUS_TO
2409#undef MAP_REG2REG
2410
2411bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2412 assert(OpNo < Desc.NumOperands);
2413 unsigned OpType = Desc.operands()[OpNo].OperandType;
2414 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2415 OpType <= AMDGPU::OPERAND_SRC_LAST;
2416}
2417
2418bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2419 assert(OpNo < Desc.NumOperands);
2420 unsigned OpType = Desc.operands()[OpNo].OperandType;
2421 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2422 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2423}
2424
2425bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2426 assert(OpNo < Desc.NumOperands);
2427 unsigned OpType = Desc.operands()[OpNo].OperandType;
2428 switch (OpType) {
2429 case AMDGPU::OPERAND_REG_IMM_FP32:
2430 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2431 case AMDGPU::OPERAND_REG_IMM_FP64:
2432 case AMDGPU::OPERAND_REG_IMM_FP16:
2433 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2434 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2435 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2436 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2437 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2438 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2439 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2440 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2441 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2442 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2443 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2444 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2445 return true;
2446 default:
2447 return false;
2448 }
2449}
2450
2451bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2452 assert(OpNo < Desc.NumOperands);
2453 unsigned OpType = Desc.operands()[OpNo].OperandType;
2454 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2455 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2456 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2457 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2458}
2459
2460// Avoid using MCRegisterClass::getSize, since that function will go away
2461// (move from MC* level to Target* level). Return size in bits.
2462unsigned getRegBitWidth(unsigned RCID) {
2463 switch (RCID) {
2464 case AMDGPU::SGPR_LO16RegClassID:
2465 case AMDGPU::AGPR_LO16RegClassID:
2466 return 16;
2467 case AMDGPU::SGPR_32RegClassID:
2468 case AMDGPU::VGPR_32RegClassID:
2469 case AMDGPU::VRegOrLds_32RegClassID:
2470 case AMDGPU::AGPR_32RegClassID:
2471 case AMDGPU::VS_32RegClassID:
2472 case AMDGPU::AV_32RegClassID:
2473 case AMDGPU::SReg_32RegClassID:
2474 case AMDGPU::SReg_32_XM0RegClassID:
2475 case AMDGPU::SRegOrLds_32RegClassID:
2476 return 32;
2477 case AMDGPU::SGPR_64RegClassID:
2478 case AMDGPU::VS_64RegClassID:
2479 case AMDGPU::SReg_64RegClassID:
2480 case AMDGPU::VReg_64RegClassID:
2481 case AMDGPU::AReg_64RegClassID:
2482 case AMDGPU::SReg_64_XEXECRegClassID:
2483 case AMDGPU::VReg_64_Align2RegClassID:
2484 case AMDGPU::AReg_64_Align2RegClassID:
2485 case AMDGPU::AV_64RegClassID:
2486 case AMDGPU::AV_64_Align2RegClassID:
2487 return 64;
2488 case AMDGPU::SGPR_96RegClassID:
2489 case AMDGPU::SReg_96RegClassID:
2490 case AMDGPU::VReg_96RegClassID:
2491 case AMDGPU::AReg_96RegClassID:
2492 case AMDGPU::VReg_96_Align2RegClassID:
2493 case AMDGPU::AReg_96_Align2RegClassID:
2494 case AMDGPU::AV_96RegClassID:
2495 case AMDGPU::AV_96_Align2RegClassID:
2496 return 96;
2497 case AMDGPU::SGPR_128RegClassID:
2498 case AMDGPU::SReg_128RegClassID:
2499 case AMDGPU::VReg_128RegClassID:
2500 case AMDGPU::AReg_128RegClassID:
2501 case AMDGPU::VReg_128_Align2RegClassID:
2502 case AMDGPU::AReg_128_Align2RegClassID:
2503 case AMDGPU::AV_128RegClassID:
2504 case AMDGPU::AV_128_Align2RegClassID:
2505 return 128;
2506 case AMDGPU::SGPR_160RegClassID:
2507 case AMDGPU::SReg_160RegClassID:
2508 case AMDGPU::VReg_160RegClassID:
2509 case AMDGPU::AReg_160RegClassID:
2510 case AMDGPU::VReg_160_Align2RegClassID:
2511 case AMDGPU::AReg_160_Align2RegClassID:
2512 case AMDGPU::AV_160RegClassID:
2513 case AMDGPU::AV_160_Align2RegClassID:
2514 return 160;
2515 case AMDGPU::SGPR_192RegClassID:
2516 case AMDGPU::SReg_192RegClassID:
2517 case AMDGPU::VReg_192RegClassID:
2518 case AMDGPU::AReg_192RegClassID:
2519 case AMDGPU::VReg_192_Align2RegClassID:
2520 case AMDGPU::AReg_192_Align2RegClassID:
2521 case AMDGPU::AV_192RegClassID:
2522 case AMDGPU::AV_192_Align2RegClassID:
2523 return 192;
2524 case AMDGPU::SGPR_224RegClassID:
2525 case AMDGPU::SReg_224RegClassID:
2526 case AMDGPU::VReg_224RegClassID:
2527 case AMDGPU::AReg_224RegClassID:
2528 case AMDGPU::VReg_224_Align2RegClassID:
2529 case AMDGPU::AReg_224_Align2RegClassID:
2530 case AMDGPU::AV_224RegClassID:
2531 case AMDGPU::AV_224_Align2RegClassID:
2532 return 224;
2533 case AMDGPU::SGPR_256RegClassID:
2534 case AMDGPU::SReg_256RegClassID:
2535 case AMDGPU::VReg_256RegClassID:
2536 case AMDGPU::AReg_256RegClassID:
2537 case AMDGPU::VReg_256_Align2RegClassID:
2538 case AMDGPU::AReg_256_Align2RegClassID:
2539 case AMDGPU::AV_256RegClassID:
2540 case AMDGPU::AV_256_Align2RegClassID:
2541 return 256;
2542 case AMDGPU::SGPR_288RegClassID:
2543 case AMDGPU::SReg_288RegClassID:
2544 case AMDGPU::VReg_288RegClassID:
2545 case AMDGPU::AReg_288RegClassID:
2546 case AMDGPU::VReg_288_Align2RegClassID:
2547 case AMDGPU::AReg_288_Align2RegClassID:
2548 case AMDGPU::AV_288RegClassID:
2549 case AMDGPU::AV_288_Align2RegClassID:
2550 return 288;
2551 case AMDGPU::SGPR_320RegClassID:
2552 case AMDGPU::SReg_320RegClassID:
2553 case AMDGPU::VReg_320RegClassID:
2554 case AMDGPU::AReg_320RegClassID:
2555 case AMDGPU::VReg_320_Align2RegClassID:
2556 case AMDGPU::AReg_320_Align2RegClassID:
2557 case AMDGPU::AV_320RegClassID:
2558 case AMDGPU::AV_320_Align2RegClassID:
2559 return 320;
2560 case AMDGPU::SGPR_352RegClassID:
2561 case AMDGPU::SReg_352RegClassID:
2562 case AMDGPU::VReg_352RegClassID:
2563 case AMDGPU::AReg_352RegClassID:
2564 case AMDGPU::VReg_352_Align2RegClassID:
2565 case AMDGPU::AReg_352_Align2RegClassID:
2566 case AMDGPU::AV_352RegClassID:
2567 case AMDGPU::AV_352_Align2RegClassID:
2568 return 352;
2569 case AMDGPU::SGPR_384RegClassID:
2570 case AMDGPU::SReg_384RegClassID:
2571 case AMDGPU::VReg_384RegClassID:
2572 case AMDGPU::AReg_384RegClassID:
2573 case AMDGPU::VReg_384_Align2RegClassID:
2574 case AMDGPU::AReg_384_Align2RegClassID:
2575 case AMDGPU::AV_384RegClassID:
2576 case AMDGPU::AV_384_Align2RegClassID:
2577 return 384;
2578 case AMDGPU::SGPR_512RegClassID:
2579 case AMDGPU::SReg_512RegClassID:
2580 case AMDGPU::VReg_512RegClassID:
2581 case AMDGPU::AReg_512RegClassID:
2582 case AMDGPU::VReg_512_Align2RegClassID:
2583 case AMDGPU::AReg_512_Align2RegClassID:
2584 case AMDGPU::AV_512RegClassID:
2585 case AMDGPU::AV_512_Align2RegClassID:
2586 return 512;
2587 case AMDGPU::SGPR_1024RegClassID:
2588 case AMDGPU::SReg_1024RegClassID:
2589 case AMDGPU::VReg_1024RegClassID:
2590 case AMDGPU::AReg_1024RegClassID:
2591 case AMDGPU::VReg_1024_Align2RegClassID:
2592 case AMDGPU::AReg_1024_Align2RegClassID:
2593 case AMDGPU::AV_1024RegClassID:
2594 case AMDGPU::AV_1024_Align2RegClassID:
2595 return 1024;
2596 default:
2597 llvm_unreachable("Unexpected register class");
2598 }
2599}
2600
2601unsigned getRegBitWidth(const MCRegisterClass &RC) {
2602 return getRegBitWidth(RC.getID());
2603}
2604
2605unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2606 unsigned OpNo) {
2607 assert(OpNo < Desc.NumOperands);
2608 unsigned RCID = Desc.operands()[OpNo].RegClass;
2609 return getRegBitWidth(RCID) / 8;
2610}
2611
2612bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2613 if (isInlinableIntLiteral(Literal))
2614 return true;
2615
2616 uint64_t Val = static_cast<uint64_t>(Literal);
2617 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2618 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2619 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2620 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2621 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2622 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2623 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2624 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2625 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2626 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2627}
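// Example: 0x3FF0000000000000 (the bit pattern of 1.0) is inlinable, while
// 0x3FF0000000000001 is not; 0x3fc45f306dc9c882 (1/(2*pi)) is inlinable only
// when HasInv2Pi is set.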
2628
2629bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2630 if (isInlinableIntLiteral(Literal))
2631 return true;
2632
2633 // The actual type of the operand does not seem to matter as long
2634 // as the bits match one of the inline immediate values. For example:
2635 //
2636 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2637 // so it is a legal inline immediate.
2638 //
2639 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2640 // floating-point, so it is a legal inline immediate.
2641
2642 uint32_t Val = static_cast<uint32_t>(Literal);
2643 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2644 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2645 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2646 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2647 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2648 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2649 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2650 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2651 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2652 (Val == 0x3e22f983 && HasInv2Pi);
2653}
2654
2655bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2656 if (!HasInv2Pi)
2657 return false;
2658 if (isInlinableIntLiteral(Literal))
2659 return true;
2660 uint16_t Val = static_cast<uint16_t>(Literal);
2661 return Val == 0x3F00 || // 0.5
2662 Val == 0xBF00 || // -0.5
2663 Val == 0x3F80 || // 1.0
2664 Val == 0xBF80 || // -1.0
2665 Val == 0x4000 || // 2.0
2666 Val == 0xC000 || // -2.0
2667 Val == 0x4080 || // 4.0
2668 Val == 0xC080 || // -4.0
2669 Val == 0x3E22; // 1.0 / (2.0 * pi)
2670}
2671
2672bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2673 if (!HasInv2Pi)
2674 return false;
2675
2676 if (isInlinableIntLiteral(Literal))
2677 return true;
2678
2679 uint16_t Val = static_cast<uint16_t>(Literal);
2680 return Val == 0x3C00 || // 1.0
2681 Val == 0xBC00 || // -1.0
2682 Val == 0x3800 || // 0.5
2683 Val == 0xB800 || // -0.5
2684 Val == 0x4000 || // 2.0
2685 Val == 0xC000 || // -2.0
2686 Val == 0x4400 || // 4.0
2687 Val == 0xC400 || // -4.0
2688 Val == 0x3118; // 1/2pi
2689}
2690
2691std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2692 // Unfortunately, the Instruction Set Architecture Reference Guide is
2693 // misleading about how the inline operands work for (packed) 16-bit
2694 // instructions. In a nutshell, the actual HW behavior is:
2695 //
2696 // - integer encodings (-16 .. 64) are always produced as sign-extended
2697 // 32-bit values
2698 // - float encodings are produced as:
2699 // - for F16 instructions: corresponding half-precision float values in
2700 // the LSBs, 0 in the MSBs
2701 // - for UI16 instructions: corresponding single-precision float value
2702 int32_t Signed = static_cast<int32_t>(Literal);
2703 if (Signed >= 0 && Signed <= 64)
2704 return 128 + Signed;
2705
2706 if (Signed >= -16 && Signed <= -1)
2707 return 192 + std::abs(Signed);
2708
2709 if (IsFloat) {
2710 // clang-format off
2711 switch (Literal) {
2712 case 0x3800: return 240; // 0.5
2713 case 0xB800: return 241; // -0.5
2714 case 0x3C00: return 242; // 1.0
2715 case 0xBC00: return 243; // -1.0
2716 case 0x4000: return 244; // 2.0
2717 case 0xC000: return 245; // -2.0
2718 case 0x4400: return 246; // 4.0
2719 case 0xC400: return 247; // -4.0
2720 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2721 default: break;
2722 }
2723 // clang-format on
2724 } else {
2725 // clang-format off
2726 switch (Literal) {
2727 case 0x3F000000: return 240; // 0.5
2728 case 0xBF000000: return 241; // -0.5
2729 case 0x3F800000: return 242; // 1.0
2730 case 0xBF800000: return 243; // -1.0
2731 case 0x40000000: return 244; // 2.0
2732 case 0xC0000000: return 245; // -2.0
2733 case 0x40800000: return 246; // 4.0
2734 case 0xC0800000: return 247; // -4.0
2735 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2736 default: break;
2737 }
2738 // clang-format on
2739 }
2740
2741 return {};
2742}
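// Worked example: the literal 0xFFFFFFF1 is the sign-extended integer -15 and
// encodes as 192 + 15 == 207; for an F16 instruction the literal 0x3C00
// (1.0 in half precision) encodes as 242.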
2743
2744// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2745// or nullopt.
2746std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2747 return getInlineEncodingV216(false, Literal);
2748}
2749
2750// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2751// or nullopt.
2752std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2753 int32_t Signed = static_cast<int32_t>(Literal);
2754 if (Signed >= 0 && Signed <= 64)
2755 return 128 + Signed;
2756
2757 if (Signed >= -16 && Signed <= -1)
2758 return 192 + std::abs(Signed);
2759
2760 // clang-format off
2761 switch (Literal) {
2762 case 0x3F00: return 240; // 0.5
2763 case 0xBF00: return 241; // -0.5
2764 case 0x3F80: return 242; // 1.0
2765 case 0xBF80: return 243; // -1.0
2766 case 0x4000: return 244; // 2.0
2767 case 0xC000: return 245; // -2.0
2768 case 0x4080: return 246; // 4.0
2769 case 0xC080: return 247; // -4.0
2770 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2771 default: break;
2772 }
2773 // clang-format on
2774
2775 return std::nullopt;
2776}
2777
2778// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2779// or nullopt.
2780std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2781 return getInlineEncodingV216(true, Literal);
2782}
2783
2784// Whether the given literal can be inlined for a V_PK_* instruction.
2785bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2786 switch (OpType) {
2787 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2788 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2789 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2790 return getInlineEncodingV216(false, Literal).has_value();
2791 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2792 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2793 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2794 return getInlineEncodingV216(true, Literal).has_value();
2795 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2796 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2797 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2798 return isInlinableLiteralV2BF16(Literal);
2799 default:
2800 llvm_unreachable("bad packed operand type");
2801 }
2802}
2803
2804// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2805bool isInlinableLiteralV2I16(uint32_t Literal) {
2806 return getInlineEncodingV2I16(Literal).has_value();
2807}
2808
2809// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2810bool isInlinableLiteralV2BF16(uint32_t Literal) {
2811 return getInlineEncodingV2BF16(Literal).has_value();
2812}
2813
2814// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2815bool isInlinableLiteralV2F16(uint32_t Literal) {
2816 return getInlineEncodingV2F16(Literal).has_value();
2817}
2818
2819bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2820 if (IsFP64)
2821 return !(Val & 0xffffffffu);
2822
2823 return isUInt<32>(Val) || isInt<32>(Val);
2824}
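// Example: for an FP64 operand only the high 32 bits are encoded, so the
// pattern 0x3FF0000000000000 (1.0) is a valid 32-bit literal while
// 0x3FF0000000000001 is not.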
2825
2826bool isArgPassedInSGPR(const Argument *A) {
2827 const Function *F = A->getParent();
2828
2829 // Arguments to compute shaders are never a source of divergence.
2830 CallingConv::ID CC = F->getCallingConv();
2831 switch (CC) {
2832 case CallingConv::AMDGPU_KERNEL:
2833 case CallingConv::SPIR_KERNEL:
2834 return true;
2835 case CallingConv::AMDGPU_VS:
2836 case CallingConv::AMDGPU_LS:
2837 case CallingConv::AMDGPU_HS:
2838 case CallingConv::AMDGPU_ES:
2839 case CallingConv::AMDGPU_GS:
2840 case CallingConv::AMDGPU_PS:
2841 case CallingConv::AMDGPU_CS:
2842 case CallingConv::AMDGPU_Gfx:
2843 case CallingConv::AMDGPU_CS_Chain:
2844 case CallingConv::AMDGPU_CS_ChainPreserve:
2845 // For non-compute shaders, SGPR inputs are marked with either inreg or
2846 // byval. Everything else is in VGPRs.
2847 return A->hasAttribute(Attribute::InReg) ||
2848 A->hasAttribute(Attribute::ByVal);
2849 default:
2850 // TODO: treat i1 as divergent?
2851 return A->hasAttribute(Attribute::InReg);
2852 }
2853}
2854
2855bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2856 // Arguments to compute shaders are never a source of divergence.
2857 CallingConv::ID CC = CB->getCallingConv();
2858 switch (CC) {
2859 case CallingConv::AMDGPU_KERNEL:
2860 case CallingConv::SPIR_KERNEL:
2861 return true;
2862 case CallingConv::AMDGPU_VS:
2863 case CallingConv::AMDGPU_LS:
2864 case CallingConv::AMDGPU_HS:
2865 case CallingConv::AMDGPU_ES:
2866 case CallingConv::AMDGPU_GS:
2867 case CallingConv::AMDGPU_PS:
2868 case CallingConv::AMDGPU_CS:
2869 case CallingConv::AMDGPU_Gfx:
2870 case CallingConv::AMDGPU_CS_Chain:
2871 case CallingConv::AMDGPU_CS_ChainPreserve:
2872 // For non-compute shaders, SGPR inputs are marked with either inreg or
2873 // byval. Everything else is in VGPRs.
2874 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2875 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2876 default:
2877 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2878 }
2879}
2880
2881static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2882 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2883}
2884
2885static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2886 return isGFX9Plus(ST);
2887}
2888
2889bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2890 int64_t EncodedOffset) {
2891 if (isGFX12Plus(ST))
2892 return isUInt<23>(EncodedOffset);
2893
2894 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2895 : isUInt<8>(EncodedOffset);
2896}
2897
2898bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2899 int64_t EncodedOffset,
2900 bool IsBuffer) {
2901 if (isGFX12Plus(ST))
2902 return isInt<24>(EncodedOffset);
2903
2904 return !IsBuffer &&
2905 hasSMRDSignedImmOffset(ST) &&
2906 isInt<21>(EncodedOffset);
2907}
2908
2909static bool isDwordAligned(uint64_t ByteOffset) {
2910 return (ByteOffset & 3) == 0;
2911}
2912
2913uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2914 uint64_t ByteOffset) {
2915 if (hasSMEMByteOffset(ST))
2916 return ByteOffset;
2917
2918 assert(isDwordAligned(ByteOffset));
2919 return ByteOffset >> 2;
2920}
2921
2922std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2923 int64_t ByteOffset, bool IsBuffer) {
2924 if (isGFX12Plus(ST)) // 24 bit signed offsets
2925 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2926 : std::nullopt;
2927
2928 // The signed version is always a byte offset.
2929 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2930 // Assume the sign bit is never used.
2931 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2932 : std::nullopt;
2933 }
2934
2935 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2936 return std::nullopt;
2937
2938 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2939 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2940 ? std::optional<int64_t>(EncodedOffset)
2941 : std::nullopt;
2942}
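// Example: on CI a byte offset of 16 is dword-scaled to the encoded offset 4
// and accepted via the unsigned path, whereas targets with SMEM byte offsets
// (GCN3 and GFX10+) encode the byte offset directly.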
2943
2944std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2945 int64_t ByteOffset) {
2946 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2947 return std::nullopt;
2948
2949 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2950 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2951 : std::nullopt;
2952}
2953
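/// For pre-GFX12 FLAT instructions the offset must be positive; MSB is
/// ignored and forced to zero.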
2954unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2955 if (AMDGPU::isGFX10(ST))
2956 return 12;
2957
2958 if (AMDGPU::isGFX12(ST))
2959 return 24;
2960 return 13;
2961}
2962
2963namespace {
2964
2965struct SourceOfDivergence {
2966 unsigned Intr;
2967};
2968const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2969
2970struct AlwaysUniform {
2971 unsigned Intr;
2972};
2973const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2974
2975#define GET_SourcesOfDivergence_IMPL
2976#define GET_UniformIntrinsics_IMPL
2977#define GET_Gfx9BufferFormat_IMPL
2978#define GET_Gfx10BufferFormat_IMPL
2979#define GET_Gfx11PlusBufferFormat_IMPL
2980#include "AMDGPUGenSearchableTables.inc"
2981
2982} // end anonymous namespace
2983
2984bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2985 return lookupSourceOfDivergence(IntrID);
2986}
2987
2988bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2989 return lookupAlwaysUniform(IntrID);
2990}
2991
2992const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2993 uint8_t NumComponents,
2994 uint8_t NumFormat,
2995 const MCSubtargetInfo &STI) {
2996 return isGFX11Plus(STI)
2997 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2998 NumFormat)
2999 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
3000 NumComponents, NumFormat)
3001 : getGfx9BufferFormatInfo(BitsPerComp,
3002 NumComponents, NumFormat);
3003}
3004
3005const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3006 const MCSubtargetInfo &STI) {
3007 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3008 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3009 : getGfx9BufferFormatInfo(Format);
3010}
3011
3012bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3013 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
3014 OpName::src2 }) {
3015 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3016 if (Idx == -1)
3017 continue;
3018
3019 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3020 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3021 return true;
3022 }
3023
3024 return false;
3025}
3026
3027bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3028 return hasAny64BitVGPROperands(OpDesc);
3029}
3030
3031} // namespace AMDGPU
3032
3033raw_ostream &operator<<(raw_ostream &OS,
3034 const AMDGPU::IsaInfo::TargetIDSetting S) {
3035 switch (S) {
3036 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3037 OS << "Unsupported";
3038 break;
3039 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3040 OS << "Any";
3041 break;
3042 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3043 OS << "Off";
3044 break;
3045 case (AMDGPU::IsaInfo::TargetIDSetting::On):
3046 OS << "On";
3047 break;
3048 }
3049 return OS;
3050}
3051
3052} // namespace llvm
unsigned const MachineRegisterInfo * MRI
#define MAP_REG2REG
unsigned Intr
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
unsigned Reg
LLVMContext & Context
const SmallVectorImpl< MachineOperand > & Cond
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1173
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1170
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
void setTargetIDFromFeaturesString(StringRef FS)
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1259
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1539
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
This class represents an Operation in the Expression.
Encoding
Size and signedness of expression operations' operands.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:849
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:271
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:370
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMajor
HSA metadata major version.
bool isValidHwreg(int64_t Id)
const CustomOperand< const MCSubtargetInfo & > Opr[]
bool isValidHwregOffset(int64_t Offset)
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
bool isValidHwregWidth(int64_t Width)
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
unsigned getVOPDOpcode(unsigned Opc)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isChainCC(CallingConv::ID CC)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
bool isTrue16Inst(unsigned Opc)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getKmcntBitMask(const IsaVersion &Version)
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGenericAtomic(unsigned Opc)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM_LAST
Definition: SIDefines.h:269
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition: SIDefines.h:260
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_KIMM_FIRST
Definition: SIDefines.h:268
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_REG_INLINE_C_FIRST
Definition: SIDefines.h:259
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if the MAI operation is a double-precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
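A hedged round-trip sketch for the vmcnt field (the target name "gfx900" and the all-ones starting encoding are illustrative assumptions):
    llvm::AMDGPU::IsaVersion IV = llvm::AMDGPU::getIsaVersion("gfx900");
    unsigned Wait = llvm::AMDGPU::encodeVmcnt(IV, /*Waitcnt=*/~0u, /*Vmcnt=*/0);
    unsigned Vm = llvm::AMDGPU::decodeVmcnt(IV, Wait); // yields 0 again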
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
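A sketch of typical usage; "amdgpu-flat-work-group-size" is a real AMDGPU function attribute, but the default range chosen here is an assumption:
    std::pair<unsigned, unsigned> FlatWGSize =
        llvm::AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size",
                                              /*Default=*/{1, 1024},
                                              /*OnlyFirstRequired=*/false);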
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool hasVOPD(const MCSubtargetInfo &STI)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
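For example, the f64 value 1.0 is one of the inline constants, while an arbitrary 64-bit pattern is not (a sketch; passing HasInv2Pi=true is an assumption matching gfx8+ subtargets):
    uint64_t One = llvm::DoubleToBits(1.0); // from llvm/Support/MathExtras.h
    bool A = llvm::AMDGPU::isInlinableLiteral64(One, /*HasInv2Pi=*/true);           // true
    bool B = llvm::AMDGPU::isInlinableLiteral64(0x123456789LL, /*HasInv2Pi=*/true); // false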
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
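A sketch with a made-up flags enum (DemoFlags and its values are invented for illustration; requires llvm/ADT/BitmaskEnum.h):
    enum class DemoFlags : unsigned {
      A = 1, B = 2, C = 4,
      LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/C)
    };
    // Ones through the high-order bit of the largest enumerator: 0b111 == 7.
    constexpr unsigned M = llvm::BitmaskEnumDetail::Mask<DemoFlags>();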
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers. (A short usage sketch follows the enumerator list below.)
Definition: CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for the AMDPAL shader stage before the geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for the AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
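As a sketch of how these enumerators are typically consumed (the helper name is invented for illustration; compare the real predicates above such as isModuleEntryFunctionCC):
    static bool isKernelEntryCC(llvm::CallingConv::ID CC) {
      return CC == llvm::CallingConv::AMDGPU_KERNEL ||
             CC == llvm::CallingConv::SPIR_KERNEL;
    }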
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition: ELF.h:377
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition: ELF.h:378
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition: ELF.h:379
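A sketch of the expected mapping via getELFABIVersion above (assuming code object version N selects ELFABIVERSION_AMDGPU_HSA_VN):
    llvm::Triple T("amdgcn-amd-amdhsa");
    uint8_t ABI = llvm::AMDGPU::getELFABIVersion(T, /*CodeObjectVersion=*/5);
    // Expected: llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5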
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
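For example:
    // 10 / 3 is 3 remainder 1, so the ceiling is 4.
    uint64_t Ceil = llvm::divideCeil(10, 3); // 4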
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
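Typical usage with a plain message (this calls the StringRef/Twine overload rather than the Error overload listed above; the message text is illustrative):
    llvm::report_fatal_error("invalid code object version");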
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
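Typical usage (the message text is illustrative):
    llvm::errs() << "warning: unexpected subtarget\n";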
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
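For example (llvm::Align wraps a power-of-two alignment):
    uint64_t Aligned = llvm::alignTo(10, llvm::Align(8)); // 16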
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew modulo Align.
Definition: MathExtras.h:428
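For example:
    uint64_t D0 = llvm::alignDown(10, 8);             // 8
    uint64_t D1 = llvm::alignDown(10, 8, /*Skew=*/3); // 3: the largest value <= 10 that is 3 mod 8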
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
#define N
AMD Kernel Code Object (amd_kernel_code_t).
bool(* Cond)(T Context)
Instruction set architecture version.
Definition: TargetParser.h:125
Represents the counter values to wait for in an s_waitcnt instruction.
Description of the encoding of one expression Op.