LLVM 19.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
42namespace {
43
/// Builds a contiguous bit mask.
///
/// \param Shift Index of the lowest set bit of the mask.
/// \param Width Number of consecutive set bits.
///
/// \returns A mask with \p Width set bits starting at bit \p Shift. A
/// \p Width that covers the whole word yields all ones before shifting, and a
/// \p Shift at or beyond the word size yields 0.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Shifting by the full word size (or more) is undefined, and the signed
  // form `(1 << Width) - 1` overflows for Width == 31, so do the arithmetic
  // in unsigned and handle the saturating cases explicitly.
  constexpr unsigned NumBits = sizeof(unsigned) * 8u;
  if (Shift >= NumBits)
    return 0;
  const unsigned Field = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return Field << Shift;
}
48
49/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
50///
51/// \returns Packed \p Dst.
52unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
53 unsigned Mask = getBitMask(Shift, Width);
54 return ((Src << Shift) & Mask) | (Dst & ~Mask);
55}
56
57/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
58///
59/// \returns Unpacked bits.
60unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
61 return (Src & getBitMask(Shift, Width)) >> Shift;
62}
63
/// \returns Vmcnt bit shift (lower bits): 10 when \p VersionMajor is at least
/// 11, otherwise 0.
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}
68
/// \returns Vmcnt bit width (lower bits): 6 when \p VersionMajor is at least
/// 11, otherwise 4.
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}
73
/// \returns Expcnt bit shift: 0 when \p VersionMajor is at least 11,
/// otherwise 4.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}
78
/// \returns Expcnt bit width, which is 3 for every \p VersionMajor.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  // The version is accepted but does not influence the result.
  (void)VersionMajor;
  const unsigned Width = 3;
  return Width;
}
81
/// \returns Lgkmcnt bit shift: 4 when \p VersionMajor is at least 11,
/// otherwise 8.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}
86
/// \returns Lgkmcnt bit width: 6 when \p VersionMajor is at least 10,
/// otherwise 4.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}
91
/// \returns Vmcnt bit shift (higher bits), which is 14 for every
/// \p VersionMajor.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  // The version is accepted but does not influence the result.
  (void)VersionMajor;
  const unsigned Shift = 14;
  return Shift;
}
94
/// \returns Vmcnt bit width (higher bits): 2 for \p VersionMajor 9 and 10,
/// and 0 for every other version.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
99
/// \returns Loadcnt bit width: 6 when \p VersionMajor is at least 12,
/// otherwise 0.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
104
/// \returns Samplecnt bit width: 6 when \p VersionMajor is at least 12,
/// otherwise 0.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
109
/// \returns Bvhcnt bit width: 3 when \p VersionMajor is at least 12,
/// otherwise 0.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 3;
  return 0;
}
114
/// \returns Dscnt bit width: 6 when \p VersionMajor is at least 12,
/// otherwise 0.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
119
/// \returns Dscnt bit shift in combined S_WAIT instructions, which is 0 for
/// every \p VersionMajor.
unsigned getDscntBitShift(unsigned VersionMajor) {
  // The version is accepted but does not influence the result.
  (void)VersionMajor;
  const unsigned Shift = 0;
  return Shift;
}
122
/// \returns Storecnt or Vscnt bit width, depending on \p VersionMajor: 6 when
/// the version is at least 10, otherwise 0.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 0;
}
127
/// \returns Kmcnt bit width: 5 when \p VersionMajor is at least 12,
/// otherwise 0.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 5;
  return 0;
}
132
/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions: 8
/// when \p VersionMajor is at least 12, otherwise 0.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 8;
  return 0;
}
137
/// \returns VmVsrc bit width (always 3).
inline unsigned getVmVsrcBitWidth() {
  const unsigned Width = 3;
  return Width;
}
140
/// \returns VmVsrc bit shift (always 2).
inline unsigned getVmVsrcBitShift() {
  const unsigned Shift = 2;
  return Shift;
}
143
/// \returns VaVdst bit width (always 4).
inline unsigned getVaVdstBitWidth() {
  const unsigned Width = 4;
  return Width;
}
146
/// \returns VaVdst bit shift (always 12).
inline unsigned getVaVdstBitShift() {
  const unsigned Shift = 12;
  return Shift;
}
149
/// \returns SaSdst bit width (always 1).
inline unsigned getSaSdstBitWidth() {
  const unsigned Width = 1;
  return Width;
}
152
/// \returns SaSdst bit shift (always 0).
inline unsigned getSaSdstBitShift() {
  const unsigned Shift = 0;
  return Shift;
}
155
156} // end namespace anonymous
157
158namespace llvm {
159
160namespace AMDGPU {
161
162/// \returns true if the target supports signed immediate offset for SMRD
163/// instructions.
165 return isGFX9Plus(ST);
166}
167
168/// \returns True if \p STI is AMDHSA.
169bool isHsaAbi(const MCSubtargetInfo &STI) {
170 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
171}
172
174 if (auto Ver = mdconst::extract_or_null<ConstantInt>(
175 M.getModuleFlag("amdhsa_code_object_version"))) {
176 return (unsigned)Ver->getZExtValue() / 100;
177 }
178
180}
181
184}
185
186unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
187 switch (ABIVersion) {
189 return 4;
191 return 5;
193 return 6;
194 default:
196 }
197}
198
199uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
200 if (T.getOS() != Triple::AMDHSA)
201 return 0;
202
203 switch (CodeObjectVersion) {
204 case 4:
206 case 5:
208 case 6:
210 default:
211 report_fatal_error("Unsupported AMDHSA Code Object Version " +
212 Twine(CodeObjectVersion));
213 }
214}
215
216unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
217 switch (CodeObjectVersion) {
218 case AMDHSA_COV4:
219 return 48;
220 case AMDHSA_COV5:
221 case AMDHSA_COV6:
222 default:
224 }
225}
226
227
228// FIXME: All such magic numbers about the ABI should be in a
229// central TD file.
230unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
231 switch (CodeObjectVersion) {
232 case AMDHSA_COV4:
233 return 24;
234 case AMDHSA_COV5:
235 case AMDHSA_COV6:
236 default:
238 }
239}
240
241unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
242 switch (CodeObjectVersion) {
243 case AMDHSA_COV4:
244 return 32;
245 case AMDHSA_COV5:
246 case AMDHSA_COV6:
247 default:
249 }
250}
251
252unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
253 switch (CodeObjectVersion) {
254 case AMDHSA_COV4:
255 return 40;
256 case AMDHSA_COV5:
257 case AMDHSA_COV6:
258 default:
260 }
261}
262
263#define GET_MIMGBaseOpcodesTable_IMPL
264#define GET_MIMGDimInfoTable_IMPL
265#define GET_MIMGInfoTable_IMPL
266#define GET_MIMGLZMappingTable_IMPL
267#define GET_MIMGMIPMappingTable_IMPL
268#define GET_MIMGBiasMappingTable_IMPL
269#define GET_MIMGOffsetMappingTable_IMPL
270#define GET_MIMGG16MappingTable_IMPL
271#define GET_MAIInstInfoTable_IMPL
272#include "AMDGPUGenSearchableTables.inc"
273
274int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
275 unsigned VDataDwords, unsigned VAddrDwords) {
276 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
277 VDataDwords, VAddrDwords);
278 return Info ? Info->Opcode : -1;
279}
280
282 const MIMGInfo *Info = getMIMGInfo(Opc);
283 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
284}
285
286int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
287 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
288 const MIMGInfo *NewInfo =
289 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
290 NewChannels, OrigInfo->VAddrDwords);
291 return NewInfo ? NewInfo->Opcode : -1;
292}
293
294unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
295 const MIMGDimInfo *Dim, bool IsA16,
296 bool IsG16Supported) {
297 unsigned AddrWords = BaseOpcode->NumExtraArgs;
298 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
299 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
300 if (IsA16)
301 AddrWords += divideCeil(AddrComponents, 2);
302 else
303 AddrWords += AddrComponents;
304
305 // Note: For subtargets that support A16 but not G16, enabling A16 also
306 // enables 16 bit gradients.
307 // For subtargets that support A16 (operand) and G16 (done with a different
308 // instruction encoding), they are independent.
309
310 if (BaseOpcode->Gradients) {
311 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
312 // There are two gradients per coordinate, we pack them separately.
313 // For the 3d case,
314 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
315 AddrWords += alignTo<2>(Dim->NumGradients / 2);
316 else
317 AddrWords += Dim->NumGradients;
318 }
319 return AddrWords;
320}
321
322struct MUBUFInfo {
325 uint8_t elements;
330 bool tfe;
331};
332
333struct MTBUFInfo {
336 uint8_t elements;
340};
341
342struct SMInfo {
345};
346
347struct VOPInfo {
350};
351
354};
355
358};
359
362};
363
368};
369
370struct VOPDInfo {
375};
376
380};
381
386};
387
388#define GET_MTBUFInfoTable_DECL
389#define GET_MTBUFInfoTable_IMPL
390#define GET_MUBUFInfoTable_DECL
391#define GET_MUBUFInfoTable_IMPL
392#define GET_SingleUseExceptionTable_DECL
393#define GET_SingleUseExceptionTable_IMPL
394#define GET_SMInfoTable_DECL
395#define GET_SMInfoTable_IMPL
396#define GET_VOP1InfoTable_DECL
397#define GET_VOP1InfoTable_IMPL
398#define GET_VOP2InfoTable_DECL
399#define GET_VOP2InfoTable_IMPL
400#define GET_VOP3InfoTable_DECL
401#define GET_VOP3InfoTable_IMPL
402#define GET_VOPC64DPPTable_DECL
403#define GET_VOPC64DPPTable_IMPL
404#define GET_VOPC64DPP8Table_DECL
405#define GET_VOPC64DPP8Table_IMPL
406#define GET_VOPCAsmOnlyInfoTable_DECL
407#define GET_VOPCAsmOnlyInfoTable_IMPL
408#define GET_VOP3CAsmOnlyInfoTable_DECL
409#define GET_VOP3CAsmOnlyInfoTable_IMPL
410#define GET_VOPDComponentTable_DECL
411#define GET_VOPDComponentTable_IMPL
412#define GET_VOPDPairs_DECL
413#define GET_VOPDPairs_IMPL
414#define GET_VOPTrue16Table_DECL
415#define GET_VOPTrue16Table_IMPL
416#define GET_WMMAOpcode2AddrMappingTable_DECL
417#define GET_WMMAOpcode2AddrMappingTable_IMPL
418#define GET_WMMAOpcode3AddrMappingTable_DECL
419#define GET_WMMAOpcode3AddrMappingTable_IMPL
420#include "AMDGPUGenSearchableTables.inc"
421
422int getMTBUFBaseOpcode(unsigned Opc) {
423 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
424 return Info ? Info->BaseOpcode : -1;
425}
426
427int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
428 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
429 return Info ? Info->Opcode : -1;
430}
431
432int getMTBUFElements(unsigned Opc) {
433 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
434 return Info ? Info->elements : 0;
435}
436
437bool getMTBUFHasVAddr(unsigned Opc) {
438 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
439 return Info ? Info->has_vaddr : false;
440}
441
442bool getMTBUFHasSrsrc(unsigned Opc) {
443 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
444 return Info ? Info->has_srsrc : false;
445}
446
447bool getMTBUFHasSoffset(unsigned Opc) {
448 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
449 return Info ? Info->has_soffset : false;
450}
451
452int getMUBUFBaseOpcode(unsigned Opc) {
453 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
454 return Info ? Info->BaseOpcode : -1;
455}
456
457int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
458 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
459 return Info ? Info->Opcode : -1;
460}
461
462int getMUBUFElements(unsigned Opc) {
463 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
464 return Info ? Info->elements : 0;
465}
466
467bool getMUBUFHasVAddr(unsigned Opc) {
468 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
469 return Info ? Info->has_vaddr : false;
470}
471
472bool getMUBUFHasSrsrc(unsigned Opc) {
473 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
474 return Info ? Info->has_srsrc : false;
475}
476
477bool getMUBUFHasSoffset(unsigned Opc) {
478 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
479 return Info ? Info->has_soffset : false;
480}
481
482bool getMUBUFIsBufferInv(unsigned Opc) {
483 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
484 return Info ? Info->IsBufferInv : false;
485}
486
487bool getMUBUFTfe(unsigned Opc) {
488 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
489 return Info ? Info->tfe : false;
490}
491
492bool getSMEMIsBuffer(unsigned Opc) {
493 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
494 return Info ? Info->IsBuffer : false;
495}
496
497bool getVOP1IsSingle(unsigned Opc) {
498 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
499 return Info ? Info->IsSingle : false;
500}
501
502bool getVOP2IsSingle(unsigned Opc) {
503 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
504 return Info ? Info->IsSingle : false;
505}
506
507bool getVOP3IsSingle(unsigned Opc) {
508 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
509 return Info ? Info->IsSingle : false;
510}
511
512bool isVOPC64DPP(unsigned Opc) {
513 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
514}
515
516bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
517
518bool getMAIIsDGEMM(unsigned Opc) {
519 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
520 return Info ? Info->is_dgemm : false;
521}
522
523bool getMAIIsGFX940XDL(unsigned Opc) {
524 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
525 return Info ? Info->is_gfx940_xdl : false;
526}
527
529 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
531 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
533 llvm_unreachable("Subtarget generation does not support VOPD!");
534}
535
536CanBeVOPD getCanBeVOPD(unsigned Opc) {
537 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
538 if (Info)
539 return {Info->CanBeVOPDX, true};
540 else
541 return {false, false};
542}
543
544unsigned getVOPDOpcode(unsigned Opc) {
545 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
546 return Info ? Info->VOPDOp : ~0u;
547}
548
549bool isVOPD(unsigned Opc) {
550 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
551}
552
553bool isMAC(unsigned Opc) {
554 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
555 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
556 Opc == AMDGPU::V_MAC_F32_e64_vi ||
557 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
558 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
559 Opc == AMDGPU::V_MAC_F16_e64_vi ||
560 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
561 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
562 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
563 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
564 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
565 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
566 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
567 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
568 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
569 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
570 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
571 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
572 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
573 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
574}
575
576bool isPermlane16(unsigned Opc) {
577 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
578 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
579 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
580 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
581 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
582 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
583 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
584 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
585}
586
587bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
588 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
589 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
590 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
591 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
592 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
593 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
594 Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
595 Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
596}
597
598bool isGenericAtomic(unsigned Opc) {
599 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
600 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
601 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
602 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
603 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
604 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
605 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
606 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
607 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
608 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
609 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
610 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
611 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
612 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
613 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
614 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
615 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
616}
617
618bool isTrue16Inst(unsigned Opc) {
619 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
620 return Info ? Info->IsTrue16 : false;
621}
622
624 const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
625 return Info && Info->IsInvalidSingleUseConsumer;
626}
627
629 const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
630 return Info && Info->IsInvalidSingleUseProducer;
631}
632
633unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
634 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
635 return Info ? Info->Opcode3Addr : ~0u;
636}
637
638unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
639 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
640 return Info ? Info->Opcode2Addr : ~0u;
641}
642
643// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
644// header files, so we need to wrap it in a function that takes unsigned
645// instead.
646int getMCOpcode(uint16_t Opcode, unsigned Gen) {
647 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
648}
649
650int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
651 const VOPDInfo *Info =
652 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
653 return Info ? Info->Opcode : -1;
654}
655
656std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
657 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
658 assert(Info);
659 auto OpX = getVOPDBaseFromComponent(Info->OpX);
660 auto OpY = getVOPDBaseFromComponent(Info->OpY);
661 assert(OpX && OpY);
662 return {OpX->BaseVOP, OpY->BaseVOP};
663}
664
665namespace VOPD {
666
669
672 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
673 assert(TiedIdx == -1 || TiedIdx == Component::DST);
674 HasSrc2Acc = TiedIdx != -1;
675
676 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
677 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
678
679 auto OperandsNum = OpDesc.getNumOperands();
680 unsigned CompOprIdx;
681 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
682 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
683 MandatoryLiteralIdx = CompOprIdx;
684 break;
685 }
686 }
687}
688
689unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
690 assert(CompOprIdx < Component::MAX_OPR_NUM);
691
692 if (CompOprIdx == Component::DST)
694
695 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
696 if (CompSrcIdx < getCompParsedSrcOperandsNum())
697 return getIndexOfSrcInParsedOperands(CompSrcIdx);
698
699 // The specified operand does not exist.
700 return 0;
701}
702
704 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
705
706 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
707 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
708
709 const unsigned CompOprNum =
711 unsigned CompOprIdx;
712 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
713 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
714 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
715 ((OpXRegs[CompOprIdx] & BanksMasks) ==
716 (OpYRegs[CompOprIdx] & BanksMasks)))
717 return CompOprIdx;
718 }
719
720 return {};
721}
722
723// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
724// by the specified component. If an operand is unused
725// or is not a VGPR, the corresponding value is 0.
726//
727// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
728// for the specified component and MC operand. The callback must return 0
729// if the operand is not a register or not a VGPR.
730InstInfo::RegIndices InstInfo::getRegIndices(
731 unsigned CompIdx,
732 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
733 assert(CompIdx < COMPONENTS_NUM);
734
735 const auto &Comp = CompInfo[CompIdx];
737
738 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
739
740 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
741 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
742 RegIndices[CompOprIdx] =
743 Comp.hasRegSrcOperand(CompSrcIdx)
744 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
745 : 0;
746 }
747 return RegIndices;
748}
749
750} // namespace VOPD
751
753 return VOPD::InstInfo(OpX, OpY);
754}
755
756VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
757 const MCInstrInfo *InstrInfo) {
758 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
759 const auto &OpXDesc = InstrInfo->get(OpX);
760 const auto &OpYDesc = InstrInfo->get(OpY);
762 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
763 return VOPD::InstInfo(OpXInfo, OpYInfo);
764}
765
766namespace IsaInfo {
767
769 : STI(STI), XnackSetting(TargetIDSetting::Any),
770 SramEccSetting(TargetIDSetting::Any) {
771 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
772 XnackSetting = TargetIDSetting::Unsupported;
773 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
774 SramEccSetting = TargetIDSetting::Unsupported;
775}
776
778 // Check if xnack or sramecc is explicitly enabled or disabled. In the
779 // absence of the target features we assume we must generate code that can run
780 // in any environment.
781 SubtargetFeatures Features(FS);
782 std::optional<bool> XnackRequested;
783 std::optional<bool> SramEccRequested;
784
785 for (const std::string &Feature : Features.getFeatures()) {
786 if (Feature == "+xnack")
787 XnackRequested = true;
788 else if (Feature == "-xnack")
789 XnackRequested = false;
790 else if (Feature == "+sramecc")
791 SramEccRequested = true;
792 else if (Feature == "-sramecc")
793 SramEccRequested = false;
794 }
795
796 bool XnackSupported = isXnackSupported();
797 bool SramEccSupported = isSramEccSupported();
798
799 if (XnackRequested) {
800 if (XnackSupported) {
801 XnackSetting =
802 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
803 } else {
804 // If a specific xnack setting was requested and this GPU does not support
805 // xnack emit a warning. Setting will remain set to "Unsupported".
806 if (*XnackRequested) {
807 errs() << "warning: xnack 'On' was requested for a processor that does "
808 "not support it!\n";
809 } else {
810 errs() << "warning: xnack 'Off' was requested for a processor that "
811 "does not support it!\n";
812 }
813 }
814 }
815
816 if (SramEccRequested) {
817 if (SramEccSupported) {
818 SramEccSetting =
819 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
820 } else {
821 // If a specific sramecc setting was requested and this GPU does not
822 // support sramecc emit a warning. Setting will remain set to
823 // "Unsupported".
824 if (*SramEccRequested) {
825 errs() << "warning: sramecc 'On' was requested for a processor that "
826 "does not support it!\n";
827 } else {
828 errs() << "warning: sramecc 'Off' was requested for a processor that "
829 "does not support it!\n";
830 }
831 }
832 }
833}
834
835static TargetIDSetting
837 if (FeatureString.ends_with("-"))
839 if (FeatureString.ends_with("+"))
840 return TargetIDSetting::On;
841
842 llvm_unreachable("Malformed feature string");
843}
844
846 SmallVector<StringRef, 3> TargetIDSplit;
847 TargetID.split(TargetIDSplit, ':');
848
849 for (const auto &FeatureString : TargetIDSplit) {
850 if (FeatureString.starts_with("xnack"))
851 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
852 if (FeatureString.starts_with("sramecc"))
853 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
854 }
855}
856
857std::string AMDGPUTargetID::toString() const {
858 std::string StringRep;
859 raw_string_ostream StreamRep(StringRep);
860
861 auto TargetTriple = STI.getTargetTriple();
862 auto Version = getIsaVersion(STI.getCPU());
863
864 StreamRep << TargetTriple.getArchName() << '-'
865 << TargetTriple.getVendorName() << '-'
866 << TargetTriple.getOSName() << '-'
867 << TargetTriple.getEnvironmentName() << '-';
868
869 std::string Processor;
870 // TODO: Following else statement is present here because we used various
871 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
872 // Remove once all aliases are removed from GCNProcessors.td.
873 if (Version.Major >= 9)
874 Processor = STI.getCPU().str();
875 else
876 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
877 Twine(Version.Stepping))
878 .str();
879
880 std::string Features;
881 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
882 // sramecc.
884 Features += ":sramecc-";
886 Features += ":sramecc+";
887 // xnack.
889 Features += ":xnack-";
891 Features += ":xnack+";
892 }
893
894 StreamRep << Processor << Features;
895
896 StreamRep.flush();
897 return StringRep;
898}
899
900unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
901 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
902 return 16;
903 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
904 return 32;
905
906 return 64;
907}
908
910 unsigned BytesPerCU = 0;
911 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
912 BytesPerCU = 32768;
913 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
914 BytesPerCU = 65536;
915
916 // "Per CU" really means "per whatever functional block the waves of a
917 // workgroup must share". So the effective local memory size is doubled in
918 // WGP mode on gfx10.
919 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
920 BytesPerCU *= 2;
921
922 return BytesPerCU;
923}
924
926 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
927 return 32768;
928 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
929 return 65536;
930 return 0;
931}
932
933unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
934 // "Per CU" really means "per whatever functional block the waves of a
935 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
936 // two SIMDs.
937 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
938 return 2;
939 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
940 // two CUs, so a total of four SIMDs.
941 return 4;
942}
943
945 unsigned FlatWorkGroupSize) {
946 assert(FlatWorkGroupSize != 0);
947 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
948 return 8;
949 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
950 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
951 if (N == 1) {
952 // Single-wave workgroups don't consume barrier resources.
953 return MaxWaves;
954 }
955
956 unsigned MaxBarriers = 16;
957 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
958 MaxBarriers = 32;
959
960 return std::min(MaxWaves / N, MaxBarriers);
961}
962
963unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
964 return 1;
965}
966
967unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
968 // FIXME: Need to take scratch memory into account.
969 if (isGFX90A(*STI))
970 return 8;
971 if (!isGFX10Plus(*STI))
972 return 10;
973 return hasGFX10_3Insts(*STI) ? 16 : 20;
974}
975
977 unsigned FlatWorkGroupSize) {
978 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
979 getEUsPerCU(STI));
980}
981
983 return 1;
984}
985
987 // Some subtargets allow encoding 2048, but this isn't tested or supported.
988 return 1024;
989}
990
992 unsigned FlatWorkGroupSize) {
993 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
994}
995
998 if (Version.Major >= 10)
999 return getAddressableNumSGPRs(STI);
1000 if (Version.Major >= 8)
1001 return 16;
1002 return 8;
1003}
1004
1006 return 8;
1007}
1008
1009unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1011 if (Version.Major >= 8)
1012 return 800;
1013 return 512;
1014}
1015
1017 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1019
1021 if (Version.Major >= 10)
1022 return 106;
1023 if (Version.Major >= 8)
1024 return 102;
1025 return 104;
1026}
1027
1028unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1029 assert(WavesPerEU != 0);
1030
1032 if (Version.Major >= 10)
1033 return 0;
1034
1035 if (WavesPerEU >= getMaxWavesPerEU(STI))
1036 return 0;
1037
1038 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1039 if (STI->getFeatureBits().test(FeatureTrapHandler))
1040 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1041 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1042 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1043}
1044
1045unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1046 bool Addressable) {
1047 assert(WavesPerEU != 0);
1048
1049 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1051 if (Version.Major >= 10)
1052 return Addressable ? AddressableNumSGPRs : 108;
1053 if (Version.Major >= 8 && !Addressable)
1054 AddressableNumSGPRs = 112;
1055 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1056 if (STI->getFeatureBits().test(FeatureTrapHandler))
1057 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1058 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1059 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1060}
1061
1062unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1063 bool FlatScrUsed, bool XNACKUsed) {
1064 unsigned ExtraSGPRs = 0;
1065 if (VCCUsed)
1066 ExtraSGPRs = 2;
1067
1069 if (Version.Major >= 10)
1070 return ExtraSGPRs;
1071
1072 if (Version.Major < 8) {
1073 if (FlatScrUsed)
1074 ExtraSGPRs = 4;
1075 } else {
1076 if (XNACKUsed)
1077 ExtraSGPRs = 4;
1078
1079 if (FlatScrUsed ||
1080 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1081 ExtraSGPRs = 6;
1082 }
1083
1084 return ExtraSGPRs;
1085}
1086
1087unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1088 bool FlatScrUsed) {
1089 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1090 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1091}
1092
1093static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1094 unsigned Granule) {
1095 return divideCeil(std::max(1u, NumRegs), Granule);
1096}
1097
1098unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1099 // SGPRBlocks is actual number of SGPR blocks minus 1.
1101 1;
1102}
1103
1105 std::optional<bool> EnableWavefrontSize32) {
1106 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1107 return 8;
1108
1109 bool IsWave32 = EnableWavefrontSize32 ?
1110 *EnableWavefrontSize32 :
1111 STI->getFeatureBits().test(FeatureWavefrontSize32);
1112
1113 if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1114 return IsWave32 ? 24 : 12;
1115
1116 if (hasGFX10_3Insts(*STI))
1117 return IsWave32 ? 16 : 8;
1118
1119 return IsWave32 ? 8 : 4;
1120}
1121
1123 std::optional<bool> EnableWavefrontSize32) {
1124 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1125 return 8;
1126
1127 bool IsWave32 = EnableWavefrontSize32 ?
1128 *EnableWavefrontSize32 :
1129 STI->getFeatureBits().test(FeatureWavefrontSize32);
1130
1131 return IsWave32 ? 8 : 4;
1132}
1133
1134unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1135 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1136 return 512;
1137 if (!isGFX10Plus(*STI))
1138 return 256;
1139 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1140 if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1141 return IsWave32 ? 1536 : 768;
1142 return IsWave32 ? 1024 : 512;
1143}
1144
1145unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1146
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// Returns 512 when FeatureGFX90AInsts is set and otherwise defers to
// getAddressableNumArchVGPRs.
1148 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1149 return 512;
1150 return getAddressableNumArchVGPRs(STI);
1151}
1152
// NOTE(review): both the first signature line and the start of the return
// statement are absent from this listing; restore them from the upstream file.
// The surviving arguments forward the subtarget's maximum waves per EU and its
// total VGPR count to the callee.
1154 unsigned NumVGPRs) {
1156 getMaxWavesPerEU(STI),
1157 getTotalNumVGPRs(STI));
1158}
1159
1160unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1161 unsigned MaxWaves,
1162 unsigned TotalNumVGPRs) {
1163 if (NumVGPRs < Granule)
1164 return MaxWaves;
1165 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1166 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1167}
1168
/// Maps an SGPR count to an occupancy (waves) figure using fixed thresholds.
// NOTE(review): two lines are absent from this listing: the remainder of the
// parameter list (which must declare `Gen`) and the opener of the block that
// the first lone closing brace below ends. Restore both from the upstream
// file before relying on this text.
1169unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
// GFX10 and later generations are not limited by the SGPR count here.
1171 if (Gen >= AMDGPUSubtarget::GFX10)
1172 return MaxWaves;
1173
// First threshold table (its guarding condition is on the missing line).
1175 if (SGPRs <= 80)
1176 return 10;
1177 if (SGPRs <= 88)
1178 return 9;
1179 if (SGPRs <= 100)
1180 return 8;
1181 return 7;
1182 }
// Fallback threshold table for every remaining generation.
1183 if (SGPRs <= 48)
1184 return 10;
1185 if (SGPRs <= 56)
1186 return 9;
1187 if (SGPRs <= 64)
1188 return 8;
1189 if (SGPRs <= 72)
1190 return 7;
1191 if (SGPRs <= 80)
1192 return 6;
1193 return 5;
1194}
1195
1196unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1197 assert(WavesPerEU != 0);
1198
1199 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1200 if (WavesPerEU >= MaxWavesPerEU)
1201 return 0;
1202
1203 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1204 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1205 unsigned Granule = getVGPRAllocGranule(STI);
1206 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1207
1208 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1209 return 0;
1210
1211 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1212 if (WavesPerEU < MinWavesPerEU)
1213 return getMinNumVGPRs(STI, MinWavesPerEU);
1214
1215 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1216 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1217 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1218}
1219
1220unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1221 assert(WavesPerEU != 0);
1222
1223 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1224 getVGPRAllocGranule(STI));
1225 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1226 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1227}
1228
1229unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1230 std::optional<bool> EnableWavefrontSize32) {
1232 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1233 1;
1234}
1235
// NOTE(review): the first signature line and the start of the return statement
// are absent from this listing; restore them from the upstream file. The
// surviving arguments pair NumVGPRs with the allocation granule for STI.
1237 unsigned NumVGPRs,
1238 std::optional<bool> EnableWavefrontSize32) {
1240 NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
1241}
1242} // end namespace IsaInfo
1243
// NOTE(review): several lines of this definition are absent from this listing,
// including the first signature line (which must declare `KernelCode`), the
// declaration of `Version`, and statements at the gaps in the numbering below.
// Restore them from the upstream file.
//
// Fills default values into a kernel code descriptor for subtarget STI.
1245 const MCSubtargetInfo *STI) {
1247 KernelCode.amd_kernel_code_version_major = 1;
1248 KernelCode.amd_kernel_code_version_minor = 2;
1249 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1250 KernelCode.amd_machine_version_major = Version.Major;
1251 KernelCode.amd_machine_version_minor = Version.Minor;
1252 KernelCode.amd_machine_version_stepping = Version.Stepping;
// wavefront_size is 5 with FeatureWavefrontSize32 and 6 otherwise; presumably
// a log2 encoding (32 vs 64 lanes) — confirm.
1254 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1255 KernelCode.wavefront_size = 5;
1257 } else {
1258 KernelCode.wavefront_size = 6;
1259 }
1260
1261 // If the code object does not support indirect functions, then the value must
1262 // be 0xffffffff.
1263 KernelCode.call_convention = -1;
1264
1265 // These alignment values are specified in powers of two, so alignment =
1266 // 2^n. The minimum alignment is 2^4 = 16.
1267 KernelCode.kernarg_segment_alignment = 4;
1268 KernelCode.group_segment_alignment = 4;
1269 KernelCode.private_segment_alignment = 4;
1270
// WGP_MODE is set to 1 unless FeatureCuMode is present. The operand that
// follows the trailing `|` is on a missing line.
1271 if (Version.Major >= 10) {
1272 KernelCode.compute_pgm_resource_registers |=
1273 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1275 }
1276}
1277
// NOTE(review): the next four definitions lost their signature lines (and, for
// the first two, their whole bodies) in this listing; restore them from the
// upstream file.
1280}
1281
1284}
1285
// Reads the address space of GV and tests it against CONSTANT_ADDRESS and at
// least one further value that sat on the missing line.
1287 unsigned AS = GV->getAddressSpace();
1288 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1290}
1291
// True exactly when the triple's architecture is r600.
1293 return TT.getArch() == Triple::r600;
1294}
1295
/// Parses function attribute \p Name as "first[,second]" into a pair of
/// unsigned integers.
///
/// \returns \p Default when the attribute is not a string attribute or when a
/// component fails to parse (an error is emitted on the function's context in
/// that case). With \p OnlyFirstRequired, an empty second component is
/// accepted and keeps the default's second value.
// NOTE(review): the signature line carrying the function name and its first
// parameters is absent from this listing; restore it from the upstream file.
1296std::pair<unsigned, unsigned>
1298 std::pair<unsigned, unsigned> Default,
1299 bool OnlyFirstRequired) {
1300 Attribute A = F.getFnAttribute(Name);
1301 if (!A.isStringAttribute())
1302 return Default;
1303
1304 LLVMContext &Ctx = F.getContext();
1305 std::pair<unsigned, unsigned> Ints = Default;
// Split at the first comma only; everything after it is the second component.
1306 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1307 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1308 Ctx.emitError("can't parse first integer attribute " + Name);
1309 return Default;
1310 }
1311 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
// A parse failure is tolerated only for an empty, optional second component.
1312 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1313 Ctx.emitError("can't parse second integer attribute " + Name);
1314 return Default;
1315 }
1316 }
1317
1318 return Ints;
1319}
1320
// NOTE(review): the first signature line and the declaration of `Default` are
// absent from this listing; restore them from the upstream file.
//
// Parses function attribute Name as a comma-separated list of exactly Size
// unsigned integers. Returns Default when the attribute is not a string
// attribute, when an element fails to parse, or when the element count is not
// Size; the latter two cases also emit an error on the function's context.
1322 unsigned Size) {
1323 assert(Size > 2);
1325
1326 Attribute A = F.getFnAttribute(Name);
1327 if (!A.isStringAttribute())
1328 return Default;
1329
1330 SmallVector<unsigned> Vals(Size, 0);
1331
1332 LLVMContext &Ctx = F.getContext();
1333
1334 StringRef S = A.getValueAsString();
1335 unsigned i = 0;
// Peel one element off the front of S per iteration.
1336 for (; !S.empty() && i < Size; i++) {
1337 std::pair<StringRef, StringRef> Strs = S.split(',');
1338 unsigned IntVal;
1339 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1340 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1341 Name);
1342 return Default;
1343 }
1344 Vals[i] = IntVal;
1345 S = Strs.second;
1346 }
1347
// Leftover text means too many elements; i < Size means too few.
1348 if (!S.empty() || i < Size) {
1349 Ctx.emitError("attribute " + Name +
1350 " has incorrect number of integers; expected " +
1351 llvm::utostr(Size));
1352 return Default;
1353 }
1354 return Vals;
1355}
1356
// NOTE(review): the nine definitions below all lost their signature lines in
// this listing; restore them from the upstream file. Each body returns an
// all-ones value whose width is the bit width of one counter field for
// Version.Major.

// Width: vmcnt low field plus vmcnt high field.
1358 return (1 << (getVmcntBitWidthLo(Version.Major) +
1359 getVmcntBitWidthHi(Version.Major))) -
1360 1;
1361}
1362
// Width: loadcnt field.
1364 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1365}
1366
// Width: samplecnt field.
1368 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1369}
1370
// Width: bvhcnt field.
1372 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1373}
1374
// Width: expcnt field.
1376 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1377}
1378
// Width: lgkmcnt field.
1380 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1381}
1382
// Width: dscnt field.
1384 return (1 << getDscntBitWidth(Version.Major)) - 1;
1385}
1386
// Width: kmcnt field.
1388 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1389}
1390
// Width: storecnt field.
1392 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1393}
1394
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// Builds the in-place mask of every counter field in the combined waitcnt
// encoding for Version.Major: vmcnt (low and high parts), expcnt and lgkmcnt.
1396 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1397 getVmcntBitWidthLo(Version.Major));
1398 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1399 getExpcntBitWidth(Version.Major));
1400 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1401 getLgkmcntBitWidth(Version.Major));
1402 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1403 getVmcntBitWidthHi(Version.Major));
1404 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1405}
1406
1407unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1408 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1409 getVmcntBitWidthLo(Version.Major));
1410 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1411 getVmcntBitWidthHi(Version.Major));
1412 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1413}
1414
1415unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1416 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1417 getExpcntBitWidth(Version.Major));
1418}
1419
1420unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1421 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1422 getLgkmcntBitWidth(Version.Major));
1423}
1424
// NOTE(review): the first signature line (which must declare `Version` and
// `Waitcnt`) is absent from this listing; restore it from the upstream file.
//
// Decodes all three counters of a combined waitcnt value into the
// out-parameters Vmcnt, Expcnt and Lgkmcnt.
1426 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1427 Vmcnt = decodeVmcnt(Version, Waitcnt);
1428 Expcnt = decodeExpcnt(Version, Waitcnt);
1429 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1430}
1431
1432Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1433 Waitcnt Decoded;
1434 Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1435 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1436 Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1437 return Decoded;
1438}
1439
1440unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1441 unsigned Vmcnt) {
1442 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1443 getVmcntBitWidthLo(Version.Major));
1444 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1445 getVmcntBitShiftHi(Version.Major),
1446 getVmcntBitWidthHi(Version.Major));
1447}
1448
1449unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1450 unsigned Expcnt) {
1451 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1452 getExpcntBitWidth(Version.Major));
1453}
1454
1455unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1456 unsigned Lgkmcnt) {
1457 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1458 getLgkmcntBitWidth(Version.Major));
1459}
1460
// NOTE(review): the first signature line and one statement (at the gap in the
// numbering after the initialisation) are absent from this listing. As shown,
// the Vmcnt parameter is never packed; restore both lines from the upstream
// file.
//
// Starts from the mask with every counter field set and packs the individual
// counters into it.
1462 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1463 unsigned Waitcnt = getWaitcntBitMask(Version);
1465 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1466 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1467 return Waitcnt;
1468}
1469
1470unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1471 return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1472}
1473
// NOTE(review): the first signature line (which must declare `Version`) is
// absent from this listing; restore it from the upstream file.
//
// Builds the in-place mask of the dscnt field combined with either the
// storecnt field (IsStore) or the loadcnt field (otherwise). Both of those
// share one bit position and differ only in width.
1475 bool IsStore) {
1476 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1477 getDscntBitWidth(Version.Major));
1478 if (IsStore) {
1479 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1480 getStorecntBitWidth(Version.Major));
1481 return Dscnt | Storecnt;
1482 } else {
1483 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1484 getLoadcntBitWidth(Version.Major));
1485 return Dscnt | Loadcnt;
1486 }
1487}
1488
1489Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1490 Waitcnt Decoded;
1491 Decoded.LoadCnt =
1492 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1493 getLoadcntBitWidth(Version.Major));
1494 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1495 getDscntBitWidth(Version.Major));
1496 return Decoded;
1497}
1498
1499Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1500 Waitcnt Decoded;
1501 Decoded.StoreCnt =
1502 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1503 getStorecntBitWidth(Version.Major));
1504 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1505 getDscntBitWidth(Version.Major));
1506 return Decoded;
1507}
1508
1509static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1510 unsigned Loadcnt) {
1511 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1512 getLoadcntBitWidth(Version.Major));
1513}
1514
1515static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1516 unsigned Storecnt) {
1517 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1518 getStorecntBitWidth(Version.Major));
1519}
1520
1521static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1522 unsigned Dscnt) {
1523 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1524 getDscntBitWidth(Version.Major));
1525}
1526
1527static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1528 unsigned Dscnt) {
1529 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1530 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1532 return Waitcnt;
1533}
1534
1535unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1536 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1537}
1538
// NOTE(review): both definitions below lost their first signature line in this
// listing, and the first also lost one statement (at the gap in the numbering
// before the return). As shown, its Dscnt parameter is never packed. Restore
// the missing lines from the upstream file.

// Starts from the storecnt/dscnt mask and packs the counters into it.
1540 unsigned Storecnt, unsigned Dscnt) {
1541 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1542 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1544 return Waitcnt;
1545}
1546
// Struct-taking overload: forwards the StoreCnt and DsCnt members of Decoded.
1548 const Waitcnt &Decoded) {
1549 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1550}
1551
1552//===----------------------------------------------------------------------===//
1553// Custom Operand Values
1554//===----------------------------------------------------------------------===//
1555
// NOTE(review): the first signature line (which must declare `Opr`) is absent
// from this listing; restore it from the upstream file.
//
// ORs together the encoded default value of every operand in Opr[0..Size)
// that is supported on STI.
1557 int Size,
1558 const MCSubtargetInfo &STI) {
1559 unsigned Enc = 0;
1560 for (int Idx = 0; Idx < Size; ++Idx) {
1561 const auto &Op = Opr[Idx];
1562 if (Op.isSupported(STI))
1563 Enc |= Op.encode(Op.Default);
1564 }
1565 return Enc;
1566}
1567
// NOTE(review): the first signature line (which must declare `Opr`) is absent
// from this listing; restore it from the upstream file.
//
// Checks whether Code can be expressed purely through the supported operands
// in Opr[0..Size): every supported operand must decode to a valid value, and
// Code must have no bits set outside the supported operands' masks.
// HasNonDefaultVal is reset and then set if any decoded value differs from
// its operand's default. On an early `false` return it reflects only the
// operands examined so far.
1569 int Size, unsigned Code,
1570 bool &HasNonDefaultVal,
1571 const MCSubtargetInfo &STI) {
1572 unsigned UsedOprMask = 0;
1573 HasNonDefaultVal = false;
1574 for (int Idx = 0; Idx < Size; ++Idx) {
1575 const auto &Op = Opr[Idx];
1576 if (!Op.isSupported(STI))
1577 continue;
1578 UsedOprMask |= Op.getMask();
1579 unsigned Val = Op.decode(Code);
1580 if (!Op.isValid(Val))
1581 return false;
1582 HasNonDefaultVal |= (Val != Op.Default);
1583 }
1584 return (Code & ~UsedOprMask) == 0;
1585}
1586
1587static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1588 unsigned Code, int &Idx, StringRef &Name,
1589 unsigned &Val, bool &IsDefault,
1590 const MCSubtargetInfo &STI) {
1591 while (Idx < Size) {
1592 const auto &Op = Opr[Idx++];
1593 if (Op.isSupported(STI)) {
1594 Name = Op.Name;
1595 Val = Op.decode(Code);
1596 IsDefault = (Val == Op.Default);
1597 return true;
1598 }
1599 }
1600
1601 return false;
1602}
1603
// NOTE(review): the first signature line (which must declare `Op`) is absent
// from this listing; restore it from the upstream file.
//
// Encodes InputVal for operand Op, or yields OPR_VAL_INVALID when InputVal is
// negative or greater than Op.Max.
1605 int64_t InputVal) {
1606 if (InputVal < 0 || InputVal > Op.Max)
1607 return OPR_VAL_INVALID;
1608 return Op.encode(InputVal);
1609}
1610
1611static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1612 const StringRef Name, int64_t InputVal,
1613 unsigned &UsedOprMask,
1614 const MCSubtargetInfo &STI) {
1615 int InvalidId = OPR_ID_UNKNOWN;
1616 for (int Idx = 0; Idx < Size; ++Idx) {
1617 const auto &Op = Opr[Idx];
1618 if (Op.Name == Name) {
1619 if (!Op.isSupported(STI)) {
1620 InvalidId = OPR_ID_UNSUPPORTED;
1621 continue;
1622 }
1623 auto OprMask = Op.getMask();
1624 if (OprMask & UsedOprMask)
1625 return OPR_ID_DUPLICATE;
1626 UsedOprMask |= OprMask;
1627 return encodeCustomOperandVal(Op, InputVal);
1628 }
1629 }
1630 return InvalidId;
1631}
1632
1633//===----------------------------------------------------------------------===//
1634// DepCtr
1635//===----------------------------------------------------------------------===//
1636
1637namespace DepCtr {
1638
// NOTE(review): the signature line of this definition and the statement
// guarded by the `if` below are absent from this listing; restore them from
// the upstream file. As shown, the `if` would guard the return statement.
//
// Lazily computes a value once and keeps it in a function-local static. -1
// marks "not computed yet".
1640 static int Default = -1;
1641 if (Default == -1)
1643 return Default;
1644}
1645
/// Reports whether \p Code can be written with symbolic DepCtr operands on
/// \p STI; \p HasNonDefaultVal is an out-parameter passed on to the callee.
// NOTE(review): the start of the return statement (callee name and leading
// arguments) is absent from this listing; restore it from the upstream file.
1646bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1647 const MCSubtargetInfo &STI) {
1649 HasNonDefaultVal, STI);
1650}
1651
1652bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1653 bool &IsDefault, const MCSubtargetInfo &STI) {
1654 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1655 IsDefault, STI);
1656}
1657
1658int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1659 const MCSubtargetInfo &STI) {
1660 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1661 STI);
1662}
1663
1664unsigned decodeFieldVmVsrc(unsigned Encoded) {
1665 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1666}
1667
1668unsigned decodeFieldVaVdst(unsigned Encoded) {
1669 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1670}
1671
1672unsigned decodeFieldSaSdst(unsigned Encoded) {
1673 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1674}
1675
1676unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1677 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1678}
1679
1680unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1681 return encodeFieldVmVsrc(0xffff, VmVsrc);
1682}
1683
1684unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1685 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1686}
1687
1688unsigned encodeFieldVaVdst(unsigned VaVdst) {
1689 return encodeFieldVaVdst(0xffff, VaVdst);
1690}
1691
1692unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1693 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1694}
1695
1696unsigned encodeFieldSaSdst(unsigned SaSdst) {
1697 return encodeFieldSaSdst(0xffff, SaSdst);
1698}
1699
1700} // namespace DepCtr
1701
1702//===----------------------------------------------------------------------===//
1703// exp tgt
1704//===----------------------------------------------------------------------===//
1705
1706namespace Exp {
1707
// One row of the export-target table.
// NOTE(review): the first member (used elsewhere in this file as `Name`) is on
// a line absent from this listing; restore it from the upstream file.
1708struct ExpTgt {
// Id of the target, or of its first slot when the target is indexed.
1710 unsigned Tgt;
// Highest index accepted after the name; 0 is treated as "not indexed".
1711 unsigned MaxIndex;
1712};
1713
1714static constexpr ExpTgt ExpTgtInfo[] = {
1715 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
1716 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
1717 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
1718 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
1719 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
1720 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1721 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
1722};
1723
1724bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1725 for (const ExpTgt &Val : ExpTgtInfo) {
1726 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1727 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1728 Name = Val.Name;
1729 return true;
1730 }
1731 }
1732 return false;
1733}
1734
1735unsigned getTgtId(const StringRef Name) {
1736
1737 for (const ExpTgt &Val : ExpTgtInfo) {
1738 if (Val.MaxIndex == 0 && Name == Val.Name)
1739 return Val.Tgt;
1740
1741 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1742 StringRef Suffix = Name.drop_front(Val.Name.size());
1743
1744 unsigned Id;
1745 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1746 return ET_INVALID;
1747
1748 // Disable leading zeroes
1749 if (Suffix.size() > 1 && Suffix[0] == '0')
1750 return ET_INVALID;
1751
1752 return Val.Tgt + Id;
1753 }
1754 }
1755 return ET_INVALID;
1756}
1757
1758bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1759 switch (Id) {
1760 case ET_NULL:
1761 return !isGFX11Plus(STI);
1762 case ET_POS4:
1763 case ET_PRIM:
1764 return isGFX10Plus(STI);
1765 case ET_DUAL_SRC_BLEND0:
1766 case ET_DUAL_SRC_BLEND1:
1767 return isGFX11Plus(STI);
1768 default:
1769 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1770 return !isGFX11Plus(STI);
1771 return true;
1772 }
1773}
1774
1775} // namespace Exp
1776
1777//===----------------------------------------------------------------------===//
1778// MTBUF Format
1779//===----------------------------------------------------------------------===//
1780
1781namespace MTBUFFormat {
1782
1783int64_t getDfmt(const StringRef Name) {
1784 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1785 if (Name == DfmtSymbolic[Id])
1786 return Id;
1787 }
1788 return DFMT_UNDEF;
1789}
1790
// NOTE(review): both definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Returns the symbolic name of data format Id; Id must not exceed DFMT_MAX.
1792 assert(Id <= DFMT_MAX);
1793 return DfmtSymbolic[Id];
1794}
1795
// Picks the numeric-format name table for STI: one for SI/CI, one for
// VI/GFX9, and the GFX10 table for everything else.
1797 if (isSI(STI) || isCI(STI))
1798 return NfmtSymbolicSICI;
1799 if (isVI(STI) || isGFX9(STI))
1800 return NfmtSymbolicVI;
1801 return NfmtSymbolicGFX10;
1802}
1803
1804int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1805 auto lookupTable = getNfmtLookupTable(STI);
1806 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1807 if (Name == lookupTable[Id])
1808 return Id;
1809 }
1810 return NFMT_UNDEF;
1811}
1812
1813StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1814 assert(Id <= NFMT_MAX);
1815 return getNfmtLookupTable(STI)[Id];
1816}
1817
1818bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1819 unsigned Dfmt;
1820 unsigned Nfmt;
1821 decodeDfmtNfmt(Id, Dfmt, Nfmt);
1822 return isValidNfmt(Nfmt, STI);
1823}
1824
1825bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1826 return !getNfmtName(Id, STI).empty();
1827}
1828
1829int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1830 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1831}
1832
1833void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1834 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1835 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1836}
1837
// NOTE(review): both definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Looks Name up in the unified-format name table for STI (the GFX11 table on
// GFX11+, the GFX10 table otherwise) and returns the matching id, or
// UFMT_UNDEF when there is none.
1839 if (isGFX11Plus(STI)) {
1840 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1841 if (Name == UfmtSymbolicGFX11[Id])
1842 return Id;
1843 }
1844 } else {
1845 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1846 if (Name == UfmtSymbolicGFX10[Id])
1847 return Id;
1848 }
1849 }
1850 return UFMT_UNDEF;
1851}
1852
// Returns the symbolic name of unified format Id on STI, or an empty string
// when Id is not a valid unified format there.
1854 if(isValidUnifiedFormat(Id, STI))
1855 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1856 return "";
1857}
1858
1859bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1860 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1861}
1862
1863int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1864 const MCSubtargetInfo &STI) {
1865 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1866 if (isGFX11Plus(STI)) {
1867 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1868 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1869 return Id;
1870 }
1871 } else {
1872 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1873 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1874 return Id;
1875 }
1876 }
1877 return UFMT_UNDEF;
1878}
1879
1880bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1881 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1882}
1883
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// Returns the default format encoding for STI: UFMT_DEFAULT on GFX10+ and
// DFMT_NFMT_DEFAULT otherwise.
1885 if (isGFX10Plus(STI))
1886 return UFMT_DEFAULT;
1887 return DFMT_NFMT_DEFAULT;
1888}
1889
1890} // namespace MTBUFFormat
1891
1892//===----------------------------------------------------------------------===//
1893// SendMsg
1894//===----------------------------------------------------------------------===//
1895
1896namespace SendMsg {
1897
// NOTE(review): this closing brace is all that survives of a definition whose
// signature and body are absent from this listing; restore them from the
// upstream file.
1900}
1901
1902bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1903 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1904}
1905
1906bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1907 bool Strict) {
1908 assert(isValidMsgId(MsgId, STI));
1909
1910 if (!Strict)
1911 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1912
1913 if (msgRequiresOp(MsgId, STI)) {
1914 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
1915 return false;
1916
1917 return !getMsgOpName(MsgId, OpId, STI).empty();
1918 }
1919
1920 return OpId == OP_NONE_;
1921}
1922
/// \returns Whether \p StreamId is acceptable for the given message and
/// operation on \p STI. Without \p Strict, any non-negative value that fits
/// the stream-id field is accepted.
///
/// \p MsgId / \p OpId must already form a valid pair (asserted).
// NOTE(review): four lines inside the switch are absent from this listing (the
// numbering jumps after the first case label and again inside the
// conditional). The strict pre-GFX11 rules cannot be read from this text;
// restore the lines from the upstream file.
1923bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1924 const MCSubtargetInfo &STI, bool Strict) {
1925 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1926
1927 if (!Strict)
1928 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1929
1930 if (!isGFX11Plus(STI)) {
1931 switch (MsgId) {
1932 case ID_GS_PreGFX11:
1935 return (OpId == OP_GS_NOP) ?
1938 }
1939 }
// Every case not handled above accepts only the "no stream" value.
1940 return StreamId == STREAM_ID_NONE_;
1941}
1942
1943bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1944 return MsgId == ID_SYSMSG ||
1945 (!isGFX11Plus(STI) &&
1946 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1947}
1948
1949bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1950 const MCSubtargetInfo &STI) {
1951 return !isGFX11Plus(STI) &&
1952 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1953 OpId != OP_GS_NOP;
1954}
1955
/// Splits the encoded message \p Val into \p MsgId, \p OpId and \p StreamId.
/// On GFX11+ only the message id is encoded; the other two are set to 0.
// NOTE(review): one statement in the else-branch is absent from this listing
// (the numbering jumps after the OpId assignment). As shown, StreamId is left
// unwritten before GFX11. Restore the line from the upstream file.
1956void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1957 uint16_t &StreamId, const MCSubtargetInfo &STI) {
1958 MsgId = Val & getMsgIdMask(STI);
1959 if (isGFX11Plus(STI)) {
1960 OpId = 0;
1961 StreamId = 0;
1962 } else {
1963 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1965 }
1966}
1967
// NOTE(review): the first and last lines of this signature (which must declare
// `MsgId` and `StreamId`) are absent from this listing; restore them from the
// upstream file.
//
// Combines the three parts of a message: the id in the low bits, with the
// operation and stream id shifted into their fields.
1969 uint64_t OpId,
1971 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
1972}
1973
1974} // namespace SendMsg
1975
1976//===----------------------------------------------------------------------===//
1977//
1978//===----------------------------------------------------------------------===//
1979
// NOTE(review): the three definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Reads the "InitialPSInputAddr" function attribute as an integer; 0 when it
// is absent.
1981 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
1982}
1983
// True when "amdgpu-color-export" parses to non-zero. When the attribute is
// absent, the answer is true only for the AMDGPU_PS calling convention.
1985 // As a safe default always respond as if PS has color exports.
1986 return F.getFnAttributeAsParsedInteger(
1987 "amdgpu-color-export",
1988 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1989}
1990
// True when "amdgpu-depth-export" parses to non-zero; false when absent.
1992 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
1993}
1994
// NOTE(review): the six calling-convention predicates below lost their
// signature lines in this listing, and every `case` label inside their
// switches is missing too. Which conventions each one accepts cannot be read
// from this text; restore the lines from the upstream file.

// Switch over cc; the accepted conventions are on the missing lines.
1996 switch(cc) {
2006 return true;
2007 default:
2008 return false;
2009 }
2010}
2011
// A shader convention, or AMDGPU_Gfx.
2013 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2014}
2015
// Anything that is not a graphics convention, plus AMDGPU_CS.
2017 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2018}
2019
// Switch over CC; the accepted conventions are on the missing lines.
2021 switch (CC) {
2031 return true;
2032 default:
2033 return false;
2034 }
2035}
2036
// The conventions on the missing lines, plus every entry-function and chain
// convention.
2038 switch (CC) {
2040 return true;
2041 default:
2042 return isEntryFunctionCC(CC) || isChainCC(CC);
2043 }
2044}
2045
// Switch over CC; the accepted conventions are on the missing lines.
2047 switch (CC) {
2050 return true;
2051 default:
2052 return false;
2053 }
2054}
2055
2056bool isKernelCC(const Function *Func) {
2057 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2058}
2059
2060bool hasXNACK(const MCSubtargetInfo &STI) {
2061 return STI.hasFeature(AMDGPU::FeatureXNACK);
2062}
2063
2064bool hasSRAMECC(const MCSubtargetInfo &STI) {
2065 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2066}
2067
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// True when STI has FeatureMIMG_R128 but not FeatureR128A16.
2069 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2070}
2071
2072bool hasA16(const MCSubtargetInfo &STI) {
2073 return STI.hasFeature(AMDGPU::FeatureA16);
2074}
2075
2076bool hasG16(const MCSubtargetInfo &STI) {
2077 return STI.hasFeature(AMDGPU::FeatureG16);
2078}
2079
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// True when STI lacks FeatureUnpackedD16VMem and is neither CI nor SI.
2081 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2082 !isSI(STI);
2083}
2084
2085bool hasGDS(const MCSubtargetInfo &STI) {
2086 return STI.hasFeature(AMDGPU::FeatureGDS);
2087}
2088
2089unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2090 auto Version = getIsaVersion(STI.getCPU());
2091 if (Version.Major == 10)
2092 return Version.Minor >= 3 ? 13 : 5;
2093 if (Version.Major == 11)
2094 return 5;
2095 if (Version.Major >= 12)
2096 return HasSampler ? 4 : 5;
2097 return 0;
2098}
2099
2100unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2101
2102bool isSI(const MCSubtargetInfo &STI) {
2103 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2104}
2105
2106bool isCI(const MCSubtargetInfo &STI) {
2107 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2108}
2109
2110bool isVI(const MCSubtargetInfo &STI) {
2111 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2112}
2113
2114bool isGFX9(const MCSubtargetInfo &STI) {
2115 return STI.hasFeature(AMDGPU::FeatureGFX9);
2116}
2117
// NOTE(review): the three definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// True for GFX9 or GFX10.
2119 return isGFX9(STI) || isGFX10(STI);
2120}
2121
// True for GFX9, GFX10 or GFX11.
2123 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2124}
2125
// True for VI, GFX9 or GFX10.
2127 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2128}
2129
2130bool isGFX8Plus(const MCSubtargetInfo &STI) {
2131 return isVI(STI) || isGFX9Plus(STI);
2132}
2133
2134bool isGFX9Plus(const MCSubtargetInfo &STI) {
2135 return isGFX9(STI) || isGFX10Plus(STI);
2136}
2137
2138bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2139
2140bool isGFX10(const MCSubtargetInfo &STI) {
2141 return STI.hasFeature(AMDGPU::FeatureGFX10);
2142}
2143
// NOTE(review): both definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// True for GFX10 or GFX11.
2145 return isGFX10(STI) || isGFX11(STI);
2146}
2147
// True for GFX10 or anything GFX11 and newer.
2149 return isGFX10(STI) || isGFX11Plus(STI);
2150}
2151
2152bool isGFX11(const MCSubtargetInfo &STI) {
2153 return STI.hasFeature(AMDGPU::FeatureGFX11);
2154}
2155
// NOTE(review): the signature line of this definition is absent from this
// listing; restore it from the upstream file.
//
// True for GFX11 or anything GFX12 and newer.
2157 return isGFX11(STI) || isGFX12Plus(STI);
2158}
2159
2160bool isGFX12(const MCSubtargetInfo &STI) {
2161 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2162}
2163
2164bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2165
2166bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2167
// NOTE(review): the eight definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Negation of isGFX11Plus.
2169 return !isGFX11Plus(STI);
2170}
2171
// True for SI, CI, VI or GFX9.
2173 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2174}
2175
// True for GFX10 subtargets without the GFX10 B encoding.
2177 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2178}
2179
// Feature test: GCN3 encoding.
2181 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2182}
2183
// Feature test: GFX10 A encoding.
2185 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2186}
2187
// Feature test: GFX10 B encoding.
2189 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2190}
2191
// Feature test: GFX10.3 instructions.
2193 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2194}
2195
// True for subtargets with the GFX10 B encoding that are older than GFX12.
2197 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2198}
2199
2200bool isGFX90A(const MCSubtargetInfo &STI) {
2201 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2202}
2203
2204bool isGFX940(const MCSubtargetInfo &STI) {
2205 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2206}
2207
// NOTE(review): both definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Feature test: architected flat scratch.
2209 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2210}
2211
// Feature test: MAI instructions.
2213 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2214}
2215
2216bool hasVOPD(const MCSubtargetInfo &STI) {
2217 return STI.hasFeature(AMDGPU::FeatureVOPD);
2218}
2219
// NOTE(review): both definitions below lost their signature lines in this
// listing; restore them from the upstream file.

// Feature test: DPP src1 SGPR.
2221 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2222}
2223
// Feature test: kernarg preload.
2225 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2226}
2227
2228int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2229 int32_t ArgNumVGPR) {
2230 if (has90AInsts && ArgNumAGPR)
2231 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2232 return std::max(ArgNumVGPR, ArgNumAGPR);
2233}
2234
2235bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2236 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2237 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2238 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2239 Reg == AMDGPU::SCC;
2240}
2241
2242bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2243 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2244}
2245
// Body of a register-mapping switch, shared by two functions. It expands to a
// `switch (Reg)` whose default returns Reg unchanged and whose remaining
// cases come from the CASE_* helper macros, which must be defined before this
// macro is expanded. Expanding it requires a variable named `Reg` in scope;
// the first set of CASE_* definitions below also requires `STI`.
2246#define MAP_REG2REG \
2247 using namespace AMDGPU; \
2248 switch(Reg) { \
2249 default: return Reg; \
2250 CASE_CI_VI(FLAT_SCR) \
2251 CASE_CI_VI(FLAT_SCR_LO) \
2252 CASE_CI_VI(FLAT_SCR_HI) \
2253 CASE_VI_GFX9PLUS(TTMP0) \
2254 CASE_VI_GFX9PLUS(TTMP1) \
2255 CASE_VI_GFX9PLUS(TTMP2) \
2256 CASE_VI_GFX9PLUS(TTMP3) \
2257 CASE_VI_GFX9PLUS(TTMP4) \
2258 CASE_VI_GFX9PLUS(TTMP5) \
2259 CASE_VI_GFX9PLUS(TTMP6) \
2260 CASE_VI_GFX9PLUS(TTMP7) \
2261 CASE_VI_GFX9PLUS(TTMP8) \
2262 CASE_VI_GFX9PLUS(TTMP9) \
2263 CASE_VI_GFX9PLUS(TTMP10) \
2264 CASE_VI_GFX9PLUS(TTMP11) \
2265 CASE_VI_GFX9PLUS(TTMP12) \
2266 CASE_VI_GFX9PLUS(TTMP13) \
2267 CASE_VI_GFX9PLUS(TTMP14) \
2268 CASE_VI_GFX9PLUS(TTMP15) \
2269 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2270 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2271 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2272 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2273 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2274 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2275 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2276 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2277 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2278 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2279 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2280 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2281 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2282 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2283 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2284 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2285 CASE_GFXPRE11_GFX11PLUS(M0) \
2286 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2287 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2288 }
2289
// Forward direction of the register mapping: each helper emits one `case`
// that maps a generic register to its subtarget-specific variant, chosen by
// querying `STI`.

// FLAT_SCR-style registers: _ci on CI, _vi otherwise (SI is asserted out).
2290#define CASE_CI_VI(node) \
2291 assert(!isSI(STI)); \
2292 case node: return isCI(STI) ? node##_ci : node##_vi;
2293
// _gfx9plus on GFX9 and newer, _vi otherwise.
2294#define CASE_VI_GFX9PLUS(node) \
2295 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2296
// _gfx11plus on GFX11 and newer, _gfxpre11 otherwise.
2297#define CASE_GFXPRE11_GFX11PLUS(node) \
2298 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2299
// As above, but `node` maps to the variants of a different register, `result`.
2300#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2301 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2302
2303unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2304 if (STI.getTargetTriple().getArch() == Triple::r600)
2305 return Reg;
2307}
2308
2309#undef CASE_CI_VI
2310#undef CASE_VI_GFX9PLUS
2311#undef CASE_GFXPRE11_GFX11PLUS
2312#undef CASE_GFXPRE11_GFX11PLUS_TO
2313
2314#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2315#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2316#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2317#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2318
2319unsigned mc2PseudoReg(unsigned Reg) {
2321}
2322
2323bool isInlineValue(unsigned Reg) {
2324 switch (Reg) {
2325 case AMDGPU::SRC_SHARED_BASE_LO:
2326 case AMDGPU::SRC_SHARED_BASE:
2327 case AMDGPU::SRC_SHARED_LIMIT_LO:
2328 case AMDGPU::SRC_SHARED_LIMIT:
2329 case AMDGPU::SRC_PRIVATE_BASE_LO:
2330 case AMDGPU::SRC_PRIVATE_BASE:
2331 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2332 case AMDGPU::SRC_PRIVATE_LIMIT:
2333 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2334 return true;
2335 case AMDGPU::SRC_VCCZ:
2336 case AMDGPU::SRC_EXECZ:
2337 case AMDGPU::SRC_SCC:
2338 return true;
2339 case AMDGPU::SGPR_NULL:
2340 return true;
2341 default:
2342 return false;
2343 }
2344}
2345
2346#undef CASE_CI_VI
2347#undef CASE_VI_GFX9PLUS
2348#undef CASE_GFXPRE11_GFX11PLUS
2349#undef CASE_GFXPRE11_GFX11PLUS_TO
2350#undef MAP_REG2REG
2351
2352bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2353 assert(OpNo < Desc.NumOperands);
2354 unsigned OpType = Desc.operands()[OpNo].OperandType;
2355 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2356 OpType <= AMDGPU::OPERAND_SRC_LAST;
2357}
2358
2359bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2360 assert(OpNo < Desc.NumOperands);
2361 unsigned OpType = Desc.operands()[OpNo].OperandType;
2362 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2363 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2364}
2365
2366bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2367 assert(OpNo < Desc.NumOperands);
2368 unsigned OpType = Desc.operands()[OpNo].OperandType;
2369 switch (OpType) {
2386 return true;
2387 default:
2388 return false;
2389 }
2390}
2391
2392bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2393 assert(OpNo < Desc.NumOperands);
2394 unsigned OpType = Desc.operands()[OpNo].OperandType;
2395 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2399}
2400
2401// Avoid using MCRegisterClass::getSize, since that function will go away
2402// (move from MC* level to Target* level). Return size in bits.
2403unsigned getRegBitWidth(unsigned RCID) {
2404 switch (RCID) {
2405 case AMDGPU::SGPR_LO16RegClassID:
2406 case AMDGPU::AGPR_LO16RegClassID:
2407 return 16;
2408 case AMDGPU::SGPR_32RegClassID:
2409 case AMDGPU::VGPR_32RegClassID:
2410 case AMDGPU::VRegOrLds_32RegClassID:
2411 case AMDGPU::AGPR_32RegClassID:
2412 case AMDGPU::VS_32RegClassID:
2413 case AMDGPU::AV_32RegClassID:
2414 case AMDGPU::SReg_32RegClassID:
2415 case AMDGPU::SReg_32_XM0RegClassID:
2416 case AMDGPU::SRegOrLds_32RegClassID:
2417 return 32;
2418 case AMDGPU::SGPR_64RegClassID:
2419 case AMDGPU::VS_64RegClassID:
2420 case AMDGPU::SReg_64RegClassID:
2421 case AMDGPU::VReg_64RegClassID:
2422 case AMDGPU::AReg_64RegClassID:
2423 case AMDGPU::SReg_64_XEXECRegClassID:
2424 case AMDGPU::VReg_64_Align2RegClassID:
2425 case AMDGPU::AReg_64_Align2RegClassID:
2426 case AMDGPU::AV_64RegClassID:
2427 case AMDGPU::AV_64_Align2RegClassID:
2428 return 64;
2429 case AMDGPU::SGPR_96RegClassID:
2430 case AMDGPU::SReg_96RegClassID:
2431 case AMDGPU::VReg_96RegClassID:
2432 case AMDGPU::AReg_96RegClassID:
2433 case AMDGPU::VReg_96_Align2RegClassID:
2434 case AMDGPU::AReg_96_Align2RegClassID:
2435 case AMDGPU::AV_96RegClassID:
2436 case AMDGPU::AV_96_Align2RegClassID:
2437 return 96;
2438 case AMDGPU::SGPR_128RegClassID:
2439 case AMDGPU::SReg_128RegClassID:
2440 case AMDGPU::VReg_128RegClassID:
2441 case AMDGPU::AReg_128RegClassID:
2442 case AMDGPU::VReg_128_Align2RegClassID:
2443 case AMDGPU::AReg_128_Align2RegClassID:
2444 case AMDGPU::AV_128RegClassID:
2445 case AMDGPU::AV_128_Align2RegClassID:
2446 return 128;
2447 case AMDGPU::SGPR_160RegClassID:
2448 case AMDGPU::SReg_160RegClassID:
2449 case AMDGPU::VReg_160RegClassID:
2450 case AMDGPU::AReg_160RegClassID:
2451 case AMDGPU::VReg_160_Align2RegClassID:
2452 case AMDGPU::AReg_160_Align2RegClassID:
2453 case AMDGPU::AV_160RegClassID:
2454 case AMDGPU::AV_160_Align2RegClassID:
2455 return 160;
2456 case AMDGPU::SGPR_192RegClassID:
2457 case AMDGPU::SReg_192RegClassID:
2458 case AMDGPU::VReg_192RegClassID:
2459 case AMDGPU::AReg_192RegClassID:
2460 case AMDGPU::VReg_192_Align2RegClassID:
2461 case AMDGPU::AReg_192_Align2RegClassID:
2462 case AMDGPU::AV_192RegClassID:
2463 case AMDGPU::AV_192_Align2RegClassID:
2464 return 192;
2465 case AMDGPU::SGPR_224RegClassID:
2466 case AMDGPU::SReg_224RegClassID:
2467 case AMDGPU::VReg_224RegClassID:
2468 case AMDGPU::AReg_224RegClassID:
2469 case AMDGPU::VReg_224_Align2RegClassID:
2470 case AMDGPU::AReg_224_Align2RegClassID:
2471 case AMDGPU::AV_224RegClassID:
2472 case AMDGPU::AV_224_Align2RegClassID:
2473 return 224;
2474 case AMDGPU::SGPR_256RegClassID:
2475 case AMDGPU::SReg_256RegClassID:
2476 case AMDGPU::VReg_256RegClassID:
2477 case AMDGPU::AReg_256RegClassID:
2478 case AMDGPU::VReg_256_Align2RegClassID:
2479 case AMDGPU::AReg_256_Align2RegClassID:
2480 case AMDGPU::AV_256RegClassID:
2481 case AMDGPU::AV_256_Align2RegClassID:
2482 return 256;
2483 case AMDGPU::SGPR_288RegClassID:
2484 case AMDGPU::SReg_288RegClassID:
2485 case AMDGPU::VReg_288RegClassID:
2486 case AMDGPU::AReg_288RegClassID:
2487 case AMDGPU::VReg_288_Align2RegClassID:
2488 case AMDGPU::AReg_288_Align2RegClassID:
2489 case AMDGPU::AV_288RegClassID:
2490 case AMDGPU::AV_288_Align2RegClassID:
2491 return 288;
2492 case AMDGPU::SGPR_320RegClassID:
2493 case AMDGPU::SReg_320RegClassID:
2494 case AMDGPU::VReg_320RegClassID:
2495 case AMDGPU::AReg_320RegClassID:
2496 case AMDGPU::VReg_320_Align2RegClassID:
2497 case AMDGPU::AReg_320_Align2RegClassID:
2498 case AMDGPU::AV_320RegClassID:
2499 case AMDGPU::AV_320_Align2RegClassID:
2500 return 320;
2501 case AMDGPU::SGPR_352RegClassID:
2502 case AMDGPU::SReg_352RegClassID:
2503 case AMDGPU::VReg_352RegClassID:
2504 case AMDGPU::AReg_352RegClassID:
2505 case AMDGPU::VReg_352_Align2RegClassID:
2506 case AMDGPU::AReg_352_Align2RegClassID:
2507 case AMDGPU::AV_352RegClassID:
2508 case AMDGPU::AV_352_Align2RegClassID:
2509 return 352;
2510 case AMDGPU::SGPR_384RegClassID:
2511 case AMDGPU::SReg_384RegClassID:
2512 case AMDGPU::VReg_384RegClassID:
2513 case AMDGPU::AReg_384RegClassID:
2514 case AMDGPU::VReg_384_Align2RegClassID:
2515 case AMDGPU::AReg_384_Align2RegClassID:
2516 case AMDGPU::AV_384RegClassID:
2517 case AMDGPU::AV_384_Align2RegClassID:
2518 return 384;
2519 case AMDGPU::SGPR_512RegClassID:
2520 case AMDGPU::SReg_512RegClassID:
2521 case AMDGPU::VReg_512RegClassID:
2522 case AMDGPU::AReg_512RegClassID:
2523 case AMDGPU::VReg_512_Align2RegClassID:
2524 case AMDGPU::AReg_512_Align2RegClassID:
2525 case AMDGPU::AV_512RegClassID:
2526 case AMDGPU::AV_512_Align2RegClassID:
2527 return 512;
2528 case AMDGPU::SGPR_1024RegClassID:
2529 case AMDGPU::SReg_1024RegClassID:
2530 case AMDGPU::VReg_1024RegClassID:
2531 case AMDGPU::AReg_1024RegClassID:
2532 case AMDGPU::VReg_1024_Align2RegClassID:
2533 case AMDGPU::AReg_1024_Align2RegClassID:
2534 case AMDGPU::AV_1024RegClassID:
2535 case AMDGPU::AV_1024_Align2RegClassID:
2536 return 1024;
2537 default:
2538 llvm_unreachable("Unexpected register class");
2539 }
2540}
2541
2542unsigned getRegBitWidth(const MCRegisterClass &RC) {
2543 return getRegBitWidth(RC.getID());
2544}
2545
2547 unsigned OpNo) {
2548 assert(OpNo < Desc.NumOperands);
2549 unsigned RCID = Desc.operands()[OpNo].RegClass;
2550 return getRegBitWidth(RCID) / 8;
2551}
2552
2553bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2555 return true;
2556
2557 uint64_t Val = static_cast<uint64_t>(Literal);
2558 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2559 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2560 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2561 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2562 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2563 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2564 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2565 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2566 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2567 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2568}
2569
2570bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2572 return true;
2573
2574 // The actual type of the operand does not seem to matter as long
2575 // as the bits match one of the inline immediate values. For example:
2576 //
2577 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2578 // so it is a legal inline immediate.
2579 //
2580 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2581 // floating-point, so it is a legal inline immediate.
2582
2583 uint32_t Val = static_cast<uint32_t>(Literal);
2584 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2585 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2586 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2587 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2588 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2589 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2590 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2591 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2592 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2593 (Val == 0x3e22f983 && HasInv2Pi);
2594}
2595
2596bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2597 if (!HasInv2Pi)
2598 return false;
2600 return true;
2601 uint16_t Val = static_cast<uint16_t>(Literal);
2602 return Val == 0x3F00 || // 0.5
2603 Val == 0xBF00 || // -0.5
2604 Val == 0x3F80 || // 1.0
2605 Val == 0xBF80 || // -1.0
2606 Val == 0x4000 || // 2.0
2607 Val == 0xC000 || // -2.0
2608 Val == 0x4080 || // 4.0
2609 Val == 0xC080 || // -4.0
2610 Val == 0x3E22; // 1.0 / (2.0 * pi)
2611}
2612
2613bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2614 return isInlinableLiteral32(Literal, HasInv2Pi);
2615}
2616
2617bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2618 if (!HasInv2Pi)
2619 return false;
2621 return true;
2622 uint16_t Val = static_cast<uint16_t>(Literal);
2623 return Val == 0x3C00 || // 1.0
2624 Val == 0xBC00 || // -1.0
2625 Val == 0x3800 || // 0.5
2626 Val == 0xB800 || // -0.5
2627 Val == 0x4000 || // 2.0
2628 Val == 0xC000 || // -2.0
2629 Val == 0x4400 || // 4.0
2630 Val == 0xC400 || // -4.0
2631 Val == 0x3118; // 1/2pi
2632}
2633
/// Shared integer part of the packed 16-bit inline-constant encoders.
///
/// \returns the inline encoding of \p Literal when, reinterpreted as a signed
/// 32-bit value, it lies in the integer inline range: 0..64 map to 128..192
/// and -1..-16 map to 193..208. Returns std::nullopt for any other value.
static std::optional<unsigned> getInlineEncodingV2Integer(uint32_t Literal) {
  const int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return static_cast<unsigned>(128 + Signed);

  // Negative values are encoded by magnitude: 192 + |Signed|.
  if (Signed >= -16 && Signed <= -1)
    return static_cast<unsigned>(192 - Signed);

  return std::nullopt;
}

/// Encoding of \p Literal as an inline constant for a packed 16-bit
/// instruction, or std::nullopt if it has no inline encoding.
///
/// \param IsFloat selects which float table is consulted after the integer
///        range: half-precision bit patterns when true, single-precision bit
///        patterns when false.
/// \param Literal the 32-bit literal value to encode.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  if (std::optional<unsigned> IntEncoding = getInlineEncodingV2Integer(Literal))
    return IntEncoding;

  if (IsFloat) {
    // clang-format off
    switch (Literal) {
    case 0x3800: return 240; // 0.5
    case 0xB800: return 241; // -0.5
    case 0x3C00: return 242; // 1.0
    case 0xBC00: return 243; // -1.0
    case 0x4000: return 244; // 2.0
    case 0xC000: return 245; // -2.0
    case 0x4400: return 246; // 4.0
    case 0xC400: return 247; // -4.0
    case 0x3118: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  } else {
    // clang-format off
    switch (Literal) {
    case 0x3F000000: return 240; // 0.5
    case 0xBF000000: return 241; // -0.5
    case 0x3F800000: return 242; // 1.0
    case 0xBF800000: return 243; // -1.0
    case 0x40000000: return 244; // 2.0
    case 0xC0000000: return 245; // -2.0
    case 0x40800000: return 246; // 4.0
    case 0xC0800000: return 247; // -4.0
    case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  }

  return std::nullopt;
}

// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  return getInlineEncodingV216(/*IsFloat=*/false, Literal);
}

// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt. The integer range is tried first; otherwise the literal must
// match one of the bfloat16 bit patterns below.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  if (std::optional<unsigned> IntEncoding = getInlineEncodingV2Integer(Literal))
    return IntEncoding;

  // clang-format off
  switch (Literal) {
  case 0x3F00: return 240; // 0.5
  case 0xBF00: return 241; // -0.5
  case 0x3F80: return 242; // 1.0
  case 0xBF80: return 243; // -1.0
  case 0x4000: return 244; // 2.0
  case 0xC000: return 245; // -2.0
  case 0x4080: return 246; // 4.0
  case 0xC080: return 247; // -4.0
  case 0x3E22: return 248; // 1.0 / (2.0 * pi)
  default: break;
  }
  // clang-format on

  return std::nullopt;
}

// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(/*IsFloat=*/true, Literal);
}
2726
2727// Whether the given literal can be inlined for a V_PK_* instruction.
2729 switch (OpType) {
2733 return getInlineEncodingV216(false, Literal).has_value();
2737 return getInlineEncodingV216(true, Literal).has_value();
2742 default:
2743 llvm_unreachable("bad packed operand type");
2744 }
2745}
2746
2747// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2749 return getInlineEncodingV2I16(Literal).has_value();
2750}
2751
2752// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2754 return getInlineEncodingV2BF16(Literal).has_value();
2755}
2756
2757// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2759 return getInlineEncodingV2F16(Literal).has_value();
2760}
2761
2762bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2763 if (IsFP64)
2764 return !(Val & 0xffffffffu);
2765
2766 return isUInt<32>(Val) || isInt<32>(Val);
2767}
2768
2770 const Function *F = A->getParent();
2771
2772 // Arguments to compute shaders are never a source of divergence.
2773 CallingConv::ID CC = F->getCallingConv();
2774 switch (CC) {
2777 return true;
2788 // For non-compute shaders, SGPR inputs are marked with either inreg or
2789 // byval. Everything else is in VGPRs.
2790 return A->hasAttribute(Attribute::InReg) ||
2791 A->hasAttribute(Attribute::ByVal);
2792 default:
2793 // TODO: treat i1 as divergent?
2794 return A->hasAttribute(Attribute::InReg);
2795 }
2796}
2797
2798bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2799 // Arguments to compute shaders are never a source of divergence.
2801 switch (CC) {
2804 return true;
2815 // For non-compute shaders, SGPR inputs are marked with either inreg or
2816 // byval. Everything else is in VGPRs.
2817 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2818 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2819 default:
2820 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2821 }
2822}
2823
2824static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2825 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2826}
2827
2829 int64_t EncodedOffset) {
2830 if (isGFX12Plus(ST))
2831 return isUInt<23>(EncodedOffset);
2832
2833 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2834 : isUInt<8>(EncodedOffset);
2835}
2836
2838 int64_t EncodedOffset,
2839 bool IsBuffer) {
2840 if (isGFX12Plus(ST))
2841 return isInt<24>(EncodedOffset);
2842
2843 return !IsBuffer &&
2845 isInt<21>(EncodedOffset);
2846}
2847
/// \returns true if \p ByteOffset is a multiple of four bytes.
static bool isDwordAligned(uint64_t ByteOffset) {
  constexpr uint64_t DwordBytes = 4;
  return ByteOffset % DwordBytes == 0;
}
2851
2853 uint64_t ByteOffset) {
2854 if (hasSMEMByteOffset(ST))
2855 return ByteOffset;
2856
2857 assert(isDwordAligned(ByteOffset));
2858 return ByteOffset >> 2;
2859}
2860
2861std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2862 int64_t ByteOffset, bool IsBuffer,
2863 bool HasSOffset) {
2864 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
2865 // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
2866 // Handle case where SOffset is not present.
2867 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
2868 return std::nullopt;
2869
2870 if (isGFX12Plus(ST)) // 24 bit signed offsets
2871 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2872 : std::nullopt;
2873
2874 // The signed version is always a byte offset.
2875 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2877 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2878 : std::nullopt;
2879 }
2880
2881 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2882 return std::nullopt;
2883
2884 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2885 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2886 ? std::optional<int64_t>(EncodedOffset)
2887 : std::nullopt;
2888}
2889
2890std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2891 int64_t ByteOffset) {
2892 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2893 return std::nullopt;
2894
2895 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2896 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2897 : std::nullopt;
2898}
2899
2901 if (AMDGPU::isGFX10(ST))
2902 return 12;
2903
2904 if (AMDGPU::isGFX12(ST))
2905 return 24;
2906 return 13;
2907}
2908
// File-local row types and lookup declarations for the tables pulled in from
// AMDGPUGenSearchableTables.inc below.
2909namespace {
2910
// Table row holding a single intrinsic ID.
2911struct SourceOfDivergence {
2912 unsigned Intr;
2913};
// Declared here; the definition is not in this file's own text and is
// presumably supplied by the generated include below (TODO confirm). Callers
// in this file only test the returned pointer for null.
2914const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2915
// Table row holding a single intrinsic ID.
2916struct AlwaysUniform {
2917 unsigned Intr;
2918};
// Same arrangement as lookupSourceOfDivergence above.
2919const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2920
// The GET_*_IMPL macros are defined before the include, presumably to select
// which table implementations it emits (TODO confirm against the generator).
2921#define GET_SourcesOfDivergence_IMPL
2922#define GET_UniformIntrinsics_IMPL
2923#define GET_Gfx9BufferFormat_IMPL
2924#define GET_Gfx10BufferFormat_IMPL
2925#define GET_Gfx11PlusBufferFormat_IMPL
2926#include "AMDGPUGenSearchableTables.inc"
2927
2928} // end anonymous namespace
2929
2930bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2931 return lookupSourceOfDivergence(IntrID);
2932}
2933
2934bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2935 return lookupAlwaysUniform(IntrID);
2936}
2937
2939 uint8_t NumComponents,
2940 uint8_t NumFormat,
2941 const MCSubtargetInfo &STI) {
2942 return isGFX11Plus(STI)
2943 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2944 NumFormat)
2945 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2946 NumComponents, NumFormat)
2947 : getGfx9BufferFormatInfo(BitsPerComp,
2948 NumComponents, NumFormat);
2949}
2950
2952 const MCSubtargetInfo &STI) {
2953 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2954 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2955 : getGfx9BufferFormatInfo(Format);
2956}
2957
2959 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2960 OpName::src2 }) {
2961 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2962 if (Idx == -1)
2963 continue;
2964
2965 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2966 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2967 return true;
2968 }
2969
2970 return false;
2971}
2972
2973bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2974 return hasAny64BitVGPROperands(OpDesc);
2975}
2976
2978 // Currently this is 128 for all subtargets
2979 return 128;
2980}
2981
2982} // namespace AMDGPU
2983
2986 switch (S) {
2988 OS << "Unsupported";
2989 break;
2991 OS << "Any";
2992 break;
2994 OS << "Off";
2995 break;
2997 OS << "On";
2998 break;
2999 }
3000 return OS;
3001}
3002
3003} // namespace llvm
unsigned const MachineRegisterInfo * MRI
#define MAP_REG2REG
unsigned Intr
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1150
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1147
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
void setTargetIDFromFeaturesString(StringRef FS)
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
This class represents an Operation in the Expression.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:846
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:693
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:463
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:223
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:270
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:382
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:373
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
CanBeVOPD getCanBeVOPD(unsigned Opc)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isInvalidSingleUseProducerInst(unsigned Opc)
bool isInvalidSingleUseConsumerInst(unsigned Opc)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
unsigned getVOPDOpcode(unsigned Opc)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isChainCC(CallingConv::ID CC)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
bool isTrue16Inst(unsigned Opc)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getKmcntBitMask(const IsaVersion &Version)
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGenericAtomic(unsigned Opc)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM_LAST
Definition: SIDefines.h:269
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition: SIDefines.h:260
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_KIMM_FIRST
Definition: SIDefines.h:268
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_REG_INLINE_C_FIRST
Definition: SIDefines.h:259
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition: ELF.h:378
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition: ELF.h:379
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition: ELF.h:380
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:431
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew modulo Align.
Definition: MathExtras.h:483
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
#define N
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
Definition: TargetParser.h:127
Represents the counter values to wait for in an s_waitcnt instruction.
Description of the encoding of one expression Op.