//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned>
    AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
                            llvm::cl::desc("AMDHSA Code Object Version"),
                            llvm::cl::init(4));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
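
// Example (illustrative; not part of the original source): with Shift = 4
// and Width = 3, getBitMask yields 0x70. Packing Src = 5 into Dst = 0
// stores 5 in bits [6:4], and unpackBits recovers it:
//   unsigned Enc = packBits(5, 0, 4, 3);   // Enc == 0x50
//   unsigned Val = unpackBits(Enc, 4, 3);  // Val == 5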

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return std::nullopt;

  switch (AmdhsaCodeObjectVersion) {
  case 2:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  return false;
}

bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
  return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
         isHsaAbiVersion5(STI);
}

unsigned getAmdhsaCodeObjectVersion() {
  return AmdhsaCodeObjectVersion;
}

unsigned getCodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  // Default code object version.
  return AMDHSA_COV4;
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV2:
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV2:
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV2:
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV2:
  case AMDHSA_COV3:
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
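
// Example (illustrative; not part of the original source): a basic 2D
// sample has Dim->NumCoords == 2 and no extra arguments, so with IsA16 the
// two 16-bit coordinates pack into divideCeil(2, 2) == 1 address dword,
// while the 32-bit form needs 2 address dwords.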

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY) {
  const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksNum = BANKS_NUM[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    switch (CodeObjectVersion) {
    case AMDGPU::AMDHSA_COV2:
      // Code object V2 only supported specific processors and had fixed
      // settings for the XNACK.
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else if (Processor == "gfx90c") {
        if (isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Twine(Processor) + " with XNACK being ON or ANY");
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " +
            Twine(Processor));
      }
      break;
    case AMDGPU::AMDHSA_COV3:
      // xnack.
      if (isXnackOnOrAny())
        Features += "+xnack";
      // In code object v2 and v3, "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += "+sram-ecc";
      break;
    case AMDGPU::AMDHSA_COV4:
    case AMDGPU::AMDHSA_COV5:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
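
// Example (illustrative; not part of the original source): with an
// encoding granule of 8, a kernel using 20 SGPRs is rounded up to 24,
// so getNumSGPRBlocks returns 24 / 8 - 1 == 2.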

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

static unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                             unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
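
// Example (illustrative; not part of the original source): an attribute
// such as "amdgpu-flat-work-group-size"="1,256" parses to {1, 256}; with
// OnlyFirstRequired set, a plain "64" yields {64, Default.second}.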

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
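
// Example (illustrative; not part of the original source): on gfx9
// (IsaVersion 9.x) vmcnt is split across low bits [3:0] and high bits
// [15:14], yet encode/decode round-trip transparently:
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/36, /*Expcnt=*/3,
//                                /*Lgkmcnt=*/10);
//   // decodeVmcnt(Version, Enc) == 36, decodeExpcnt(Version, Enc) == 3,
//   // decodeLgkmcnt(Version, Enc) == 10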

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names first,
// followed by aliases if any. It is not required but recommended to arrange
// operands so that operand encoding matches operand position in the table.
// This will make disassembly a bit more efficient. Unused slots in the table
// shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}
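
// Example (illustrative; not part of the original source): encodeHwreg
// packs the three fields and decodeHwreg recovers them:
//   uint64_t Enc = encodeHwreg(/*Id=*/1, /*Offset=*/0, /*Width=*/32);
//   decodeHwreg(Enc, Id, Offset, Width); // Id == 1, Offset == 0, Width == 32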

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
    {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
    {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
    {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
    {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes.
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
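
// Example (illustrative; not part of the original source): getTgtId("pos3")
// resolves to ET_POS0 + 3, while "pos03" is rejected as ET_INVALID because
// of the leading zero.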

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
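
// Example (illustrative; not part of the original source):
//   unsigned Fmt = encodeDfmtNfmt(/*Dfmt=*/10, /*Nfmt=*/4);
//   decodeDfmtNfmt(Fmt, Dfmt, Nfmt); // Dfmt == 10, Nfmt == 4
// The two helpers are exact inverses over in-range field values.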

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
  return (Idx < 0) ? "" : Msg[Idx].Name;
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (MsgId == ID_SYSMSG)
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
    case ID_GS_DONE_PreGFX11:
      return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
    }
  }
  return OpId == OP_NONE_;
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  assert(msgRequiresOp(MsgId, STI));
  return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP) ?
          (StreamId == STREAM_ID_NONE_) :
          (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
      (!isGFX11Plus(STI) &&
       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
      (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
      OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
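
// Example (illustrative; not part of the original source): on pre-GFX11
// targets a GS message with an op and a stream id packs as
//   uint64_t Enc = encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, /*StreamId=*/1);
// and decodeMsg splits Enc back into the same three fields.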

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC);
  }
}

1919bool isKernelCC(const Function *Func) {
1920 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
1921}
1922
1923bool hasXNACK(const MCSubtargetInfo &STI) {
1924 return STI.hasFeature(AMDGPU::FeatureXNACK);
1925}
1926
1927bool hasSRAMECC(const MCSubtargetInfo &STI) {
1928 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
1929}
1930
1932 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
1933}
1934
1935bool hasA16(const MCSubtargetInfo &STI) {
1936 return STI.hasFeature(AMDGPU::FeatureA16);
1937}
1938
1939bool hasG16(const MCSubtargetInfo &STI) {
1940 return STI.hasFeature(AMDGPU::FeatureG16);
1941}
1942
1944 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
1945 !isSI(STI);
1946}
1947
1948unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
1949 auto Version = getIsaVersion(STI.getCPU());
1950 if (Version.Major == 10)
1951 return Version.Minor >= 3 ? 13 : 5;
1952 if (Version.Major == 11)
1953 return 5;
1954 return 0;
1955}
1956
1957bool isSI(const MCSubtargetInfo &STI) {
1958 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
1959}
1960
1961bool isCI(const MCSubtargetInfo &STI) {
1962 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
1963}
1964
1965bool isVI(const MCSubtargetInfo &STI) {
1966 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1967}
1968
1969bool isGFX9(const MCSubtargetInfo &STI) {
1970 return STI.hasFeature(AMDGPU::FeatureGFX9);
1971}
1972
1973 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
1974 return isGFX9(STI) || isGFX10(STI);
1975}
1976
1977 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
1978 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
1979}
1980
1981bool isGFX8Plus(const MCSubtargetInfo &STI) {
1982 return isVI(STI) || isGFX9Plus(STI);
1983}
1984
1985bool isGFX9Plus(const MCSubtargetInfo &STI) {
1986 return isGFX9(STI) || isGFX10Plus(STI);
1987}
1988
1989bool isGFX10(const MCSubtargetInfo &STI) {
1990 return STI.hasFeature(AMDGPU::FeatureGFX10);
1991}
1992
1993 bool isGFX10Plus(const MCSubtargetInfo &STI) {
1994 return isGFX10(STI) || isGFX11Plus(STI);
1995}
1996
1997bool isGFX11(const MCSubtargetInfo &STI) {
1998 return STI.hasFeature(AMDGPU::FeatureGFX11);
1999}
2000
2001 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2002 return isGFX11(STI);
2003}
2004
2005 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2006 return !isGFX11Plus(STI);
2007}
2008
2009 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2010 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2011}
2012
2013 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2014 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2015}
2016
2017 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2018 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2019}
2020
2021 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2022 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2023}
2024
2025 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2026 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2027}
2028
2029 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2030 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2031}
2032
2033bool isGFX90A(const MCSubtargetInfo &STI) {
2034 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2035}
2036
2037bool isGFX940(const MCSubtargetInfo &STI) {
2038 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2039}
2040
2041 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2042 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2043}
2044
2045 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2046 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2047}
2048
2049bool hasVOPD(const MCSubtargetInfo &STI) {
2050 return STI.hasFeature(AMDGPU::FeatureVOPD);
2051}
2052
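/// Combined VGPR budget when AGPRs are used. On gfx90a the AGPR block must
/// start at a 4-register aligned boundary, so e.g. 5 VGPRs plus 3 AGPRs cost
/// alignTo(5, 4) + 3 = 11 registers; on targets with separate AGPR and VGPR
/// files the cost is simply max(VGPRs, AGPRs).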
2053int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2054 int32_t ArgNumVGPR) {
2055 if (has90AInsts && ArgNumAGPR)
2056 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2057 return std::max(ArgNumVGPR, ArgNumAGPR);
2058}
2059
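/// \returns true if \p Reg is a scalar register or SCC; for register tuples
/// the test is applied to the first 32-bit subregister (sub0).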
2060bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2061 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2062 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2063 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2064 Reg == AMDGPU::SCC;
2065}
2066
2067#define MAP_REG2REG \
2068 using namespace AMDGPU; \
2069 switch(Reg) { \
2070 default: return Reg; \
2071 CASE_CI_VI(FLAT_SCR) \
2072 CASE_CI_VI(FLAT_SCR_LO) \
2073 CASE_CI_VI(FLAT_SCR_HI) \
2074 CASE_VI_GFX9PLUS(TTMP0) \
2075 CASE_VI_GFX9PLUS(TTMP1) \
2076 CASE_VI_GFX9PLUS(TTMP2) \
2077 CASE_VI_GFX9PLUS(TTMP3) \
2078 CASE_VI_GFX9PLUS(TTMP4) \
2079 CASE_VI_GFX9PLUS(TTMP5) \
2080 CASE_VI_GFX9PLUS(TTMP6) \
2081 CASE_VI_GFX9PLUS(TTMP7) \
2082 CASE_VI_GFX9PLUS(TTMP8) \
2083 CASE_VI_GFX9PLUS(TTMP9) \
2084 CASE_VI_GFX9PLUS(TTMP10) \
2085 CASE_VI_GFX9PLUS(TTMP11) \
2086 CASE_VI_GFX9PLUS(TTMP12) \
2087 CASE_VI_GFX9PLUS(TTMP13) \
2088 CASE_VI_GFX9PLUS(TTMP14) \
2089 CASE_VI_GFX9PLUS(TTMP15) \
2090 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2091 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2092 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2093 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2094 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2095 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2096 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2097 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2098 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2099 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2100 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2101 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2102 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2103 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2104 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2105 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2106 CASE_GFXPRE11_GFX11PLUS(M0) \
2107 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2108 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2109 }
2110
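// MAP_REG2REG expands into a switch over Reg. The CASE_* macros below are
// defined twice: first (here) to map a pseudo register to the hardware
// register of the current subtarget for getMCReg(), and later in reverse to
// map a hardware register back to its pseudo register for mc2PseudoReg().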
2111#define CASE_CI_VI(node) \
2112 assert(!isSI(STI)); \
2113 case node: return isCI(STI) ? node##_ci : node##_vi;
2114
2115#define CASE_VI_GFX9PLUS(node) \
2116 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2117
2118#define CASE_GFXPRE11_GFX11PLUS(node) \
2119 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2120
2121#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2122 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2123
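/// If \p Reg is a pseudo register, return the hardware register used for the
/// subtarget described by \p STI; otherwise return \p Reg unchanged (R600 has
/// no such remapping).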
2124unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2125 if (STI.getTargetTriple().getArch() == Triple::r600)
2126 return Reg;
2127 MAP_REG2REG
2128 }
2129
2130#undef CASE_CI_VI
2131#undef CASE_VI_GFX9PLUS
2132#undef CASE_GFXPRE11_GFX11PLUS
2133#undef CASE_GFXPRE11_GFX11PLUS_TO
2134
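// Reversed expansions of the CASE_* macros, mapping subtarget-specific
// hardware registers back to their pseudo registers for mc2PseudoReg().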
2135#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2136#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2137#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2138#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2139
2140unsigned mc2PseudoReg(unsigned Reg) {
2142}
2143
2144bool isInlineValue(unsigned Reg) {
2145 switch (Reg) {
2146 case AMDGPU::SRC_SHARED_BASE_LO:
2147 case AMDGPU::SRC_SHARED_BASE:
2148 case AMDGPU::SRC_SHARED_LIMIT_LO:
2149 case AMDGPU::SRC_SHARED_LIMIT:
2150 case AMDGPU::SRC_PRIVATE_BASE_LO:
2151 case AMDGPU::SRC_PRIVATE_BASE:
2152 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2153 case AMDGPU::SRC_PRIVATE_LIMIT:
2154 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2155 return true;
2156 case AMDGPU::SRC_VCCZ:
2157 case AMDGPU::SRC_EXECZ:
2158 case AMDGPU::SRC_SCC:
2159 return true;
2160 case AMDGPU::SGPR_NULL:
2161 return true;
2162 default:
2163 return false;
2164 }
2165}
2166
2167#undef CASE_CI_VI
2168#undef CASE_VI_GFX9PLUS
2169#undef CASE_GFXPRE11_GFX11PLUS
2170#undef CASE_GFXPRE11_GFX11PLUS_TO
2171#undef MAP_REG2REG
2172
2173bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2174 assert(OpNo < Desc.NumOperands);
2175 unsigned OpType = Desc.operands()[OpNo].OperandType;
2176 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2177 OpType <= AMDGPU::OPERAND_SRC_LAST;
2178}
2179
2180bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2181 assert(OpNo < Desc.NumOperands);
2182 unsigned OpType = Desc.operands()[OpNo].OperandType;
2183 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2184 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2185}
2186
2187bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2188 assert(OpNo < Desc.NumOperands);
2189 unsigned OpType = Desc.operands()[OpNo].OperandType;
2190 switch (OpType) {
2191 case AMDGPU::OPERAND_REG_IMM_FP32:
2192 case AMDGPU::OPERAND_REG_IMM_FP64:
2193 case AMDGPU::OPERAND_REG_IMM_FP16:
2194 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2195 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2196 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2197 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2198 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2199 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2200 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2201 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2202 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2203 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2204 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2205 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2206 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2207 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2208 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2209 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2210 return true;
2211 default:
2212 return false;
2213 }
2214}
2215
2216bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2217 assert(OpNo < Desc.NumOperands);
2218 unsigned OpType = Desc.operands()[OpNo].OperandType;
2219 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2221}
2222
2223// Avoid using MCRegisterClass::getSize, since that function will go away
2224// (move from MC* level to Target* level). Return size in bits.
2225unsigned getRegBitWidth(unsigned RCID) {
2226 switch (RCID) {
2227 case AMDGPU::VGPR_LO16RegClassID:
2228 case AMDGPU::VGPR_HI16RegClassID:
2229 case AMDGPU::SGPR_LO16RegClassID:
2230 case AMDGPU::AGPR_LO16RegClassID:
2231 return 16;
2232 case AMDGPU::SGPR_32RegClassID:
2233 case AMDGPU::VGPR_32RegClassID:
2234 case AMDGPU::VRegOrLds_32RegClassID:
2235 case AMDGPU::AGPR_32RegClassID:
2236 case AMDGPU::VS_32RegClassID:
2237 case AMDGPU::AV_32RegClassID:
2238 case AMDGPU::SReg_32RegClassID:
2239 case AMDGPU::SReg_32_XM0RegClassID:
2240 case AMDGPU::SRegOrLds_32RegClassID:
2241 return 32;
2242 case AMDGPU::SGPR_64RegClassID:
2243 case AMDGPU::VS_64RegClassID:
2244 case AMDGPU::SReg_64RegClassID:
2245 case AMDGPU::VReg_64RegClassID:
2246 case AMDGPU::AReg_64RegClassID:
2247 case AMDGPU::SReg_64_XEXECRegClassID:
2248 case AMDGPU::VReg_64_Align2RegClassID:
2249 case AMDGPU::AReg_64_Align2RegClassID:
2250 case AMDGPU::AV_64RegClassID:
2251 case AMDGPU::AV_64_Align2RegClassID:
2252 return 64;
2253 case AMDGPU::SGPR_96RegClassID:
2254 case AMDGPU::SReg_96RegClassID:
2255 case AMDGPU::VReg_96RegClassID:
2256 case AMDGPU::AReg_96RegClassID:
2257 case AMDGPU::VReg_96_Align2RegClassID:
2258 case AMDGPU::AReg_96_Align2RegClassID:
2259 case AMDGPU::AV_96RegClassID:
2260 case AMDGPU::AV_96_Align2RegClassID:
2261 return 96;
2262 case AMDGPU::SGPR_128RegClassID:
2263 case AMDGPU::SReg_128RegClassID:
2264 case AMDGPU::VReg_128RegClassID:
2265 case AMDGPU::AReg_128RegClassID:
2266 case AMDGPU::VReg_128_Align2RegClassID:
2267 case AMDGPU::AReg_128_Align2RegClassID:
2268 case AMDGPU::AV_128RegClassID:
2269 case AMDGPU::AV_128_Align2RegClassID:
2270 return 128;
2271 case AMDGPU::SGPR_160RegClassID:
2272 case AMDGPU::SReg_160RegClassID:
2273 case AMDGPU::VReg_160RegClassID:
2274 case AMDGPU::AReg_160RegClassID:
2275 case AMDGPU::VReg_160_Align2RegClassID:
2276 case AMDGPU::AReg_160_Align2RegClassID:
2277 case AMDGPU::AV_160RegClassID:
2278 case AMDGPU::AV_160_Align2RegClassID:
2279 return 160;
2280 case AMDGPU::SGPR_192RegClassID:
2281 case AMDGPU::SReg_192RegClassID:
2282 case AMDGPU::VReg_192RegClassID:
2283 case AMDGPU::AReg_192RegClassID:
2284 case AMDGPU::VReg_192_Align2RegClassID:
2285 case AMDGPU::AReg_192_Align2RegClassID:
2286 case AMDGPU::AV_192RegClassID:
2287 case AMDGPU::AV_192_Align2RegClassID:
2288 return 192;
2289 case AMDGPU::SGPR_224RegClassID:
2290 case AMDGPU::SReg_224RegClassID:
2291 case AMDGPU::VReg_224RegClassID:
2292 case AMDGPU::AReg_224RegClassID:
2293 case AMDGPU::VReg_224_Align2RegClassID:
2294 case AMDGPU::AReg_224_Align2RegClassID:
2295 case AMDGPU::AV_224RegClassID:
2296 case AMDGPU::AV_224_Align2RegClassID:
2297 return 224;
2298 case AMDGPU::SGPR_256RegClassID:
2299 case AMDGPU::SReg_256RegClassID:
2300 case AMDGPU::VReg_256RegClassID:
2301 case AMDGPU::AReg_256RegClassID:
2302 case AMDGPU::VReg_256_Align2RegClassID:
2303 case AMDGPU::AReg_256_Align2RegClassID:
2304 case AMDGPU::AV_256RegClassID:
2305 case AMDGPU::AV_256_Align2RegClassID:
2306 return 256;
2307 case AMDGPU::SGPR_288RegClassID:
2308 case AMDGPU::SReg_288RegClassID:
2309 case AMDGPU::VReg_288RegClassID:
2310 case AMDGPU::AReg_288RegClassID:
2311 case AMDGPU::VReg_288_Align2RegClassID:
2312 case AMDGPU::AReg_288_Align2RegClassID:
2313 case AMDGPU::AV_288RegClassID:
2314 case AMDGPU::AV_288_Align2RegClassID:
2315 return 288;
2316 case AMDGPU::SGPR_320RegClassID:
2317 case AMDGPU::SReg_320RegClassID:
2318 case AMDGPU::VReg_320RegClassID:
2319 case AMDGPU::AReg_320RegClassID:
2320 case AMDGPU::VReg_320_Align2RegClassID:
2321 case AMDGPU::AReg_320_Align2RegClassID:
2322 case AMDGPU::AV_320RegClassID:
2323 case AMDGPU::AV_320_Align2RegClassID:
2324 return 320;
2325 case AMDGPU::SGPR_352RegClassID:
2326 case AMDGPU::SReg_352RegClassID:
2327 case AMDGPU::VReg_352RegClassID:
2328 case AMDGPU::AReg_352RegClassID:
2329 case AMDGPU::VReg_352_Align2RegClassID:
2330 case AMDGPU::AReg_352_Align2RegClassID:
2331 case AMDGPU::AV_352RegClassID:
2332 case AMDGPU::AV_352_Align2RegClassID:
2333 return 352;
2334 case AMDGPU::SGPR_384RegClassID:
2335 case AMDGPU::SReg_384RegClassID:
2336 case AMDGPU::VReg_384RegClassID:
2337 case AMDGPU::AReg_384RegClassID:
2338 case AMDGPU::VReg_384_Align2RegClassID:
2339 case AMDGPU::AReg_384_Align2RegClassID:
2340 case AMDGPU::AV_384RegClassID:
2341 case AMDGPU::AV_384_Align2RegClassID:
2342 return 384;
2343 case AMDGPU::SGPR_512RegClassID:
2344 case AMDGPU::SReg_512RegClassID:
2345 case AMDGPU::VReg_512RegClassID:
2346 case AMDGPU::AReg_512RegClassID:
2347 case AMDGPU::VReg_512_Align2RegClassID:
2348 case AMDGPU::AReg_512_Align2RegClassID:
2349 case AMDGPU::AV_512RegClassID:
2350 case AMDGPU::AV_512_Align2RegClassID:
2351 return 512;
2352 case AMDGPU::SGPR_1024RegClassID:
2353 case AMDGPU::SReg_1024RegClassID:
2354 case AMDGPU::VReg_1024RegClassID:
2355 case AMDGPU::AReg_1024RegClassID:
2356 case AMDGPU::VReg_1024_Align2RegClassID:
2357 case AMDGPU::AReg_1024_Align2RegClassID:
2358 case AMDGPU::AV_1024RegClassID:
2359 case AMDGPU::AV_1024_Align2RegClassID:
2360 return 1024;
2361 default:
2362 llvm_unreachable("Unexpected register class");
2363 }
2364}
2365
2366unsigned getRegBitWidth(const MCRegisterClass &RC) {
2367 return getRegBitWidth(RC.getID());
2368}
2369
2370 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2371 unsigned OpNo) {
2372 assert(OpNo < Desc.NumOperands);
2373 unsigned RCID = Desc.operands()[OpNo].RegClass;
2374 return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
2375}
2376
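/// \returns true if \p Literal is encodable as a 64-bit inline constant: a
/// small integer (see isInlinableIntLiteral) or one of the fp64 values +-0.0,
/// +-0.5, +-1.0, +-2.0, +-4.0, plus 1/(2*pi) (0x3fc45f306dc9c882) when the
/// subtarget provides the inv_2pi constant (\p HasInv2Pi).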
2377bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2378 if (isInlinableIntLiteral(Literal))
2379 return true;
2380
2381 uint64_t Val = static_cast<uint64_t>(Literal);
2382 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2383 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2384 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2385 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2386 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2387 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2388 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2389 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2390 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2391 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2392}
2393
2394bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2395 if (isInlinableIntLiteral(Literal))
2396 return true;
2397
2398 // The actual type of the operand does not seem to matter as long
2399 // as the bits match one of the inline immediate values. For example:
2400 //
2401 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2402 // so it is a legal inline immediate.
2403 //
2404 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2405 // floating-point, so it is a legal inline immediate.
2406
2407 uint32_t Val = static_cast<uint32_t>(Literal);
2408 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2409 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2410 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2411 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2412 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2413 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2414 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2415 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2416 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2417 (Val == 0x3e22f983 && HasInv2Pi);
2418}
2419
2420bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2421 if (!HasInv2Pi)
2422 return false;
2423
2424 if (isInlinableIntLiteral(Literal))
2425 return true;
2426
2427 uint16_t Val = static_cast<uint16_t>(Literal);
2428 return Val == 0x3C00 || // 1.0
2429 Val == 0xBC00 || // -1.0
2430 Val == 0x3800 || // 0.5
2431 Val == 0xB800 || // -0.5
2432 Val == 0x4000 || // 2.0
2433 Val == 0xC000 || // -2.0
2434 Val == 0x4400 || // 4.0
2435 Val == 0xC400 || // -4.0
2436 Val == 0x3118; // 1/2pi
2437}
2438
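/// A packed 16x2 literal is inlinable when both halves carry the same
/// inlinable 16-bit value, or when the literal reduces to a single half (the
/// other half being zero or sign-extension bits). For example, 0x3C003C00
/// (both halves 1.0 in fp16) is inlinable; 0x3C004000 (halves 2.0 and 1.0)
/// is not.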
2439bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2440 assert(HasInv2Pi);
2441
2442 if (isInt<16>(Literal) || isUInt<16>(Literal)) {
2443 int16_t Trunc = static_cast<int16_t>(Literal);
2444 return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
2445 }
2446 if (!(Literal & 0xffff))
2447 return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
2448
2449 int16_t Lo16 = static_cast<int16_t>(Literal);
2450 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2451 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
2452}
2453
2454 bool isInlinableIntLiteralV216(int32_t Literal) {
2455 int16_t Lo16 = static_cast<int16_t>(Literal);
2456 if (isInt<16>(Literal) || isUInt<16>(Literal))
2457 return isInlinableIntLiteral(Lo16);
2458
2459 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2460 if (!(Literal & 0xffff))
2461 return isInlinableIntLiteral(Hi16);
2462 return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
2463}
2464
2465bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2466 assert(HasInv2Pi);
2467
2468 int16_t Lo16 = static_cast<int16_t>(Literal);
2469 if (isInt<16>(Literal) || isUInt<16>(Literal))
2470 return true;
2471
2472 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2473 if (!(Literal & 0xffff))
2474 return true;
2475 return Lo16 == Hi16;
2476}
2477
2478 bool isArgPassedInSGPR(const Argument *A) {
2479 const Function *F = A->getParent();
2480
2481 // Arguments to compute shaders are never a source of divergence.
2482 CallingConv::ID CC = F->getCallingConv();
2483 switch (CC) {
2484 case CallingConv::AMDGPU_KERNEL:
2485 case CallingConv::SPIR_KERNEL:
2486 return true;
2487 case CallingConv::AMDGPU_VS:
2488 case CallingConv::AMDGPU_LS:
2489 case CallingConv::AMDGPU_HS:
2490 case CallingConv::AMDGPU_ES:
2491 case CallingConv::AMDGPU_GS:
2492 case CallingConv::AMDGPU_PS:
2493 case CallingConv::AMDGPU_CS:
2494 case CallingConv::AMDGPU_Gfx:
2495 // For non-compute shaders, SGPR inputs are marked with either inreg or
2496 // byval. Everything else is in VGPRs.
2497 return A->hasAttribute(Attribute::InReg) ||
2498 A->hasAttribute(Attribute::ByVal);
2499 default:
2500 // TODO: Should calls support inreg for SGPR inputs?
2501 return false;
2502 }
2503}
2504
2505bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2506 // Arguments to compute shaders are never a source of divergence.
2507 CallingConv::ID CC = CB->getCallingConv();
2508 switch (CC) {
2509 case CallingConv::AMDGPU_KERNEL:
2510 case CallingConv::SPIR_KERNEL:
2511 return true;
2512 case CallingConv::AMDGPU_VS:
2513 case CallingConv::AMDGPU_LS:
2514 case CallingConv::AMDGPU_HS:
2515 case CallingConv::AMDGPU_ES:
2516 case CallingConv::AMDGPU_GS:
2517 case CallingConv::AMDGPU_PS:
2518 case CallingConv::AMDGPU_CS:
2519 case CallingConv::AMDGPU_Gfx:
2520 // For non-compute shaders, SGPR inputs are marked with either inreg or
2521 // byval. Everything else is in VGPRs.
2522 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2523 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2524 default:
2525 // TODO: Should calls support inreg for SGPR inputs?
2526 return false;
2527 }
2528}
2529
2530static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2531 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2532}
2533
2534 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2535 return isGFX9Plus(ST);
2536}
2537
2538 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2539 int64_t EncodedOffset) {
2540 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2541 : isUInt<8>(EncodedOffset);
2542}
2543
2544 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2545 int64_t EncodedOffset,
2546 bool IsBuffer) {
2547 return !IsBuffer &&
2548 hasSMRDSignedImmOffset(ST) &&
2549 isInt<21>(EncodedOffset);
2550}
2551
2552static bool isDwordAligned(uint64_t ByteOffset) {
2553 return (ByteOffset & 3) == 0;
2554}
2555
2556 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2557 uint64_t ByteOffset) {
2558 if (hasSMEMByteOffset(ST))
2559 return ByteOffset;
2560
2561 assert(isDwordAligned(ByteOffset));
2562 return ByteOffset >> 2;
2563}
2564
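/// Encodes \p ByteOffset for an SMRD/SMEM instruction, or returns
/// std::nullopt when it cannot be encoded. On GFX9+ non-buffer offsets are
/// signed byte offsets; earlier targets use unsigned offsets, expressed in
/// dword units before GCN3, so e.g. a byte offset of 16 encodes as 4 on
/// SI/CI.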
2565std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2566 int64_t ByteOffset, bool IsBuffer) {
2567 // The signed version is always a byte offset.
2568 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2569 assert(hasSMEMByteOffset(ST));
2570 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2571 : std::nullopt;
2572 }
2573
2574 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2575 return std::nullopt;
2576
2577 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2578 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2579 ? std::optional<int64_t>(EncodedOffset)
2580 : std::nullopt;
2581}
2582
2583std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2584 int64_t ByteOffset) {
2585 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2586 return std::nullopt;
2587
2588 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2589 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2590 : std::nullopt;
2591}
2592
2593 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2594 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
2595 if (AMDGPU::isGFX10(ST))
2596 return 12;
2597
2598 return 13;
2599}
2600
2601namespace {
2602
2603struct SourceOfDivergence {
2604 unsigned Intr;
2605};
2606const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2607
2608struct AlwaysUniform {
2609 unsigned Intr;
2610};
2611const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2612
2613#define GET_SourcesOfDivergence_IMPL
2614#define GET_UniformIntrinsics_IMPL
2615#define GET_Gfx9BufferFormat_IMPL
2616#define GET_Gfx10BufferFormat_IMPL
2617#define GET_Gfx11PlusBufferFormat_IMPL
2618#include "AMDGPUGenSearchableTables.inc"
2619
2620} // end anonymous namespace
2621
2622bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2623 return lookupSourceOfDivergence(IntrID);
2624}
2625
2626bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2627 return lookupAlwaysUniform(IntrID);
2628}
2629
2630 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2631 uint8_t NumComponents,
2632 uint8_t NumFormat,
2633 const MCSubtargetInfo &STI) {
2634 return isGFX11Plus(STI)
2635 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2636 NumFormat)
2637 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2638 NumComponents, NumFormat)
2639 : getGfx9BufferFormatInfo(BitsPerComp,
2640 NumComponents, NumFormat);
2641}
2642
2643 const GcnBufferFormatInfo *getGcnBufferFormatInfo(unsigned Format,
2644 const MCSubtargetInfo &STI) {
2645 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2646 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2647 : getGfx9BufferFormatInfo(Format);
2648}
2649
2650} // namespace AMDGPU
2651
2652 raw_ostream &operator<<(raw_ostream &OS,
2653 const AMDGPU::IsaInfo::TargetIDSetting S) {
2654 switch (S) {
2655 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2656 OS << "Unsupported";
2657 break;
2658 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2659 OS << "Any";
2660 break;
2661 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2662 OS << "Off";
2663 break;
2664 case (AMDGPU::IsaInfo::TargetIDSetting::On):
2665 OS << "On";
2666 break;
2667 }
2668 return OS;
2669}
2670
2671} // namespace llvm