#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
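// Returns true when Reg should be treated as a wave-wide lane mask (VCC)
// value rather than a plain scalar: either its assigned register bank is the
// VCC bank, or it has a register class and its s1 definition is not a plain
// G_TRUNC.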
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const LLT Ty = MRI.getType(Reg);
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
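// Select a generic COPY. Copies whose destination is a VCC lane mask are the
// interesting case: a constant source becomes an S_MOV of an all-ones/zero
// mask, and a non-boolean source is masked to a single bit and compared
// against zero (V_CMP_NE) so the result is a legal wave-wide boolean.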
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
          And.setOperandDead(3);
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);

  if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
  I.setDesc(TII.get(TargetOpcode::PHI));

                                                     unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
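// Select integer add/sub (and pointer add). 32-bit values map directly onto
// S_ADD/S_SUB or V_ADD/V_SUB; 64-bit scalar values are split into sub0/sub1
// halves and use the carry chain (S_ADD_U32 + S_ADDC_U32, or the VALU
// carry-out form), then are recombined with a REG_SEQUENCE.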
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
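// Select overflowing add/sub with an explicit carry output (and, for the *E
// forms, a carry input). A VCC carry uses the VALU carry opcodes; an SCC
// carry uses S_ADD(C)/S_SUB(B) with the carry-in copied into SCC first.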
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                    AMDGPU::SReg_32RegClass, *MRI))

bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                                *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
      MI.eraseFromParent();
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
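  // Scalar path: pack the two 16-bit halves with S_PACK_*_B32_B16. The
  // variant (LL/LH/HL/HH) depends on whether each source is already the high
  // half of a 32-bit value (a shift-right-by-16), and a constant-zero high
  // half degenerates into a plain S_LSHR_B32.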
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
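// Select the side-effect-free G_INTRINSIC cases that need custom handling:
// most entries either constrain a copy-like pseudo (wqm/wwm/strict variants)
// or forward to a dedicated helper selector.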
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
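// Pick the VALU compare opcode for a given predicate and operand size.
// 16-bit compares require 16-bit instructions (true16, fake16, or the plain
// _e64 form depending on the subtarget's True16 support); otherwise the
// 32- or 64-bit compare is used.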
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1140 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1141 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1142 AMDGPU::V_CMP_NE_U64_e64);
1144 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1145 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1146 AMDGPU::V_CMP_EQ_U64_e64);
1148 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1149 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1150 AMDGPU::V_CMP_GT_I64_e64);
1152 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1153 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1154 AMDGPU::V_CMP_GE_I64_e64);
1156 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1157 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1158 AMDGPU::V_CMP_LT_I64_e64);
1160 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1161 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1162 AMDGPU::V_CMP_LE_I64_e64);
1164 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1165 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1166 AMDGPU::V_CMP_GT_U64_e64);
1168 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1169 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1170 AMDGPU::V_CMP_GE_U64_e64);
1172 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1173 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1174 AMDGPU::V_CMP_LT_U64_e64);
1176 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1177 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1178 AMDGPU::V_CMP_LE_U64_e64);
1181 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1182 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1183 AMDGPU::V_CMP_EQ_F64_e64);
1185 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1186 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1187 AMDGPU::V_CMP_GT_F64_e64);
1189 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1190 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1191 AMDGPU::V_CMP_GE_F64_e64);
1193 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1194 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1195 AMDGPU::V_CMP_LT_F64_e64);
1197 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1198 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1199 AMDGPU::V_CMP_LE_F64_e64);
1201 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1202 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1203 AMDGPU::V_CMP_NEQ_F64_e64);
1205 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1206 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1207 AMDGPU::V_CMP_O_F64_e64);
1209 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1210 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1211 AMDGPU::V_CMP_U_F64_e64);
1213 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1214 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1215 AMDGPU::V_CMP_NLG_F64_e64);
1217 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1218 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1219 AMDGPU::V_CMP_NLE_F64_e64);
1221 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1222 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1223 AMDGPU::V_CMP_NLT_F64_e64);
1225 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1226 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1227 AMDGPU::V_CMP_NGE_F64_e64);
1229 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1230 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1231 AMDGPU::V_CMP_NGT_F64_e64);
1233 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1234 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1235 AMDGPU::V_CMP_NEQ_F64_e64);
1237 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1238 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1239 AMDGPU::V_CMP_TRU_F64_e64);
1241 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1242 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1243 AMDGPU::V_CMP_F_F64_e64);
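// Map a compare predicate and operand size to the scalar S_CMP opcode used
// when the compare result is produced in SCC rather than in a VCC lane mask;
// float predicates map onto the S_CMP_*_F32/F16 forms listed below.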
1248 unsigned Size)
const {
1255 return AMDGPU::S_CMP_LG_U64;
1257 return AMDGPU::S_CMP_EQ_U64;
1266 return AMDGPU::S_CMP_LG_U32;
1268 return AMDGPU::S_CMP_EQ_U32;
1270 return AMDGPU::S_CMP_GT_I32;
1272 return AMDGPU::S_CMP_GE_I32;
1274 return AMDGPU::S_CMP_LT_I32;
1276 return AMDGPU::S_CMP_LE_I32;
1278 return AMDGPU::S_CMP_GT_U32;
1280 return AMDGPU::S_CMP_GE_U32;
1282 return AMDGPU::S_CMP_LT_U32;
1284 return AMDGPU::S_CMP_LE_U32;
1286 return AMDGPU::S_CMP_EQ_F32;
1288 return AMDGPU::S_CMP_GT_F32;
1290 return AMDGPU::S_CMP_GE_F32;
1292 return AMDGPU::S_CMP_LT_F32;
1294 return AMDGPU::S_CMP_LE_F32;
1296 return AMDGPU::S_CMP_LG_F32;
1298 return AMDGPU::S_CMP_O_F32;
1300 return AMDGPU::S_CMP_U_F32;
1302 return AMDGPU::S_CMP_NLG_F32;
1304 return AMDGPU::S_CMP_NLE_F32;
1306 return AMDGPU::S_CMP_NLT_F32;
1308 return AMDGPU::S_CMP_NGE_F32;
1310 return AMDGPU::S_CMP_NGT_F32;
1312 return AMDGPU::S_CMP_NEQ_F32;
1324 return AMDGPU::S_CMP_EQ_F16;
1326 return AMDGPU::S_CMP_GT_F16;
1328 return AMDGPU::S_CMP_GE_F16;
1330 return AMDGPU::S_CMP_LT_F16;
1332 return AMDGPU::S_CMP_LE_F16;
1334 return AMDGPU::S_CMP_LG_F16;
1336 return AMDGPU::S_CMP_O_F16;
1338 return AMDGPU::S_CMP_U_F16;
1340 return AMDGPU::S_CMP_NLG_F16;
1342 return AMDGPU::S_CMP_NLE_F16;
1344 return AMDGPU::S_CMP_NLT_F16;
1346 return AMDGPU::S_CMP_NGE_F16;
1348 return AMDGPU::S_CMP_NGT_F16;
1350 return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();

  if (MI->getParent() != MBB)
  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
  if (isa<GAnyCmp>(MI))
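// Select llvm.amdgcn.ballot. Constant arguments fold to a simple mask move,
// non-constant lane masks are AND'ed with EXEC (or copied directly), and a
// 64-bit ballot on a wave32 target gets a zero high half via REG_SEQUENCE.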
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
  std::optional<ValueAndVReg> Arg =
    if (BallotSize != WaveSize) {
    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto *RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
                     AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
1732 case Intrinsic::amdgcn_ds_gws_init:
1733 return AMDGPU::DS_GWS_INIT;
1734 case Intrinsic::amdgcn_ds_gws_barrier:
1735 return AMDGPU::DS_GWS_BARRIER;
1736 case Intrinsic::amdgcn_ds_gws_sema_v:
1737 return AMDGPU::DS_GWS_SEMA_V;
1738 case Intrinsic::amdgcn_ds_gws_sema_br:
1739 return AMDGPU::DS_GWS_SEMA_BR;
1740 case Intrinsic::amdgcn_ds_gws_sema_p:
1741 return AMDGPU::DS_GWS_SEMA_P;
1742 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1743 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1749bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1751 if (!STI.
hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1756 const bool HasVSrc =
MI.getNumOperands() == 3;
1757 assert(HasVSrc ||
MI.getNumOperands() == 2);
1759 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1761 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1775 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1776 Readfirstlane = OffsetDef;
1781 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
1791 std::tie(BaseOffset, ImmOffset) =
1794 if (Readfirstlane) {
1804 AMDGPU::SReg_32RegClass, *MRI))
1808 Register M0Base =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1836 MI.eraseFromParent();
1840bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
1841 bool IsAppend)
const {
1842 Register PtrBase =
MI.getOperand(2).getReg();
1843 LLT PtrTy =
MRI->getType(PtrBase);
1847 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
1850 if (!isDSOffsetLegal(PtrBase,
Offset)) {
1851 PtrBase =
MI.getOperand(2).getReg();
1857 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1868 MI.eraseFromParent();
1872bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
1880bool AMDGPUInstructionSelector::selectSBarrier(
MachineInstr &
MI)
const {
1887 if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
1888 IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
1893 MI.eraseFromParent();
1898 if (STI.
hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
1906 MI.eraseFromParent();
  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
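// Select an image (MIMG) intrinsic pseudo. The selector derives the dmask and
// vdata dword count (folding 64-bit atomics and D16 packing), decides between
// the NSA and default vaddr encodings for the subtarget generation, and then
// emits the concrete MIMG opcode with its unorm/tfe/lwe/a16/d16 flags.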
1926bool AMDGPUInstructionSelector::selectImageIntrinsic(
1935 unsigned IntrOpcode =
Intr->BaseOpcode;
1940 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
1944 int NumVDataDwords = -1;
1945 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
1946 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
1952 Unorm =
MI.getOperand(ArgOffset +
Intr->UnormIndex).getImm() != 0;
1956 bool IsTexFail =
false;
1958 TFE, LWE, IsTexFail))
1961 const int Flags =
MI.getOperand(ArgOffset +
Intr->NumArgs).getImm();
1962 const bool IsA16 = (
Flags & 1) != 0;
1963 const bool IsG16 = (
Flags & 2) != 0;
1966 if (IsA16 && !STI.
hasG16() && !IsG16)
1970 unsigned DMaskLanes = 0;
1972 if (BaseOpcode->
Atomic) {
1973 VDataOut =
MI.getOperand(0).getReg();
1974 VDataIn =
MI.getOperand(2).getReg();
1975 LLT Ty =
MRI->getType(VDataIn);
1978 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
1983 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
1985 DMask = Is64Bit ? 0xf : 0x3;
1986 NumVDataDwords = Is64Bit ? 4 : 2;
1988 DMask = Is64Bit ? 0x3 : 0x1;
1989 NumVDataDwords = Is64Bit ? 2 : 1;
1992 DMask =
MI.getOperand(ArgOffset +
Intr->DMaskIndex).getImm();
1995 if (BaseOpcode->
Store) {
1996 VDataIn =
MI.getOperand(1).getReg();
1997 VDataTy =
MRI->getType(VDataIn);
2002 VDataOut =
MI.getOperand(0).getReg();
2003 VDataTy =
MRI->getType(VDataOut);
2004 NumVDataDwords = DMaskLanes;
2007 NumVDataDwords = (DMaskLanes + 1) / 2;
2012 if (Subtarget->
hasG16() && IsG16) {
2016 IntrOpcode = G16MappingInfo->
G16;
2020 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2022 unsigned CPol =
MI.getOperand(ArgOffset +
Intr->CachePolicyIndex).getImm();
2029 int NumVAddrRegs = 0;
2030 int NumVAddrDwords = 0;
2031 for (
unsigned I =
Intr->VAddrStart; I < Intr->VAddrEnd;
I++) {
2034 if (!AddrOp.
isReg())
2042 NumVAddrDwords += (
MRI->getType(
Addr).getSizeInBits() + 31) / 32;
2049 NumVAddrRegs != 1 &&
2051 : NumVAddrDwords == NumVAddrRegs);
2052 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2063 NumVDataDwords, NumVAddrDwords);
2064 }
else if (IsGFX11Plus) {
2066 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2067 : AMDGPU::MIMGEncGfx11Default,
2068 NumVDataDwords, NumVAddrDwords);
2069 }
else if (IsGFX10Plus) {
2071 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2072 : AMDGPU::MIMGEncGfx10Default,
2073 NumVDataDwords, NumVAddrDwords);
2077 NumVDataDwords, NumVAddrDwords);
2081 <<
"requested image instruction is not supported on this GPU\n");
2088 NumVDataDwords, NumVAddrDwords);
2091 NumVDataDwords, NumVAddrDwords);
2101 const bool Is64 =
MRI->getType(VDataOut).getSizeInBits() == 64;
2104 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2105 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2108 if (!
MRI->use_empty(VDataOut)) {
2121 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2123 if (
SrcOp.isReg()) {
2129 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->RsrcIndex).getReg());
2131 MIB.
addReg(
MI.getOperand(ArgOffset +
Intr->SampIndex).getReg());
2142 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2144 MIB.
addImm(IsA16 ? -1 : 0);
2158 MIB.
addImm(IsD16 ? -1 : 0);
2160 MI.eraseFromParent();
2168bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2179 unsigned Offset =
MI.getOperand(6).getImm();
2181 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2189 MI.eraseFromParent();
2193bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2195 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(
I).getIntrinsicID();
2196 switch (IntrinsicID) {
2197 case Intrinsic::amdgcn_end_cf:
2198 return selectEndCfIntrinsic(
I);
2199 case Intrinsic::amdgcn_ds_ordered_add:
2200 case Intrinsic::amdgcn_ds_ordered_swap:
2201 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2202 case Intrinsic::amdgcn_ds_gws_init:
2203 case Intrinsic::amdgcn_ds_gws_barrier:
2204 case Intrinsic::amdgcn_ds_gws_sema_v:
2205 case Intrinsic::amdgcn_ds_gws_sema_br:
2206 case Intrinsic::amdgcn_ds_gws_sema_p:
2207 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2208 return selectDSGWSIntrinsic(
I, IntrinsicID);
2209 case Intrinsic::amdgcn_ds_append:
2210 return selectDSAppendConsume(
I,
true);
2211 case Intrinsic::amdgcn_ds_consume:
2212 return selectDSAppendConsume(
I,
false);
2213 case Intrinsic::amdgcn_init_whole_wave:
2214 return selectInitWholeWave(
I);
2215 case Intrinsic::amdgcn_s_barrier:
2216 case Intrinsic::amdgcn_s_barrier_signal:
2217 case Intrinsic::amdgcn_s_barrier_wait:
2218 return selectSBarrier(
I);
2219 case Intrinsic::amdgcn_raw_buffer_load_lds:
2220 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2221 case Intrinsic::amdgcn_struct_buffer_load_lds:
2222 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2223 return selectBufferLoadLds(
I);
2224 case Intrinsic::amdgcn_global_load_lds:
2225 return selectGlobalLoadLds(
I);
2226 case Intrinsic::amdgcn_exp_compr:
2230 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error);
2231 F.getContext().diagnose(NoFpRet);
2235 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2236 return selectDSBvhStackIntrinsic(
I);
2237 case Intrinsic::amdgcn_s_barrier_init:
2238 case Intrinsic::amdgcn_s_barrier_signal_var:
2239 return selectNamedBarrierInit(
I, IntrinsicID);
2240 case Intrinsic::amdgcn_s_barrier_join:
2241 case Intrinsic::amdgcn_s_wakeup_barrier:
2242 case Intrinsic::amdgcn_s_get_named_barrier_state:
2243 return selectNamedBarrierInst(
I, IntrinsicID);
2244 case Intrinsic::amdgcn_s_get_barrier_state:
2245 return selectSGetBarrierState(
I, IntrinsicID);
2246 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2247 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  unsigned SubRegIdx =
  if (SubRegIdx == AMDGPU::NoSubRegister)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));

  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
  if (auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
  if (auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
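// Select sign/zero/any-extend (and G_SEXT_INREG). Any-extends reduce to a
// plain COPY; narrow VALU results use V_BFE_I32/U32; SALU results use
// S_SEXT_I32_I8/I16 where they exist, an explicitly built high half for the
// 32->64 case, and S_BFE_I64/U64 (or S_BFE_I32/U32) otherwise.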
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
  I.eraseFromParent();
2639 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2646 assert(Mask.size() == 2);
2648 if (Mask[0] == 1 && Mask[1] <= 1) {
2656bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2660 Register Dst =
I.getOperand(0).getReg();
2662 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2665 Register Src =
I.getOperand(1).getReg();
2671 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2673 I.eraseFromParent();
2681bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2695 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2710 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2711 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2712 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2713 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2715 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2716 .
addReg(Src, 0, AMDGPU::sub0);
2717 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2718 .
addReg(Src, 0, AMDGPU::sub1);
2719 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2723 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2728 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2733 MI.eraseFromParent();
2738bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2741 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2748 Register LoReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2749 Register HiReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2750 Register ConstReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2751 Register OpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2757 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2758 .
addReg(Src, 0, AMDGPU::sub0);
2759 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2760 .
addReg(Src, 0, AMDGPU::sub1);
2761 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2766 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2770 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2776 MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
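// Walk the pointer operand of a load/store (or G_PREFETCH) through G_PTR_ADD
// chains, recording SGPR and VGPR address pieces and any constant offset for
// later addressing-mode selection.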
2784void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
2787 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
2789 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
2793 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
2798 for (
unsigned i = 1; i != 3; ++i) {
2805 assert(GEPInfo.Imm == 0);
2810 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
2811 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
2813 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
2817 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
2820bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
2821 return RBI.
getRegBank(Reg, *MRI, TRI)->
getID() == AMDGPU::SGPRRegBankID;
2824bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
2825 if (!
MI.hasOneMemOperand())
2835 if (!
Ptr || isa<UndefValue>(
Ptr) || isa<Argument>(
Ptr) ||
2836 isa<Constant>(
Ptr) || isa<GlobalValue>(
Ptr))
2842 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
2844 AMDGPU::SGPRRegBankID;
2847 return I &&
I->getMetadata(
"amdgpu.uniform");
2851 for (
const GEPInfo &GEPInfo : AddrInfo) {
2852 if (!GEPInfo.VgprParts.empty())
2858void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
2859 const LLT PtrTy =
MRI->getType(
I.getOperand(1).getReg());
2866 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2871bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2878 if (Reg.isPhysical())
2882 const unsigned Opcode =
MI.getOpcode();
2884 if (Opcode == AMDGPU::COPY)
2887 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2888 Opcode == AMDGPU::G_XOR)
2892 if (
auto *GI = dyn_cast<GIntrinsic>(&
MI))
2893 return GI->is(Intrinsic::amdgcn_class);
2895 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
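// Select G_PTRMASK. When the mask is known to preserve whole 32-bit halves
// the corresponding half is simply copied; otherwise the pointer is split
// into sub0/sub1, each half is AND'ed with the matching half of the mask,
// and the result is reassembled with a REG_SEQUENCE (64-bit SALU masks can
// use a single S_AND_B64).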
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
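// Resolve a dynamic vector index into an (index register, sub-register) pair:
// any constant part of the index is folded directly into the chosen
// sub-register, and whatever remains stays in the index register that is
// later written to M0.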
3082static std::pair<Register, unsigned>
3089 std::tie(IdxBaseReg,
Offset) =
3091 if (IdxBaseReg == AMDGPU::NoRegister) {
3095 IdxBaseReg = IdxReg;
3102 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3103 return std::pair(IdxReg, SubRegs[0]);
3104 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3107bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3113 LLT DstTy =
MRI->getType(DstReg);
3114 LLT SrcTy =
MRI->getType(SrcReg);
3122 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3126 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3128 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3129 if (!SrcRC || !DstRC)
3144 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3148 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3151 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3155 MI.eraseFromParent();
3163 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3165 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3168 MI.eraseFromParent();
3179 MI.eraseFromParent();
3184bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3191 LLT VecTy =
MRI->getType(DstReg);
3192 LLT ValTy =
MRI->getType(ValReg);
3204 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3208 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3210 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3218 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3222 std::tie(IdxReg,
SubReg) =
3225 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3232 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3236 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3241 MI.eraseFromParent();
3253 MI.eraseFromParent();
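// Select the raw/struct buffer-load-to-LDS intrinsics. The MUBUF LDS opcode
// is chosen by the load width and by which of vindex/voffset are present
// (BOTHEN/IDXEN/OFFEN/OFFSET variants), with the two VGPR inputs packed into
// a 64-bit register pair when both are used.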
3257bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3260 unsigned Size =
MI.getOperand(3).getImm();
3263 const bool HasVIndex =
MI.getNumOperands() == 9;
3267 VIndex =
MI.getOperand(4).getReg();
3271 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3272 std::optional<ValueAndVReg> MaybeVOffset =
3274 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3280 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3281 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3282 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3283 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3286 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3287 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3288 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3289 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3292 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3293 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3294 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3295 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3301 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3302 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3303 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3304 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3310 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3311 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3312 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3313 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3320 .
add(
MI.getOperand(2));
3324 if (HasVIndex && HasVOffset) {
3325 Register IdxReg =
MRI->createVirtualRegister(
TRI.getVGPR64Class());
3326 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3333 }
else if (HasVIndex) {
3335 }
else if (HasVOffset) {
3339 MIB.
add(
MI.getOperand(1));
3340 MIB.
add(
MI.getOperand(5 + OpOffset));
3341 MIB.
add(
MI.getOperand(6 + OpOffset));
3343 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3353 LoadPtrI.
Offset =
MI.getOperand(6 + OpOffset).getImm();
3355 StorePtrI.
V =
nullptr;
3369 MI.eraseFromParent();
3381 if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3384 assert(Def->getNumOperands() == 3 &&
3387 return Def->getOperand(1).getReg();
3393bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3395 unsigned Size =
MI.getOperand(3).getImm();
3401 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3404 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3407 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3412 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3417 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3424 .
add(
MI.getOperand(2));
3430 if (!isSGPR(
Addr)) {
3432 if (isSGPR(AddrDef->Reg)) {
3433 Addr = AddrDef->Reg;
3434 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3437 if (isSGPR(SAddr)) {
3438 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3450 VOffset =
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3462 MIB.
add(
MI.getOperand(4))
3463 .
add(
MI.getOperand(5));
3467 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3477 sizeof(int32_t),
Align(4));
3481 MI.eraseFromParent();
3485bool AMDGPUInstructionSelector::selectBVHIntrinsic(
MachineInstr &
MI)
const{
3486 MI.setDesc(TII.get(
MI.getOperand(1).getImm()));
3487 MI.removeOperand(1);
3488 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3494bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3497 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3498 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3500 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3501 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3503 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3504 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3506 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3507 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3509 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3510 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3512 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3513 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3515 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3516 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3518 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3519 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3521 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3522 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3524 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3525 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3527 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3528 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3530 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3531 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3533 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3534 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3536 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3537 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3539 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3540 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3542 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3543 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3545 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3546 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3548 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3549 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3551 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3552 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3554 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3555 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3557 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3558 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3560 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3561 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3563 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3564 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3566 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3567 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3569 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3570 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3572 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3573 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3575 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3576 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3578 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3579 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3585 auto VDst_In =
MI.getOperand(4);
3587 MI.setDesc(TII.get(Opc));
3588 MI.removeOperand(4);
3589 MI.removeOperand(1);
3590 MI.addOperand(VDst_In);
3591 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3595bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3597 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3600 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3604 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3605 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3606 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3608 MI.removeOperand(2);
3609 MI.setDesc(TII.get(Opcode));
3618bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3622 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3627 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3638 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3642 MI.eraseFromParent();
  unsigned NumOpcodes = 0;
    const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
      for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {
    Bits = SrcBits[Src.size()];
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);
      NumOpcodes += Op.first;
      LHSBits = Op.second;
      NumOpcodes += Op.first;
      RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
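// Fold a chain of G_AND/G_OR/G_XOR into a single V_BITOP3. BitOp3_Op assigns
// the canonical truth-table bytes 0xf0/0xcc/0xaa to up to three distinct
// source values and combines them with the chain's logic ops to produce the
// 8-bit table; the fold is only taken when it replaces at least two
// instructions.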
bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
  if (NumOpcodes < 2 || Src.empty())
  if (NumOpcodes == 2 && IsB32) {
  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  for (unsigned I = 0; I < Src.size(); ++I) {
    if (RB->getID() != AMDGPU::SGPRRegBankID)
    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  while (Src.size() < 3)
    Src.push_back(Src[0]);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
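// Main entry point: dispatch a generic (pre-isel) instruction to the matching
// custom selector for its opcode. Non-generic instructions are handled up
// front (copies go through selectCOPY), and a few cases (BITOP3, ICMP/FCMP,
// FPEXT) only return early when the custom path succeeds.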
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
      return true;
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    const AMDGPU::ImageDimIntrinsicInfo *Intr =
        AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I));
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
  default:
    return selectImpl(I, *CoverageInfo);
  }

  return false;
}
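// Complex pattern helpers. These return ComplexRendererFns: small bundles of
// callbacks that render the matched operands (registers, folded source
// modifiers, offsets) onto the instruction the TableGen patterns are building.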
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
}

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  unsigned Mods = 0;
  MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = getDefIgnoringCopies(Src, *MRI);
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    // Fold fsub 0.0, x into an fneg source modifier.
    const ConstantFP *LHS =
        getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI);
    if (LHS && LHS->isZero()) {
      Mods |= SISrcMods::NEG;
      Src = MI->getOperand(2).getReg();
      MI = getDefIgnoringCopies(Src, *MRI);
    }
  }

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  if (OpSel)
    Mods |= SISrcMods::OP_SEL_0;

  return std::pair(Src, Mods);
}
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
    bool ForceVGPR) const {
  // If source modifiers were folded on an SGPR operand, copy it to a VGPR so
  // the constant bus limit is not violated.
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
    Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg());
    BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
            TII.get(AMDGPU::COPY), VGPRSrc)
        .addReg(Src);
    Src = VGPRSrc;
  }
  return Src;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false);
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/false);
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/true,
                         /*AllowAbs=*/false);
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  // Reject sources that would require a folded modifier.
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
    return {};

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
    Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI->getOpcode() == AMDGPU::G_FNEG &&
      MRI.getType(Src) == LLT::fixed_vector(2, 16)) {
    // Negating a packed value flips both NEG and NEG_HI.
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  }

  return std::pair(Src, Mods);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
    MachineOperand &Root) const {
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");

static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  const TargetRegisterClass *DstRegClass;
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  default:
    llvm_unreachable("unhandled Reg sequence size");
  }

  MachineIRBuilder B(*InsertPt);
  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {
    // Fold the modifier only if every element carried it.
    if (Elts.size() != NegAbsElts.size()) {
  } else {
    assert(ModOpcode == TargetOpcode::G_FABS);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Decide from the first element whether fneg or fabs is being matched.
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
    // Fold the modifier only if every element carried it.
    if (BV->getNumSources() == EltsF32.size()) {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
  if (CV->getNumSources() == EltsV2F16.size()) {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  assert(CV->getNumSources() > 0);
  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                           ? AMDGPU::G_FNEG
                           : AMDGPU::G_FABS;
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
  if (CV->getNumSources() == EltsV2F16.size()) {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  // An index produced by a right shift by a multiple of 8 selects a byte lane.
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  // A shift by exactly 16 selects the high half.
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Src = ShiftSrc;
  }

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
  copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
  copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
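// SMRD addressing: try to express the pointer as an SGPR base plus an
// encoded immediate offset, a 32-bit SGPR soffset, or both, depending on
// which forms the subtarget's scalar load encoding supports.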
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

  // Prefer (SGPR base + SGPR soffset + immediate) when a second GEP with two
  // SGPR parts and no immediate is available.
  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      Base = GEPI2.SgprParts[0];
      *SOffset = OffsetReg;

      // The effective offset must not become negative.
      if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
        return false;
    }
  }

  // SGPR base + encoded immediate offset.
  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR base + 32-bit literal offset materialized into an SGPR soffset.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  // SGPR base + SGPR soffset, no immediate.
  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
  }
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /*SOffset=*/nullptr, &Offset))
    return std::nullopt;

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /*Offset=*/nullptr))
    return std::nullopt;

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();

  return std::pair(PtrBase, ConstOffset);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is done last.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    ImmOffset = ConstOffset;

    if (isSGPR(PtrBaseDef->Reg)) {
      if (ConstOffset > 0) {
        // Split an oversized offset into an encodable immediate plus a
        // remainder materialized in a VGPR.
        int64_t SplitImmOffset, RemainderOffset;

        if (isUInt<32>(RemainderOffset)) {
              MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
          BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
              .addImm(RemainderOffset);

  unsigned NumLiterals =
    return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // SGPR-only address: use it as saddr with a zero VGPR voffset.
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is done last.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();

      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

  return std::nullopt;
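// Some subtargets have a flat-scratch SVS swizzle bug: the check below
// rejects VGPR/SGPR address pairs whose low two bits can sum past a 4-byte
// boundary, i.e. (VMax & 3) + (SMax & 3) >= 4, computed from known bits.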
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is done last.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();

  return std::nullopt;
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

  std::optional<int> FI;
  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

// The address is considered not to wrap if it is a G_OR of disjoint values or
// a G_PTR_ADD carrying the no-unsigned-wrap flag.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =
    // A small negative constant addend cannot underflow the scratch base.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  std::optional<DefinitionAndSourceRegister> BaseDef =
  std::optional<ValueAndVReg> RHSOffset =
      (RHSOffset->Value.getSExtValue() < 0 &&
       RHSOffset->Value.getSExtValue() > -0x40000000)))

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();

bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
  if (RHS->countr_one() >= ShAmtBits)
    return true;

  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  std::optional<DefinitionAndSourceRegister> Def =
  assert(Def && "this shouldn't be an optional result");
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (isDSOffsetLegal(PtrBase, Offset)) {
    // (add n0, c0)
    return std::pair(PtrBase, Offset);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  int64_t OffsetValue0 = Offset;
  if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
    // (add n0, c0)
    return std::pair(PtrBase, OffsetValue0 / Size);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)

  std::optional<ValueAndVReg> MaybeOffset =

// Build a 128-bit resource descriptor for an arbitrary 64-bit base pointer.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
  B.buildInstr(AMDGPU::S_MOV_B32)
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0)
      .addImm(AMDGPU::sub1);

  RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  B.buildInstr(AMDGPU::S_MOV_B64)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0_sub1)
      .addImm(AMDGPU::sub2_sub3);

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {

  Data.N2 = InputAdd->getOperand(1).getReg();
  Data.N3 = InputAdd->getOperand(2).getReg();

bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)

bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Offset = AddrData.Offset;

  if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

  splitIllegalMUBUFOffset(B, SOffset, Offset);

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  Offset = AddrData.Offset;

  splitIllegalMUBUFOffset(B, SOffset, Offset);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  MIB.addReg(AMDGPU::SGPR_NULL);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  MIB.addReg(AMDGPU::SGPR_NULL);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  SOffset = AMDGPU::SGPR_NULL;

// Get an immediate that must be 32 bits and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
  std::optional<int64_t> EncodedImm =

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<int64_t> EncodedImm =

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
    return std::nullopt;
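// Mad-mix source selection: peel fpext/fneg/fabs wrappers off an f16 source
// so it can feed V_MAD_MIX/V_FMA_MIX with the matching op_sel and neg/abs
// modifier bits; the Matched flag reports whether a mix conversion was found.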
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  const auto CheckAbsNeg = [&]() {
    unsigned ModsTmp;
    std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectSGetBarrierState(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  std::optional<int64_t> BarValImm =

  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;

  auto DstReg = I.getOperand(0).getReg();
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

  I.eraseFromParent();
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state

  std::optional<int64_t> BarValImm =

  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

  auto BarId = ((*BarValImm) >> 4) & 0x3F;

  I.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

  MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  assert(ExpVal != INT_MIN);

  MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);

void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
  unsigned Val = MI.getOperand(OpIdx).getImm();

bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}