#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
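
// isVCC: true if Reg is a virtual register holding a wave-wide condition,
// i.e. its register class or assigned register bank is the VCC bank.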
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const LLT Ty = MRI.getType(Reg);
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
  return RB->getID() == AMDGPU::VCCRegBankID;

bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
            And.setOperandDead(3);
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);

    if (MO.getReg().isPhysical())

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
  I.setDesc(TII.get(TargetOpcode::PHI));

                                         unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
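
// selectG_ADD_SUB: 32-bit adds/subs become S_ADD_U32/S_SUB_U32 on the SALU or
// V_ADD_U32/V_SUB_U32 (and the carry-out _CO_ forms) on the VALU. 64-bit SALU
// cases are split into lo/hi halves chained through S_ADD_U32 + S_ADDC_U32 and
// recombined with a REG_SEQUENCE.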
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
              .add(I.getOperand(1))
              .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
            .add(I.getOperand(1))
            .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
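
// selectG_UADDO_USUBO_UADDE_USUBE: carry-producing/consuming add and sub. With
// the carry in VCC this maps onto V_ADD_CO_U32/V_ADDC_U32 (or the SUB forms);
// otherwise the scalar S_ADD_U32/S_ADDC_U32 forms are used with the carry
// routed through SCC.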
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                    AMDGPU::SReg_32RegClass, *MRI))

bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                           *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
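
// selectG_BUILD_VECTOR: handles G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC of two
// 16-bit elements. Wider elements are handled as a REG_SEQUENCE through
// selectG_MERGE_VALUES; otherwise the two halves are packed with
// V_LSHL_OR_B32 on the VALU or one of the S_PACK_* forms on the SALU.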
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
    MI.eraseFromParent();
    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)

bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
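
// selectG_INTRINSIC: dispatcher for side-effect-free intrinsics. The copy-like
// wave intrinsics (wqm, softwqm, strict_wwm, strict_wqm) are lowered through
// constrainCopyLikeIntrin; the rest route to the dedicated helpers above and
// below.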
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
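
// getV_CMPOpcode: map a comparison predicate and operand size to the VALU
// V_CMP opcode, choosing between the t16, fake16, 32-bit and 64-bit encodings
// depending on the subtarget's 16-bit instruction support.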
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);

                                              unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;

bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();

  if (MI->getParent() != MBB)
  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
  if (isa<GAnyCmp>(MI))
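
// selectBallot: amdgcn.ballot. A constant condition folds to an S_MOV of zero
// or the exec mask; otherwise the lane mask is combined with exec via S_AND.
// A 64-bit ballot on wave32 is widened to 64 bits with a REG_SEQUENCE.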
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
  std::optional<ValueAndVReg> Arg =
    if (BallotSize != WaveSize) {
    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto *RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))

bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();

  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    std::tie(BaseOffset, ImmOffset) =
    if (Readfirstlane) {
                                     AMDGPU::SReg_32RegClass, *MRI))
    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {

bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
    if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
        IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
      MI.eraseFromParent();
  if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
    MI.eraseFromParent();

  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
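
// selectImageIntrinsic: lower a G_AMDGPU_INTRIN_IMAGE_* pseudo to a concrete
// MIMG instruction, computing DMask and the data/address dword counts and
// picking the GFX-generation specific encoding (NSA or default).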
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                        TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
        NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
      IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
        << "requested image instruction is not supported on this GPU\n");
                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
      if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_s_barrier:
  case Intrinsic::amdgcn_s_barrier_signal:
  case Intrinsic::amdgcn_s_barrier_wait:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
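
// selectG_TRUNC: most truncations become subregister copies. Truncating a
// 64-bit value into a packed 16-bit pair needs the high half merged in, using
// V_MOV_B32_sdwa when SDWA is available or a shift/and/or sequence otherwise.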
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  unsigned SubRegIdx =
  if (SubRegIdx == AMDGPU::NoSubRegister)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));

  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
  if (auto *RB = dyn_cast<const RegisterBank *>(RegClassOrBank))
  if (auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
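
// selectG_SZA_EXT: G_SEXT / G_ZEXT / G_ANYEXT / G_SEXT_INREG. A 32-bit anyext
// is a plain copy; VGPR extensions use V_BFE_I32/V_BFE_U32, SGPR extensions
// use S_SEXT_I32_I8/I16 or the S_BFE_* opcodes, and 64-bit results are built
// from lo/hi halves with a REG_SEQUENCE.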
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
    I.eraseFromParent();

  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
  assert(Mask.size() == 2);
  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
           AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
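
// selectG_PTRMASK: AND a pointer with a mask. A 32-bit half whose mask bits
// are all ones is just copied; otherwise the low and/or high halves are masked
// with S_AND_B32/V_AND_B32 and recombined with a REG_SEQUENCE, with a single
// S_AND_B64 used for the fully scalar 64-bit case.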
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();

static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);

bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  MI.eraseFromParent();
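
// selectBufferLoadLds: buffer-load-to-LDS intrinsics. The MUBUF opcode is
// chosen by transfer size (1, 2, 4, 12 or 16 bytes) and by whether a VGPR
// index and/or VGPR offset is present (BOTHEN/IDXEN/OFFEN/OFFSET variants).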
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
      .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();

bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
      .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
      sizeof(int32_t), Align(4));
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
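
// selectSMFMACIntrin: map each amdgcn.smfmac.* intrinsic to its V_SMFMAC_*
// pseudo, drop the intrinsic ID operand, and move the accumulator operand
// into the trailing VDst_In position expected by the instruction.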
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();
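
// BitOp3_Op: fold a tree of G_AND/G_OR/G_XOR feeding Dst into an 8-bit truth
// table over up to three distinct leaf sources, returning the number of logic
// ops folded and the table. Used by selectBITOP3 to form V_BITOP3_B32/B16 when
// at least two ops can be combined.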
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
    for (unsigned I = 0; I < Src.size(); ++I) {
    if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {
    Bits = SrcBits[Src.size()];
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);

bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
  if (NumOpcodes < 2 || Src.empty())
  if (NumOpcodes == 2 && IsB32) {
  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  for (unsigned I = 0; I < Src.size(); ++I) {
    if (RB->getID() != AMDGPU::SGPRRegBankID)
    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  while (Src.size() < 3)
    Src.push_back(Src[0]);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
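
// select(): main InstructionSelector entry point. Instructions that are not
// pre-ISel generic opcodes are handled up front (copies go through selectCOPY);
// each generic opcode is dispatched to the matching per-opcode helper above.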
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);
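// selectVOP3ModsImpl peels G_FNEG (or a canonicalizing G_FSUB from zero) and,
// when allowed, G_FABS off the source value and records them in the returned
// modifier mask, so callers can fold them into the VOP3 source-modifier fields
// instead of emitting separate instructions.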
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&

      TII.get(AMDGPU::COPY), VGPRSrc)
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), false);

  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), true,

  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG ||
      Def->getOpcode() == AMDGPU::G_FABS)
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);

  return std::pair(Src, Mods);
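// The packed (VOP3P) variant only looks through a G_FNEG of the whole vector
// source: the negation is folded into the returned modifier mask and matching
// continues on the value feeding the G_FNEG.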
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
         "expected i1 value");
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");

static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  }

  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {

    if (Elts.size() != NegAbsElts.size()) {

    assert(ModOpcode == TargetOpcode::G_FABS);
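// The WMMA modifier selectors below look through a build_vector/concat_vectors
// whose elements all carry the same G_FNEG (or G_FABS) wrapper: only if every
// source element is wrapped is the vector rebuilt without the wrappers and the
// modifier applied once to the whole operand.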
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)

    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));

    if (BV->getNumSources() == EltsF32.size()) {

AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {

  if (CV->getNumSources() == EltsV2F16.size()) {

AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  assert(CV->getNumSources() > 0);
  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)

  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));

  if (CV->getNumSources() == EltsV2F16.size()) {

AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;

  MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
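// SWMMAC index selection: when the index operand is a G_LSHR of a 32-bit value
// by a byte-aligned amount (or by exactly 16 for the 16-bit form), the shift
// is folded away and Key records which packed byte/half of the source to use.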
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {

AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,

  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        Base = GEPI2.SgprParts[0];
        *SOffset = OffsetReg;

        if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
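// The SMRD complex patterns below are thin wrappers around selectSmrdOffset:
// selectSmrdImm asks only for an encodable immediate, selectSmrdSgpr only for
// an SGPR soffset, and selectSmrdSgprImm accepts both at once.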
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
    return std::nullopt;

AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
    return std::nullopt;

AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
    return std::nullopt;

AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

       !isFlatScratchBaseLegal(Root.getReg())))

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();

  return std::pair(PtrBase, ConstOffset);

AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
      ImmOffset = ConstOffset;

    if (isSGPR(PtrBaseDef->Reg)) {
      if (ConstOffset > 0) {
        int64_t SplitImmOffset, RemainderOffset;

        if (isUInt<32>(RemainderOffset)) {
              MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

          BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
              .addImm(RemainderOffset);

  unsigned NumLiterals =

    return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
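// Global SADDR form: the 64-bit base must be an SGPR while the per-lane part
// goes into a 32-bit VGPR voffset. A constant offset is split into the
// immediate field plus a materialized remainder when needed, and if no VGPR
// component is present at all, a voffset is materialized with V_MOV_B32.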
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();

      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

  return std::nullopt;
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(

  uint64_t VMax = VKnown.getMaxValue().getZExtValue();

  return (VMax & 3) + (SMax & 3) >= 4;
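// The check above is conservative: if the low two bits of the largest possible
// VGPR and SGPR address components can sum past 3, the scratch SVS form is
// rejected so that the flat-scratch SVS swizzle hardware bug is avoided.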
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();

  return std::nullopt;
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

  std::optional<int> FI;

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
      if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))

static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =

    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
  std::optional<DefinitionAndSourceRegister> BaseDef =
  std::optional<ValueAndVReg> RHSOffset =

      (RHSOffset->Value.getSExtValue() < 0 &&
       RHSOffset->Value.getSExtValue() > -0x40000000)))

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();

bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =

  if (RHS->countr_one() >= ShAmtBits)

  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
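// A masking G_AND ahead of a shift amount is redundant when the mask's low
// ShAmtBits bits are all ones, or when the known-zero bits of the other
// operand make up the difference, because the shift only consumes ShAmtBits
// bits of the amount anyway.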
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
  std::optional<DefinitionAndSourceRegister> Def =

  assert(Def && "this shouldn't be an optional result");
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    if (isDSOffsetLegal(PtrBase, Offset)) {
      return std::pair(PtrBase, Offset);

  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);

AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);

AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);

AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    int64_t OffsetValue0 = Offset;

    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      return std::pair(PtrBase, OffsetValue0 / Size);

  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);
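// DS read2/write2 encode two 8-bit offsets in units of the element size, which
// is why isDSOffset2Legal requires both byte offsets to be multiples of Size
// and the scaled values to fit in 8 bits, and why the returned offset is
// divided by Size. For example, with Size == 4 a byte offset of 1020 encodes
// as 255, the largest representable value.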
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)

  std::optional<ValueAndVReg> MaybeOffset =

static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)

  B.buildInstr(AMDGPU::S_MOV_B32)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0)
      .addImm(AMDGPU::sub1);

    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0_sub1)
      .addImm(AMDGPU::sub2_sub3);

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
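// buildRSRC assembles a 128-bit SGPR resource descriptor: the 64-bit base
// pointer occupies sub0_sub1 and two S_MOV_B32-materialized format words
// occupy sub2_sub3. buildAddr64RSrc and buildOffsetSrc above feed it the
// subtarget's default buffer resource data format from
// TII.getDefaultRsrcDataFormat().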
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {

    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)

bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

  if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

  splitIllegalMUBUFOffset(B, SOffset, Offset);
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

  splitIllegalMUBUFOffset(B, SOffset, Offset);
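// MUBUF address selection: parseMUBUFAddress splits the pointer into an
// optional 32-bit constant offset plus the register pieces of an inner add
// (N2/N3). shouldUseAddr64 picks the ADDR64 form when the base ends up in
// VGPRs; otherwise the offset-only form is used and any offset that does not
// fit the immediate field is moved into SOffset by splitIllegalMUBUFOffset.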
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))

  MIB.addReg(AMDGPU::SGPR_NULL);

AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))

  MIB.addReg(AMDGPU::SGPR_NULL);

AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  SOffset = AMDGPU::SGPR_NULL;

static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
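// SMRD buffer offsets: the constant soffset must fit in a signed 32-bit value
// and is then treated as zero-extended (hence Lo_32); the selectors below
// re-encode it with the subtarget-specific SMRD offset encoding helpers.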
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =

  std::optional<int64_t> EncodedImm =

AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<int64_t> EncodedImm =

AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
    return std::nullopt;

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  const auto CheckAbsNeg = [&]() {
    std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);

AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectSGetBarrierState(
  std::optional<int64_t> BarValImm =

  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;

  auto DstReg = I.getOperand(0).getReg();
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

  I.eraseFromParent();
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    }
  } else {
    switch (IntrID) {
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    }
  }

bool AMDGPUInstructionSelector::selectNamedBarrierInit(
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;

  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;

  I.eraseFromParent();
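// Named-barrier selection: when the barrier operand is a compile-time constant
// the *_IMM form of the instruction can be used directly; otherwise the value
// is assembled in SGPR temporaries and copied into M0 so the *_M0 form can be
// used.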
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state

  std::optional<int64_t> BarValImm =

  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);

    auto BarId = ((*BarValImm) >> 4) & 0x3F;

  I.eraseFromParent();
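// The render* callbacks below are custom operand renderers invoked from the
// generated matcher: each converts a matched G_CONSTANT/G_FCONSTANT or
// immediate operand into the exact immediate the MC instruction expects
// (negated value, popcount, op_sel bit layout, cache-policy bits, and so on).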
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

  MIB.addImm(MI.getOperand(OpIdx).getImm() != 0);

  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x2)

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm((MI.getOperand(OpIdx).getImm() & 0x1)

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  assert(ExpVal != INT_MIN);

  MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);

void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
  unsigned Val = MI.getOperand(OpIdx).getImm();

bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {