#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

    "amdgpu-global-isel-risky-select",
    cl::desc(
        "Allow GlobalISel to select cases that are likely to not work yet"),

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
    : TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
      TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
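// isVCC: decide whether Reg should be treated as a condition-code (VCC bank)
// value. The checks below consult an explicit register class or bank when one
// is already assigned, and otherwise fall back on the value's type and its
// defining opcode.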
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const LLT Ty = MRI.getType(Reg);
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        And.setOperandDead(3);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                         unsigned SubIdx) const {
    Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
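// selectG_ADD_SUB: 32-bit adds and subs map directly onto S_ADD/S_SUB (SALU)
// or V_ADD/V_SUB (VALU). 64-bit values are split into sub0/sub1 halves, added
// with an add / add-with-carry pair, and recombined with a REG_SEQUENCE.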
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                 AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                               *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
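// selectG_BUILD_VECTOR: handles v2s16 build_vector(_trunc); wider element
// sizes are forwarded to selectG_MERGE_VALUES. Scalar results use the
// S_PACK_* forms (LL/LH/HL/HH, chosen from folded shifts), while vector
// results are assembled with V_AND_B32 plus V_LSHL_OR_B32.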
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    MI.eraseFromParent();
    MI.eraseFromParent();
    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
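// selectG_INTRINSIC: dispatch point for intrinsics without side effects that
// need custom selection (wqm/wwm copies, writelane, div_scale, ballot,
// compares, the smfmac variants, and so on); anything not listed here is
// presumably left to the generated pattern matcher.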
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_inverse_ballot:
    return selectInverseBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
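// The helper below picks the VALU compare opcode for a given predicate and
// operand size; when true16 instructions are available the _t16_e64 forms are
// preferred over the legacy 16-bit encodings.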
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
                                          unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  I.eraseFromParent();
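// selectIntrinsicCmp: lowers amdgcn.icmp/amdgcn.fcmp, whose result is an
// explicit mask register rather than a VCC-bank value (the isVCC check below
// rejects that case). Source modifiers are folded via selectVOP3ModsImpl and
// operands are copied to VGPRs where needed before building the V_CMP.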
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();
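// selectBallot: amdgcn.ballot with a constant argument folds to a move of 0
// or a copy of EXEC; otherwise the already-selected compare result is simply
// copied into the destination mask register.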
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  std::optional<ValueAndVReg> Arg =
  const auto BuildCopy = [&](Register SrcReg) {
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  const Register DstReg = I.getOperand(0).getReg();
  const Register MaskReg = I.getOperand(2).getReg();
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
                     AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
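// Mapping from the amdgcn.ds.gws.* intrinsics to their DS_GWS_* machine
// opcodes, used by selectDSGWSIntrinsic below.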
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    std::tie(BaseOffset, ImmOffset) =
    if (Readfirstlane) {
                                       AMDGPU::SReg_32RegClass, *MRI))
    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  MI.eraseFromParent();
  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
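// selectImageIntrinsic: the most involved selector in this file. It decodes
// the image-intrinsic immediate operands (dmask, unorm, texfail control,
// cache policy), works out the VData/VAddr register counts, chooses between
// the NSA and non-NSA MIMG encodings for the current subtarget, and emits the
// final MIMG instruction, building a tied, partially-undef destination when a
// texfail (TFE/LWE) result is requested.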
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                    TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
      IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
                 : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
    if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
                STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
    assert(VDataOut && !VDataIn);
    Register Tied = MRI->cloneVirtualRegister(VDataOut);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts)
        RegSeq.addReg(Zero).addImm(Sub);
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts.drop_back(1))
        RegSeq.addReg(Undef).addImm(Sub);
      RegSeq.addReg(Zero).addImm(Parts.back());
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
    return AMDGPU::sub0;
    return AMDGPU::sub0_sub1;
    return AMDGPU::sub0_sub1_sub2;
    return AMDGPU::sub0_sub1_sub2_sub3;
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    return AMDGPU::sub0;
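// selectG_TRUNC: most truncations become a subregister copy. The special case
// is a vector truncation to 16-bit elements, which packs the two 32-bit
// halves with SDWA when available and otherwise with a shift/and/or sequence.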
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  if (SubRegIdx == -1)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
    I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  } else if (ImmOp.isCImm()) {
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
                  .addImm(I.getOperand(1).getImm());
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
  ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  return I && I->getMetadata("amdgpu.uniform");
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
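// selectG_PTRMASK: applies the mask with scalar or vector AND instructions.
// Mask halves that are all ones reduce to plain copies; a 64-bit pointer on
// the SALU side can use a single S_AND_B64, otherwise the pointer is split
// into sub0/sub1, masked per half, and re-assembled with a REG_SEQUENCE.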
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
           "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                                      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  MI.eraseFromParent();
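// selectBufferLoadLds: selects the raw/struct buffer-load-to-LDS intrinsics.
// The BUFFER_LOAD_*_LDS_{OFFSET,OFFEN,IDXEN,BOTHEN} opcode is chosen from the
// load size and from whether a vindex and/or voffset operand is present; a
// combined 64-bit index register is built when both are used.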
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
                 .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  MIB.addImm((Aux >> 3) & 1);
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
                 .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
                                     sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
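// Top-level select() dispatch: each generic opcode is routed to the matching
// selectG_* helper; opcodes without a custom path are presumably left to the
// TableGen-generated selector.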
    return selectPHI(I);
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);
  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_PTR_ADD:
    return selectG_PTR_ADD(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
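// The remaining entries are ComplexPattern renderers used by the generated
// matcher: rather than selecting whole instructions, they fold source
// modifiers (neg/abs, op_sel) and addressing-mode pieces into operand lists.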
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
                TII.get(AMDGPU::COPY), VGPRSrc)
3725AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
3732AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
3735 std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
3739 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3748AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
3751 std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
3757 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3766AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
3775AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
3778 std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
3782 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3789AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
3793 std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
false);
3797 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3804AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
3807 std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
true,
3812 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3819AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
3822 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
3829std::pair<Register, unsigned>
3830AMDGPUInstructionSelector::selectVOP3PModsImpl(
3835 if (
MI &&
MI->getOpcode() == AMDGPU::G_FNEG &&
3840 Src =
MI->getOperand(1).getReg();
3841 MI =
MRI.getVRegDef(Src);
3852 return std::pair(Src, Mods);
3856AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
3862 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI);
3871AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
3877 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true);
AMDGPUInstructionSelector::selectDotIUVOP3PMods(MachineOperand &Root) const {
  assert(Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0) &&
         "expected i1 value");
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
    MachineOperand &Root) const {
  assert(Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0) &&
         "expected i1 value");
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,

            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,

            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
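
// Match an SMRD addressing mode from the pointer operand: an SGPR base plus,
// depending on which out-parameters are requested, an encoded immediate
// offset and/or an SGPR offset register (SOffset).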
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm =

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      Base = GEPI2.SgprParts[0];
      *SOffset = OffsetReg;

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&

    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
    return std::nullopt;
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =

    return std::nullopt;
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
    return std::nullopt;
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;
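
// Split a flat/global/scratch address into a base pointer plus a constant
// offset suitable for the requested FLAT instruction variant.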
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))

  return std::pair(PtrBase, ConstOffset);
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
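
// Match the global SADDR addressing mode: an SGPR base address plus a VGPR
// offset and an immediate, splitting oversized constant offsets when needed.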
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
      ImmOffset = ConstOffset;

      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          int64_t SplitImmOffset, RemainderOffset;

          if (isUInt<32>(RemainderOffset)) {
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
            BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
                .addImm(RemainderOffset);

  unsigned NumLiterals =

    return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&

    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();

      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

  return std::nullopt;
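
// Conservatively detect the flat-scratch SVS swizzle hardware bug: if the low
// two bits of the VGPR offset and the SGPR offset (with the immediate folded
// in) can sum past 3 and carry into bit 2, the access may be swizzled
// incorrectly on affected subtargets.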
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&

    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();

    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
    return std::nullopt;

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();

  return std::nullopt;
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

  std::optional<int> FI;

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
      if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)

  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    FI = RootDef->getOperand(1).getIndex();
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
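
// Both offsets of a DS read2/write2 pair must be multiples of Size whose
// scaled values fit in the 8-bit per-element offset fields.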
bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;

  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
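
// A G_AND mask in front of a shift amount is redundant when it preserves at
// least ShAmtBits low bits, since the shift only consumes those bits anyway.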
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  std::optional<DefinitionAndSourceRegister> Def =

  assert(Def && "this shouldn't be an optional result");
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {

    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

    std::tie(PtrBase, Offset) =
        getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    if (isDSOffsetLegal(PtrBase, Offset)) {
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
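
// Match the DS read2/write2 addressing mode: a base register plus two
// consecutive Size-scaled element offsets derived from one constant offset.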
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {

    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

    std::tie(PtrBase, Offset) =
        getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      return std::pair(PtrBase, OffsetValue0 / Size);