#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

    "amdgpu-global-isel-risky-select",
    cl::desc("Allow GlobalISel to select cases that are likely to not work yet"),

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
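
// isVCC() reports whether a virtual register lives in the VCC register bank,
// i.e. holds a wave-wide 1-bit condition mask rather than a per-lane 32-bit
// value. The selectors below use it to choose between SALU and VALU forms and
// between plain copies and mask-producing compares.
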
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                         unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
            TRI.isSGPRClass(SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
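
// A copy into a VCC-bank destination cannot stay a plain COPY when the source
// is a 32-bit value: constants are materialized with S_MOV_B32/B64, and
// otherwise the source is masked down to bit 0 (S_AND_B32/V_AND_B32) and
// turned into a per-wave condition mask with V_CMP_NE_U32.
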
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));

                                                   unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2));
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
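
// 64-bit adds without a native instruction are split into a low and a high
// half: S_ADD_U32/S_ADDC_U32 on the SALU path, or V_ADD_CO_U32 with an explicit
// carry register on the VALU path, with the two halves rejoined into the
// original 64-bit destination by a REG_SEQUENCE.
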
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
  if (!MRI->getRegClassOrNull(Dst1Reg))
    MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                    AMDGPU::SReg_32RegClass, *MRI))
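
// The carry-out/carry-in forms map onto the hardware carry chain: a VCC-bank
// carry uses V_ADD_CO/V_ADDC (and the SUB equivalents), while the scalar path
// threads the carry through SCC, copying the incoming carry into SCC before
// S_ADDC_U32/S_SUBB_U32 and copying SCC back out into the carry-out register.
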
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                               *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
    MI.eraseFromParent();
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
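
// On the SGPR path a 2 x 16-bit build_vector becomes one of the
// S_PACK_*_B32_B16 forms: LL packs the low halves of both sources, LH/HL take
// the high half of one operand (recognized above as a 16-bit right shift), and
// HH packs both high halves; a shift whose other source is the constant 0
// degenerates into a plain S_LSHR_B32.
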
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
  return selectG_ADD_SUB(I);

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)

bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
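
// Each predicate maps to a family of four VALU compare opcodes; the Select
// lambda picks the 16-bit, "true16", 32-bit, or 64-bit encoding based on the
// operand size and on whether the subtarget has true 16-bit instructions.
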
                                              unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
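
// Scalar integer compares (getS_CMPOpcode) write the single SCC bit, which is
// then copied into the 32-bit SGPR destination; compares whose result lives in
// the VCC bank instead use the V_CMP_* forms above and produce a per-lane mask
// directly in the destination register.
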
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
    auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
        copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
        copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
    SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
    SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
                     .add(I.getOperand(2))
                     .add(I.getOperand(3));
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1) {
      Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
    Register SrcReg = I.getOperand(2).getReg();
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                              AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
    if (!MRI->getRegClassOrNull(Reg))

bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
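
// The ds_ordered_count offset field is assembled from the intrinsic's
// immediates: Offset0 carries the ordered-count index shifted by 2, while
// Offset1 packs wave_release (bit 0), wave_done (bit 1), the shader type
// (bits 2-3), add-vs-swap (bit 4), and the dword count minus one (bits 6+);
// the two bytes are then combined as Offset0 | (Offset1 << 8).
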
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    std::tie(BaseOffset, ImmOffset) =
    if (Readfirstlane) {
                                          AMDGPU::SReg_32RegClass, *MRI))
    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  MI.eraseFromParent();

  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                        TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
        NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
    IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
    if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
    assert(VDataOut && !VDataIn);
    Register Tied = MRI->cloneVirtualRegister(VDataOut);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts)
        RegSeq.addReg(Zero).addImm(Sub);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts.drop_back(1))
        RegSeq.addReg(Undef).addImm(Sub);
      RegSeq.addReg(Zero).addImm(Parts.back());
  MI.eraseFromParent();
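
// Image intrinsics are selected by looking up a concrete MIMG opcode for the
// chosen encoding (NSA vs. contiguous VADDR, gfx10/gfx11 variants) together
// with the computed NumVDataDwords and NumVAddrDwords; the dmask and the
// atomic/store/load kind determine how many data dwords the operation reads
// and writes.
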
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
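
// G_SELECT is lowered either to S_CSELECT_B32/B64, which reads the scalar
// condition from SCC, or to V_CNDMASK_B32_e64, which takes the VCC-bank mask
// as its condition; V_CNDMASK selects src0 when the mask bit is clear, which
// is why the false value (operand 3) is added before the true value
// (operand 2) above.
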
    return AMDGPU::sub0;
    return AMDGPU::sub0_sub1;
    return AMDGPU::sub0_sub1_sub2;
    return AMDGPU::sub0_sub1_sub2_sub3;
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    return AMDGPU::sub0;

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    if (!SrcRC || !DstRC)
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
    I.eraseFromParent();
  if (SubRegIdx == -1)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
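
// Truncations that fall on a sub-register boundary are selected as a COPY that
// reads the matching sub-register of the source; the vector-of-16-bit special
// case above instead packs the two 32-bit halves with SDWA when available, or
// with a shift/and/or sequence otherwise.
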
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
  I.eraseFromParent();
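
// Sign/zero/any-extensions are selected per register bank: VGPR sources use
// V_BFE_I32/V_BFE_U32 bitfield extracts, SGPR sources use S_SEXT_I32_I8/I16
// where they exist and S_BFE_I32/I64 or S_BFE_U32/U64 otherwise, and
// extensions to 64 bits build the high half explicitly and recombine it with
// the low half.
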
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  } else if (ImmOp.isCImm()) {
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
                  .addImm(I.getOperand(1).getImm());
        &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);

bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
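
// For a 64-bit scalar fneg (or fneg of fabs) only the high 32 bits change: the
// value is split into sub0/sub1 copies, the sign bit of the high half is
// flipped with S_XOR_B32 (or set with S_OR_B32 in the fabs-folded case)
// against a constant mask, and the halves are reassembled with a REG_SEQUENCE.
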
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (Opcode == TargetOpcode::G_INTRINSIC)
    return MI.getIntrinsicID() == Intrinsic::amdgcn_class;
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
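
// Conditional branches are emitted as a copy of the condition into a physical
// flag register followed by the matching conditional branch: scalar conditions
// go through SCC and S_CBRANCH_SCC1, while VCC-bank conditions are masked with
// EXEC (S_AND_B32/B64) and branched on with S_CBRANCH_VCCNZ.
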
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);

bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
           "ptrmask should have been narrowed during legalize");
    BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
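
// G_PTRMASK becomes an AND of the pointer with the mask. 64-bit pointers on
// the scalar path can use a single S_AND_B64; otherwise the pointer and mask
// are split into 32-bit halves, halves whose mask bits are all ones are simply
// copied through, and only the remaining halves are ANDed before the pieces
// are reassembled with a REG_SEQUENCE.
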
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);

bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
  MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
      .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  MIB.addImm((Aux >> 3) & 1);
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();

bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
      .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
                        sizeof(int32_t), Align(4));
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();
    return selectPHI(I);

  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_PTR_ADD:
    return selectG_PTR_ADD(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
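
// selectVOP3ModsImpl peels G_FNEG and (optionally) G_FABS off the source and
// records them as VOP3 source modifiers, so the negate/abs is folded into the
// consuming instruction instead of being selected separately.
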
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
                                                           bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
            TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  return std::pair(Src, Mods);

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

AMDGPUInstructionSelector::selectDotIUVOP3PMods(MachineOperand &Root) const {
         "expected i1 value");

AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");

AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
          copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
          copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
  getAddrModeInfo(*MI, *MRI, AddrInfo);
  if (AddrInfo.empty())
  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm =
  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        Base = GEPI2.SgprParts[0];
        *SOffset = OffsetReg;
  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
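
// SMRD/SMEM addressing is matched here in order of preference: an SGPR base
// plus an immediate that fits the encoded offset field, an SGPR base plus a
// 32-bit constant materialized into an SOffset register with S_MOV_B32, or an
// SGPR base with a register SOffset when the constant part is zero.
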
3783AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
3786 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset))
3787 return std::nullopt;
3794AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
3796 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
3798 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
3799 return std::nullopt;
3801 const GEPInfo &GEPInfo = AddrInfo[0];
3802 Register PtrReg = GEPInfo.SgprParts[0];
3803 std::optional<int64_t> EncodedImm =
3806 return std::nullopt;
3815AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
3817 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr))
3818 return std::nullopt;
3825AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
3828 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset))
3829 return std::nullopt;
3836std::pair<Register, int>
3837AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
3847 int64_t ConstOffset;
3848 std::tie(PtrBase, ConstOffset) =
3849 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
3850 if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))
3857 return std::pair(PtrBase, ConstOffset);
3861AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
3871AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
3881AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    // ...
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    // ...
    auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
    if (isSGPR(PtrBaseDef->Reg)) {
      if (ConstOffset > 0) {
        // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset)
        //                               + (large_offset & MaxOffset)
        int64_t SplitImmOffset, RemainderOffset;
        // ...
        if (isUInt<32>(RemainderOffset)) {
          Register HighBits =
              MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

          BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
                  HighBits)
              .addImm(RemainderOffset);
          // ...
        }
      }

      unsigned NumLiterals = /* ... */;
      // ...
        return std::nullopt;
    }
  }

  // Match the variable offset.
  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // ...
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
      // ...
    }
  }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // Materialize a zero voffset when only an SGPR base is present.
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);
  // ...
}
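// selectScratchSAddr matches the SCRATCH_* "saddr" form. It folds an
// encodable constant offset, uses a frame index directly as saddr when
// possible, and folds (frame_index + sgpr) by emitting an S_ADD_I32.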
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    // ...
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          // ...
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;
  // ...
}
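// Workaround for subtargets with the flat-scratch SVS swizzle bug: the
// access is rejected unless known-bits analysis proves that adding the low
// two bits of voffset and (soffset + inst_offset) cannot carry into bit 2.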
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  // ...
  auto VKnown = KnownBits->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      true, false, KnownBits->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
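// selectScratchSVAddr matches the SVS scratch form with both a VGPR and an
// SGPR address component, after checking base legality and the swizzle-bug
// constraint above.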
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
    return std::nullopt;

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    // ...
  }

  if (!isSGPR(LHS))
    return std::nullopt;
  // ...
}
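// MUBUF "offen" scratch addressing: the offset bits that do not fit the
// MUBUF immediate field are materialized into a VGPR with V_MOV_B32, and a
// frame index is used for the base where one is available.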
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  // ...
    Register HighBits =
        MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        // ...

  std::optional<int> FI;
  Register VAddr = Root.getReg();

  // ...
    Register PtrBase;
    int64_t ConstOffset;
    std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
    if (ConstOffset != 0) {
      // ...
        if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
          FI = PtrBaseDef->getOperand(1).getIndex();
      // ...
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  // ...
}
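// DS offset legality helpers: a single DS offset must fit in 16 bits, and
// the two offsets of a read2/write2 must be multiples of the element size
// with quotients that fit in 8 bits each.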
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  // ...
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;
  // ...
}

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
    Register Base, uint64_t FlatVariant) const {
  // ...
}
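// isUnneededShiftMask returns true when a G_AND mask in front of a shift
// amount can be dropped: the mask (combined with the known-zero bits of the
// other operand) already covers at least ShAmtBits low bits, so the
// hardware's implicit shift-amount masking makes the AND redundant.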
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros =
      KnownBits->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}

static Register getWaveAddress(const MachineInstr *Def) {
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(MachineOperand &Root) const {
  // ...
}
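// DS addressing: selectDS1Addr1OffsetImpl splits a DS address into a base
// register and an immediate offset when the offset is legal for a single
// ds_read/ds_write; otherwise the original pointer with a zero offset is
// returned.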
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  // ...
}
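// The read2/write2 variants reuse selectDSReadWrite2 with the element size
// (4 bytes for 64-bit, 8 bytes for 128-bit accesses); the returned offset is
// already scaled down by that size.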
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  // ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RootI->getOperand(2).getReg(), MRI);
  // ...
}
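// MUBUF resource descriptor construction: a 128-bit SGPR rsrc is assembled
// from the (optional) 64-bit base pointer and two 32-bit format words via
// S_MOV and REG_SEQUENCE, using the default data format reported by
// SIInstrInfo::getDefaultRsrcDataFormat().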
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc2).addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc3).addImm(FormatHi);

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2)
      .addImm(AMDGPU::sub0)
      .addReg(RSrc3)
      .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64).addDef(RSrcLo).addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi)
      .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // ...
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // ...
}
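// parseMUBUFAddress decomposes a pointer into the MUBUFAddressData fields:
// N0 is the base, N2/N3 the operands of an inner G_PTR_ADD if present, and
// Offset a constant that fits in 32 bits.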
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd =
          getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
    // ...
  }

  return Data;
}
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // ...
  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  // ...
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(SOffset)
      .addImm(ImmOffset);
  // ...
}
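// selectMUBUFAddr64Impl / selectMUBUFOffsetImpl pick between the addr64 and
// plain offset MUBUF forms based on which address components are divergent,
// build the resource descriptor accordingly, and move any unencodable
// immediate into soffset via splitIllegalMUBUFOffset.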
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // ...
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent; use N0 (the result of the add) as the
        // addr64 operand.
        VAddr = N0;
      } else {
        // ...
      }
    } else {
      // ...
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // ...
  }
  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  // ...
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr, RSrcReg, SOffset;
  int64_t Offset = 0;
  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg, SOffset;
  int64_t Offset = 0;
  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))