#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
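// Added note: GET_GLOBALISEL_IMPL pulls the TableGen-generated pattern
// matching implementation into this translation unit; AMDGPUSubtarget is
// temporarily aliased to GCNSubtarget so the generated code binds to the GCN
// subtarget class.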
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
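// Added note: isVCC appears to report whether a virtual register should be
// treated as a condition (VCC-bank) value; judging from the fragments below it
// inspects the register's class/bank and the defining instruction rather than
// only its LLT type.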
bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
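// Added note: selectCOPY handles copies whose destination lives in the VCC
// bank; from the fragments below, a constant boolean source becomes an
// S_MOV_B32/S_MOV_B64 of 0 or -1, while a non-VCC variable source is masked
// to bit 0 (V_AND) and compared against zero (V_CMP_NE) to form a proper
// condition value.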
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
            And.setOperandDead(3);
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                                       unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
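// Added note: selectG_ADD_SUB chooses S_ADD/S_SUB for SALU results and
// V_ADD/V_SUB for VALU results; the fragments below show 64-bit values being
// split into sub0/sub1 halves (S_ADD_U32 + S_ADDC_U32, or V_ADD_CO_U32 with an
// explicit carry register) and recombined with a REG_SEQUENCE.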
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
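// Added note: for the G_UADDO/G_USUBO/G_UADDE/G_USUBE family below, a carry
// result in the VCC bank selects the VALU carry opcodes (V_ADD_CO_U32 /
// V_ADDC_U32 and their SUB counterparts); otherwise the scalar S_ADD/S_ADDC
// forms are used and the carry is routed through SCC via explicit copies.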
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                    *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
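// Added note: selectG_BUILD_VECTOR below mainly handles v2s16-style build
// vectors; wider element sizes are forwarded to selectG_MERGE_VALUES, constant
// operands are folded, VGPR results use V_AND + V_LSHL_OR_B32, and SGPR
// results pick one of the S_PACK_{LL,LH,HL,HH}_B32_B16 packing opcodes.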
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
      MI.eraseFromParent();
      MI.eraseFromParent();
    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
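// Added note: selectG_INTRINSIC below dispatches side-effect-free intrinsics.
// A few are lowered inline (amdgcn_if_break, the WQM/WWM copy-like
// intrinsics); the rest are forwarded to the dedicated helpers such as
// selectWritelane, selectBallot and selectSMFMACIntrin defined in this file.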
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
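// Added note: the helper below maps a compare predicate to the matching VALU
// V_CMP opcode, choosing among the 16/32/64-bit and true16 encodings via the
// Select lambda; the getS_CMPOpcode routine that follows does the same for the
// scalar S_CMP opcodes.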
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
                                              unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();
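// Added note: selectBallot special-cases constant ballot arguments -- judging
// from the fragments below, a false argument becomes an S_MOV of zero, a true
// argument becomes a copy of EXEC/EXEC_LO, and a 32-bit mask is widened to a
// 64-bit result with a REG_SEQUENCE when needed.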
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  std::optional<ValueAndVReg> Arg =
  const auto BuildCopy = [&](Register SrcReg) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                          AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
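// Added note: selectDSOrderedIntrinsic packs the ds_ordered_count controls
// into the DS offset fields: offset0 holds the ordered-count index (index
// << 2), offset1 holds wave_release, wave_done, the shader-type bits, the
// add/swap selector and (where supported) the dword count, and the final
// immediate is offset0 | (offset1 << 8).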
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
  std::tie(BaseOffset, ImmOffset) =
  if (Readfirstlane) {
                                     AMDGPU::SReg_32RegClass, *MRI))
  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
    std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
    if (!isDSOffsetLegal(PtrBase, Offset)) {
      PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
      MI.eraseFromParent();
    MI.eraseFromParent();

  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
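// Added note: selectImageIntrinsic lowers the image pseudo instructions to
// MIMG machine instructions. The fragments below derive the dmask and the
// number of VData/VAddr dwords (with special handling for atomics, D16 and
// A16/G16), pick the encoding for the target generation (GFX10/GFX11 NSA vs.
// default), and then append the vaddr, rsrc and sampler operands.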
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                          TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
        NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
      IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
    if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
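// Added note: selectG_TRUNC is normally re-issued as a plain COPY of the low
// bits of the source; the fragments below also cover a 16-bit destination
// taken from a 32-bit VGPR through the lo16 subregister, and a packed
// two-element truncation that copies sub0/sub1 and recombines them with
// V_MOV_B32_sdwa when SDWA is available, or with a shift/and/or sequence
// otherwise.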
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  unsigned SubRegIdx =
  if (SubRegIdx == AMDGPU::NoSubRegister)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));

  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
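// Added note: selectG_SZA_EXT below handles G_SEXT/G_ZEXT/G_ANYEXT and
// G_SEXT_INREG. From the fragments, VGPR results use V_BFE_I32/V_BFE_U32,
// SGPR results use S_SEXT_I32_I8/I16 or S_BFE_I32/S_BFE_I64, and 64-bit
// results are assembled by pairing the low half with an undef or
// sign-extended high half.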
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
    I.eraseFromParent();
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
  assert(Mask.size() == 2);
  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
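// Added note: for 64-bit scalar operands, selectG_FNEG and selectG_FABS below
// only touch the sign bit of the high 32-bit half: the value is split into
// sub0/sub1 copies, the high half is XORed (fneg) or ORed/ANDed (fneg-of-fabs
// / fabs) with a mask held in ConstReg, and the halves are rejoined with a
// REG_SEQUENCE.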
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
           AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
      CondPhysReg = AMDGPU::SCC;
      BrOpcode = AMDGPU::S_CBRANCH_SCC1;
      ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
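// Added note: selectG_PTRMASK implements pointer masking with AND. The
// fragments below show the 64-bit SALU case using a single S_AND_B64, and the
// split case ANDing only the 32-bit half that actually needs masking (copying
// the other half through) before rebuilding the pointer with a REG_SEQUENCE.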
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                                 VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  MI.eraseFromParent();
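// Added note: selectBufferLoadLds picks the BUFFER_LOAD_*_LDS opcode from the
// transfer size (1/2/4 bytes) and from which of vindex/voffset are present
// (BOTHEN / IDXEN / OFFEN / OFFSET forms), combining vindex and voffset into a
// 64-bit REG_SEQUENCE when both are used.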
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
      .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
      .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
                                     sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
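// Added note: select() below is the top-level GlobalISel entry point for this
// selector; from the fragments, instructions that are not pre-ISel generic
// opcodes fall through to selectCOPY, and the switch routes each generic
// opcode (and the AMDGPU-specific G_AMDGPU_* pseudos) to the hand-written
// selectors defined above.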
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
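// Added note: the routines from here on are ComplexPattern helpers used by the
// imported TableGen patterns. selectVOP3ModsImpl and its wrappers fold a
// G_FNEG (or a canonicalizing 0.0 - x G_FSUB) and optionally a G_FABS feeding
// the operand into VOP3 source-modifier bits, and copyToVGPRIfSrcFolded
// re-copies the folded source to a VGPR when the pattern requires it.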
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
            TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), true,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  return std::pair(Src, Mods);

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
         "expected i1 value");
3845AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
3848 "expected i1 value");
3862 switch (Elts.
size()) {
3864 DstRegClass = &AMDGPU::VReg_256RegClass;
3867 DstRegClass = &AMDGPU::VReg_128RegClass;
3870 DstRegClass = &AMDGPU::VReg_64RegClass;
3877 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
3878 .addDef(
MRI.createVirtualRegister(DstRegClass));
3879 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3890 if (ModOpcode == TargetOpcode::G_FNEG) {
3894 for (
auto El : Elts) {
3900 if (Elts.size() != NegAbsElts.
size()) {
3909 assert(ModOpcode == TargetOpcode::G_FABS);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  // ...
  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // ...
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      // ...
    }
    if (BV->getNumSources() == EltsF32.size()) {
      // ...
    }
  }
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  // ...
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    // ...
  }
  if (CV->getNumSources() == EltsV2F16.size()) {
    // ...
  }
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  // ...
  assert(CV->getNumSources() > 0);
  // ...
  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                           ? AMDGPU::G_FNEG
                           : AMDGPU::G_FABS;
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
    // ...
  }
  if (CV->getNumSources() == EltsV2F16.size()) {
    // ...
  }
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  // ... (in a returned renderer lambda:)
  MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
  // ...
}
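// Illustrative sketch (not part of the original file): selectWMMAVISrc renders
// a floating-point splat constant as an integer immediate by reinterpreting
// its bit pattern, which is what bitcastToAPInt().getSExtValue() does above.
// Minimal stand-alone model for a 32-bit float; byte order is assumed
// little-endian purely for illustration.
namespace wmma_visrc_sketch {
inline long long fpBitsAsImm(float F) {
  unsigned Bits = 0;
  const unsigned char *P = reinterpret_cast<const unsigned char *>(&F);
  for (unsigned i = 0; i != sizeof(F); ++i)
    Bits |= static_cast<unsigned>(P[i]) << (8 * i);
  // Sign-extend the 32-bit pattern into the 64-bit immediate field.
  return static_cast<long long>(static_cast<int>(Bits));
}
} // namespace wmma_visrc_sketch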
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  // ...
  std::optional<ValueAndVReg> ShiftAmt;
  if (/* ... */
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    // ...
  }
  // ...
}
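// Illustrative sketch (not part of the original file): selectSWMMACIndex8
// above matches an index expressed as "reg >> ShiftAmt" on a 32-bit source
// where ShiftAmt is a multiple of 8, and encodes ShiftAmt / 8 as the index
// key, i.e. which byte of the register holds the index. Stand-alone model of
// that key computation; shift amounts are assumed to be < 32 in practice.
namespace swmmac_index8_sketch {
struct ByteKey {
  bool Matched;
  unsigned Key;
};

inline ByteKey matchByteIndexKey(unsigned SrcSizeInBits, unsigned ShiftAmt) {
  if (SrcSizeInBits == 32 && ShiftAmt % 8 == 0)
    return {true, ShiftAmt / 8};
  return {false, 0};
}
} // namespace swmmac_index8_sketch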
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  // ...
  std::optional<ValueAndVReg> ShiftAmt;
  if (/* ... */
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    // ...
  }
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
                                           /* ... */);
  // ... (in a returned renderer lambda:)
  MIB.addReg(
      copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
                                           /* ... */);
  // ...
  MIB.addReg(
      copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
  // ...
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  // ...
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;
  // ...

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      // ...
      Base = GEPI2.SgprParts[0];
      *SOffset = OffsetReg;
      // ...
      if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
        /* ... */;
      // ...
    }
  }

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    // ...
  }

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      /* ... */) {
    // ...
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        /* ... */;
    // ...
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    // ...
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
    // ...
  }
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
    return std::nullopt;
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  // ...
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      /* ... */;
  if (/* ... */)
    return std::nullopt;
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
    return std::nullopt;
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;
  // ...
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                /* ... */) const {
  // ...
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (/* ...offset unusable or !isFlatScratchBaseLegal(Root.getReg())... */)
    /* ... */;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  // ...
  return std::pair(PtrBase, ConstOffset);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    // ...
    ImmOffset = ConstOffset;
    // ...
    if (isSGPR(PtrBaseDef->Reg)) {
      if (ConstOffset > 0) {
        // ...
        int64_t SplitImmOffset, RemainderOffset;
        // ...
        if (isUInt<32>(RemainderOffset)) {
          // ...
          Register VOffset =
              MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
          // ...
          BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
                  VOffset)
              .addImm(RemainderOffset);
          // ...
        }
      }
      // ...
    }
  }

  // ...
  unsigned NumLiterals =
      /* ... */;
  if (/* ... */)
    return std::nullopt;

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // ...
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
      // ...
    }
    // ...
  }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // ...
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      /* ... */;
  // ...
}
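// Illustrative sketch (not part of the original file): when the constant part
// of a global saddr address does not fit the immediate field, selectGlobalSAddr
// above ends up with a {SplitImmOffset, RemainderOffset} pair (per the
// documented contract of the target's splitFlatOffset helper: split an offset
// into {immediate offset field, remainder offset}) and materializes the
// 32-bit remainder in a VGPR with V_MOV_B32. Below is one simple split that
// satisfies the basic contract (Imm + Remainder == Offset, Imm encodable);
// the real helper also handles signed offsets and per-subtarget field widths.
namespace global_saddr_sketch {
struct SplitOffset {
  long long Imm;       // part kept in the instruction's immediate field
  long long Remainder; // part that must be materialized in a register
};

// Assumes Offset >= 0 and MaxImm >= 0 (mirrors the ConstOffset > 0 path).
inline SplitOffset splitOffset(long long Offset, long long MaxImm) {
  long long Imm = Offset % (MaxImm + 1);
  return {Imm, Offset - Imm};
}
} // namespace global_saddr_sketch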
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      /* ... */) {
    // ...
    ImmOffset = ConstOffset;
    // ...
  }

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    // ...
  }

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    // ...
    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      // ...
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      // ...
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          /* ... */;
      // ...
    }
  }

  // ...
  if (/* ... */)
    return std::nullopt;
  // ...
}
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    /* ...VAddr, SAddr, ImmOffset... */) const {
  // ...
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  // ...
  return (VMax & 3) + (SMax & 3) >= 4;
}
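// Illustrative sketch (not part of the original file):
// checkFlatScratchSVSSwizzleBug derives VMax/SMax from known-bits analysis of
// the vector and scalar address parts and refuses the SVS form when
// (VMax & 3) + (SMax & 3) >= 4, i.e. when the low two bits of those bounds
// sum past a 4-byte boundary. Stand-alone model of the predicate itself:
namespace svs_swizzle_sketch {
// True when the SVS form must be avoided for values bounded by VMax/SMax.
inline bool mayHitSwizzleBug(unsigned long long VMax, unsigned long long SMax) {
  return (VMax & 3) + (SMax & 3) >= 4;
}
// Example: VMax ending in binary 11 and SMax ending in 01 gives 3 + 1 >= 4,
// so that pair is rejected.
} // namespace svs_swizzle_sketch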
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
      /* ... */) {
    // ...
    ImmOffset = ConstOffset;
  }

  // ...
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (/* ... */)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  // ...

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    // ...
  }

  // ...
    return std::nullopt;
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  // ...
  Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
          HighBits)
      /* ... */;
  // ...

  std::optional<int> FI;
  // ...
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    // ...
    if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
      /* ... */;
    // ...
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    FI = RootDef->getOperand(1).getIndex();
    // ...
  }
  // ...
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                /* ... */) const {
  // ...
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;
  // ...
}
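// Illustrative sketch (not part of the original file): DS read2/write2 encode
// two element offsets as 8-bit element indices, so isDSOffset2Legal above
// requires both byte offsets to be multiples of the element size and both
// quotients to fit in 8 bits. Stand-alone model of the same predicate:
namespace ds_offset2_sketch {
inline bool isOffsetPairLegal(long long Offset0, long long Offset1,
                              unsigned Size) {
  if (Size == 0 || Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  long long Idx0 = Offset0 / Size, Idx1 = Offset1 / Size;
  // Both element indices must fit the 8-bit offset fields (isUInt<8>).
  return Idx0 >= 0 && Idx0 <= 255 && Idx1 >= 0 && Idx1 <= 255;
}
// Example: Size = 4 with offsets 8 and 12 gives indices 2 and 3 -> legal;
// offsets 0 and 2048 give indices 0 and 512 -> 512 does not fit in 8 bits.
} // namespace ds_offset2_sketch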
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          /* ... */);
}
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  // ...
  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =
        /* ... */;
    // ...
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      /* ... */;
    // ...
  }
  // ...
}
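// Illustrative sketch (not part of the original file): the G_PTR_ADD case in
// isFlatScratchBaseLegal above looks for a constant right-hand side that is
// negative yet greater than -0x40000000; offsets in that range are accepted
// by the base-legality check without further analysis. Stand-alone model of
// the range test:
namespace flat_scratch_base_sketch {
inline bool isAcceptedNegativeOffset(long long C) {
  return C < 0 && C > -0x40000000LL;
}
} // namespace flat_scratch_base_sketch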
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  // ...
}

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  // ...
  std::optional<DefinitionAndSourceRegister> BaseDef =
      /* ... */;
  std::optional<ValueAndVReg> RHSOffset =
      /* ... */;
  // ...
  if (/* ... */
      (RHSOffset->Value.getSExtValue() < 0 &&
       RHSOffset->Value.getSExtValue() > -0x40000000))
    /* ... */;
  // ...
  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  // ...
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  // ...
  std::optional<APInt> RHS =
      /* ... */;
  // ...
  if (RHS->countr_one() >= ShAmtBits)
    return true;
  // ...
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
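// Illustrative sketch (not part of the original file): a mask "x & m" feeding
// a shift-amount operand is redundant when the low ShAmtBits bits cannot be
// changed by it, i.e. when the mask (or mask | known-zero bits of x) has at
// least ShAmtBits trailing ones. Stand-alone model of the test for 32-bit
// values:
namespace shift_mask_sketch {
inline unsigned countTrailingOnes(unsigned V) {
  unsigned N = 0;
  while (V & 1u) {
    ++N;
    V >>= 1;
  }
  return N;
}

// True if "value & Mask" cannot change the low ShAmtBits bits that the shift
// instruction actually reads (KnownZeroOfValue are bits known to be 0).
inline bool isUnneededShiftMask(unsigned Mask, unsigned KnownZeroOfValue,
                                unsigned ShAmtBits) {
  if (countTrailingOnes(Mask) >= ShAmtBits)
    return true;
  return countTrailingOnes(Mask | KnownZeroOfValue) >= ShAmtBits;
}
// Example: Mask = 0x1f keeps a 5-bit shift amount intact, so masking before a
// 32-bit shift (ShAmtBits = 5) is unnecessary.
} // namespace shift_mask_sketch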
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  // ...
  std::optional<DefinitionAndSourceRegister> Def =
      /* ... */;
  assert(Def && "this shouldn't be an optional result");
  // ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  // ...
  if (/* ... */)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;
  // ...
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
  // ...
  if (/* ... */) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // ...
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  // ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  // ...
  if (/* ... */)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;
  // ...
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
  // ...
  if (/* ... */) {
    int64_t OffsetValue0 = Offset;
    // ...
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // ...
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
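// Illustrative sketch (not part of the original file): a ds_read2/ds_write2
// pair accesses two consecutive elements, so selectDSReadWrite2Impl above
// converts a byte offset into the first element index (Offset / Size); the
// second slot is assumed here to be the next element. Stand-alone model of
// that conversion, reusing the legality rule sketched after isDSOffset2Legal:
namespace ds_read2_sketch {
struct Read2Offsets {
  bool Legal;
  unsigned Offset0; // first element index
  unsigned Offset1; // second element index
};

inline Read2Offsets splitForRead2(long long ByteOffset, unsigned Size) {
  long long Off0 = ByteOffset;
  long long Off1 = ByteOffset + Size; // assumption: second slot follows the first
  if (Size == 0 || Off0 < 0 || Off0 % Size != 0 || Off0 / Size > 255 ||
      Off1 / Size > 255)
    return {false, 0, 0};
  return {true, static_cast<unsigned>(Off0 / Size),
          static_cast<unsigned>(Off1 / Size)};
}
} // namespace ds_read2_sketch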
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  // ...
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};
  // ...
  std::optional<ValueAndVReg> MaybeOffset =
      /* ... */;
  // ...
}
// Return a resource descriptor for use with an arbitrary 64-bit pointer.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
      /* ... */;
  B.buildInstr(AMDGPU::S_MOV_B32)
      /* ... */;
  // ...
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      /* ... */
      .addImm(AMDGPU::sub0)
      /* ... */
      .addImm(AMDGPU::sub1);

  // ...
  RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  B.buildInstr(AMDGPU::S_MOV_B64)
      /* ... */;
  // ...
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      /* ... */
      .addImm(AMDGPU::sub0_sub1)
      /* ... */
      .addImm(AMDGPU::sub2_sub3);
  // ...
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // ...
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // ...
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  // ...
  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    // ...
  }

  // ...
  Data.N2 = InputAdd->getOperand(1).getReg();
  Data.N3 = InputAdd->getOperand(2).getReg();
  // ...
  return Data;
}

bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // ...
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, /* ... */) const {
  // ...
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
      /* ... */;
  // ...
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // ...
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  // ...
  Offset = AddrData.Offset;

  // ...
  if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      // ...
    }
    // ...
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // ...
  }

  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  // ...
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // ...
  Offset = AddrData.Offset;
  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr, RSrcReg, SOffset;
  int64_t Offset = 0;
  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    /* ... */;
  // ... (in a returned renderer lambda:)
  MIB.addReg(AMDGPU::SGPR_NULL);
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg, SOffset;
  int64_t Offset = 0;
  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    /* ... */;
  // ...
  MIB.addReg(AMDGPU::SGPR_NULL);
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  // ...
  SOffset = AMDGPU::SGPR_NULL;
  // ...
}
// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // ...
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  // ...
  std::optional<int64_t> EncodedImm =
      /* ... */;
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  // ...
  std::optional<int64_t> EncodedImm =
      /* ... */;
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // ...
  if (/* ... */)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
      /* ... */;
  if (/* ... */)
    return std::nullopt;
  // ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                      bool &Matched) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  // ...
  const auto CheckAbsNeg = [&]() {
    // ...
    std::tie(PeekSrc, ModsTmp) = selectVOP3ModsImpl(PeekSrc);
    // ...
  };
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  // ...
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  // ...
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  // ...
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    // ...
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }
  // ...
  I.eraseFromParent();
  // ...
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    // ...
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    }
  } else {
    switch (IntrID) {
    // ...
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    }
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  // ...
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? /* ... */
                             : /* ... */;
  std::optional<int64_t> BarValImm =
      /* ... */;

  // ...
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
  }

  // ...
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    // ...
    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
  }

  // ...
  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());

  // ...
  I.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  // ...
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  // ...
  I.eraseFromParent();
  // ...
}
// Custom operand renderers. Their signatures are elided in this listing; each
// follows the usual GlobalISel renderer convention of appending one operand
// derived from MI to the instruction being built in MIB.

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

// ...

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

// ...

  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

// ...

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

// ...

  MIB.addImm(MI.getOperand(OpIdx).getImm());

// ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");

// ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() & /* ... */);

// ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() & /* ... */;

// ...

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() & /* ... */;
  // ...
}

// ...

  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  // ...
  assert(ExpVal != INT_MIN);
  // ...
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  // ...
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  // ...
}