#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      Subtarget(&STI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
static Register getWaveAddress(const MachineInstr *Def) {
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
}
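
// isVCC: true if a virtual register is known to live in the VCC (wave mask)
// bank, either via an assigned boolean register class or via an explicit
// VCC register bank assignment.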
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  // ...
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  // ...
  const LLT Ty = MRI.getType(Reg);
  // ...
  // A G_TRUNC result is never vcc.
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
         RC->hasSuperClassEq(TRI.getBoolRC());
  // ...
  return RB->getID() == AMDGPU::VCCRegBankID;
}
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  // ...
  if (!DstRC || DstRC != SrcRC)
    return false;
  // ...
}
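
// selectCOPY handles generic COPY, with special handling for copies into the
// VCC bank: a constant source becomes an all-ones/zero mask move, and any
// other 32-bit source is masked to bit 0 and compared against zero to
// materialize a proper wave mask.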
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  // ...
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      // ...
    }

    if (!isVCC(SrcReg, *MRI)) {
      // ...
      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, true);
      if (ConstVal) {
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        // ...
        const unsigned AndOpc =
            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        // ...
        if (IsSGPR)
          And.setOperandDead(3); // Dead scc

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
            .addImm(0)
            .addReg(MaskedReg);
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      // ...
    }
    // ...
  }

  // ...
  if (MO.getReg().isPhysical())
    continue;
  // ...
}
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
  // ...
  const RegClassOrRegBank &RegClassOrBank =
      MRI->getRegClassOrRegBank(DefReg);
  // ...
  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  // ...
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  // ...
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
  // ...
  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);
  // ...
}
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
  // ...
}
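
// selectG_ADD_SUB lowers 32-bit adds/subs directly onto SALU or VALU opcodes,
// and expands the 64-bit SALU/VALU cases into lo/hi halves chained through a
// carry (S_ADD_U32 + S_ADDC_U32, or V_ADD_CO_U32 + V_ADDC_U32).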
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  LLT Ty = MRI->getType(DstReg);
  // ...
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
          BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
              .add(I.getOperand(1))
              .add(I.getOperand(2))
              .setOperandDead(3); // Dead scc
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      // ...
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    // ...
    MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
                            // (dead carry def elided)
                            .add(I.getOperand(1))
                            .add(I.getOperand(2))
                            .addImm(0); // clamp
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
        .add(Lo1)
        .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
        .add(Hi1)
        .add(Hi2)
        .setOperandDead(3); // Dead scc
  } else {
    // ...
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
        .addDef(CarryReg)
        .add(Lo1)
        .add(Lo2)
        .addImm(0);
    // ... (V_ADDC_U32 into DstHi consuming CarryReg elided)
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);
  // ...
}
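
// G_UADDO/G_USUBO and the carry-in forms G_UADDE/G_USUBE map either onto the
// VALU add/sub-with-carry instructions (carry in/out in a VCC-bank register)
// or onto SALU add/sub with the carry threaded through SCC via copies.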
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  // ...
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    // ...
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
    // ...
  }

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3); // Dead scc
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
        .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }

  // ...
  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
    return false;
  // ...
}
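
// G_AMDGPU_MAD_U64_U32 / G_AMDGPU_MAD_I64_I32 select directly onto
// V_MAD_U64_U32 / V_MAD_I64_I32; gfx11 uses the renamed _gfx11 encodings.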
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  // ...
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  // ...
  I.addImplicitDefUseOperands(*MF);
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;
  // ...
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  // ...
  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                    *SrcRC, I.getOperand(1));
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, 0, SubReg);
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
  // ...
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  // ...
  const int NumDst = MI.getNumOperands() - 1;
  // ...
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    // Make sure the subregister index is valid for the source register.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    // ...
  }

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  // ...
  LLT SrcTy = MRI->getType(Src0);
  // ...
  // A BUILD_VECTOR with >= 32-bit sources is just a REG_SEQUENCE.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }
  // ...
  if (/* unsupported type */ ||
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && /* ... */))
    return false;

  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (ConstSrc0 && ConstSrc1) {
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    // ... (fold both constants into a single move)
    MI.eraseFromParent();
    // ...
    MI.eraseFromParent();
    return true;
  }

  // (build_vector $src0, undef) -> copy $src0
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    // ...
    const auto &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    // ...
  }

  if (IsVector) {
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
                   .addImm(0xFFFF)
                   .addReg(Src0);
    // ...
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
              .addReg(Src1)
              .addImm(16)
              .addReg(TmpReg);
    // ...
    MI.eraseFromParent();
    return true;
  }

  // ...
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift1) {
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift0) {
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
                     .addReg(ShiftSrc0)
                     .addImm(16)
                     .setOperandDead(3); // Dead scc
      // ...
      MI.eraseFromParent();
      return true;
    }
    // ...
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  }

  MI.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
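
// Pointer arithmetic is plain integer arithmetic at this level, so G_PTR_ADD
// reuses the G_ADD selection path.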
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  // ...
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }
  return false;
}
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  // ...
  int64_t Offset = I.getOperand(3).getImm();

  // FIXME: These cases should have been illegal and unnecessary to check here.
  if (Offset % 32 != 0 || InsSize % 32 != 0)
    return false;
  // ...
  if (SubReg == AMDGPU::NoSubRegister)
    return false;
  // ...
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg)
      .addImm(SubReg);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  // ...
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
             AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  // ...
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*BFE, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  // ...
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      /* p0/attr/attrchan operands elided */;
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  // ...
  Register LaneSelect = MI.getOperand(3).getReg();
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
      getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
  if (ConstSelect) {
    // The lane select is an inline immediate; wrap it to the wave size.
    MIB.addImm(ConstSelect->Value.getSExtValue() &
               maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
  } else {
    std::optional<ValueAndVReg> ConstVal =
        getIConstantVRegValWithLookThrough(Val, *MRI);
    // ...
    MIB.addImm(ConstVal->Value.getSExtValue());
    // ...
    // A non-constant lane select goes through M0.
    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(LaneSelect);
    // ...
  }

  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  // ...
  LLT Ty = MRI->getType(Dst0);

  unsigned Opc;
  if (Ty == LLT::scalar(32))
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
  else if (Ty == LLT::scalar(64))
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  else
    return false;
  // ...
  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*DivScale, TII, TRI, RBI);
}
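
// selectG_INTRINSIC dispatches the side-effect-free intrinsics; most entries
// either fold to a copy-like pseudo (the WQM/WWM family) or have a dedicated
// selector above, and everything else falls through to the generated matcher.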
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    // ...
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI->getRegClassOrNull(Reg))
        MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    // ...
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_inverse_ballot:
    return selectInverseBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
                          const GCNSubtarget &ST) {
  if (Size == 16 && !ST.has16BitInsts())
    return -1;

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
                          unsigned S64Opc) {
    if (Size == 16)
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    if (Size == 32)
      return S32Opc;
    return S64Opc;
  };

  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
  case CmpInst::ICMP_SGT:
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
  case CmpInst::ICMP_UGT:
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);

  case CmpInst::FCMP_OEQ:
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
  case CmpInst::FCMP_OGT:
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
  case CmpInst::FCMP_OGE:
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
  case CmpInst::FCMP_OLT:
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
  case CmpInst::FCMP_OLE:
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
  case CmpInst::FCMP_ONE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_ORD:
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
  case CmpInst::FCMP_UNO:
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
  case CmpInst::FCMP_UEQ:
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
  case CmpInst::FCMP_UGT:
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
  case CmpInst::FCMP_UGE:
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
  case CmpInst::FCMP_ULT:
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
  case CmpInst::FCMP_ULE:
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
  case CmpInst::FCMP_UNE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_TRUE:
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
  case CmpInst::FCMP_FALSE:
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
  }
}
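
// Map a predicate onto the scalar S_CMP opcode, if one exists (returns -1
// otherwise). 64-bit scalar compares only support equality.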
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    // ...
    switch (P) {
    case CmpInst::ICMP_NE: return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ: return AMDGPU::S_CMP_EQ_U64;
    default:               return -1;
    }
  }

  if (Size == 32) {
    switch (P) {
    case CmpInst::ICMP_NE:  return AMDGPU::S_CMP_LG_U32;
    case CmpInst::ICMP_EQ:  return AMDGPU::S_CMP_EQ_U32;
    case CmpInst::ICMP_SGT: return AMDGPU::S_CMP_GT_I32;
    case CmpInst::ICMP_SGE: return AMDGPU::S_CMP_GE_I32;
    case CmpInst::ICMP_SLT: return AMDGPU::S_CMP_LT_I32;
    case CmpInst::ICMP_SLE: return AMDGPU::S_CMP_LE_I32;
    case CmpInst::ICMP_UGT: return AMDGPU::S_CMP_GT_U32;
    case CmpInst::ICMP_UGE: return AMDGPU::S_CMP_GE_U32;
    case CmpInst::ICMP_ULT: return AMDGPU::S_CMP_LT_U32;
    case CmpInst::ICMP_ULE: return AMDGPU::S_CMP_LE_U32;
    case CmpInst::FCMP_OEQ: return AMDGPU::S_CMP_EQ_F32;
    case CmpInst::FCMP_OGT: return AMDGPU::S_CMP_GT_F32;
    case CmpInst::FCMP_OGE: return AMDGPU::S_CMP_GE_F32;
    case CmpInst::FCMP_OLT: return AMDGPU::S_CMP_LT_F32;
    case CmpInst::FCMP_OLE: return AMDGPU::S_CMP_LE_F32;
    case CmpInst::FCMP_ONE: return AMDGPU::S_CMP_LG_F32;
    case CmpInst::FCMP_ORD: return AMDGPU::S_CMP_O_F32;
    case CmpInst::FCMP_UNO: return AMDGPU::S_CMP_U_F32;
    case CmpInst::FCMP_UEQ: return AMDGPU::S_CMP_NLG_F32;
    case CmpInst::FCMP_UGT: return AMDGPU::S_CMP_NLE_F32;
    case CmpInst::FCMP_UGE: return AMDGPU::S_CMP_NLT_F32;
    case CmpInst::FCMP_ULT: return AMDGPU::S_CMP_NGE_F32;
    case CmpInst::FCMP_ULE: return AMDGPU::S_CMP_NGT_F32;
    case CmpInst::FCMP_UNE: return AMDGPU::S_CMP_NEQ_F32;
    default:                return -1;
    }
  }

  if (Size == 16) {
    // ...
    switch (P) {
    case CmpInst::FCMP_OEQ: return AMDGPU::S_CMP_EQ_F16;
    case CmpInst::FCMP_OGT: return AMDGPU::S_CMP_GT_F16;
    case CmpInst::FCMP_OGE: return AMDGPU::S_CMP_GE_F16;
    case CmpInst::FCMP_OLT: return AMDGPU::S_CMP_LT_F16;
    case CmpInst::FCMP_OLE: return AMDGPU::S_CMP_LE_F16;
    case CmpInst::FCMP_ONE: return AMDGPU::S_CMP_LG_F16;
    case CmpInst::FCMP_ORD: return AMDGPU::S_CMP_O_F16;
    case CmpInst::FCMP_UNO: return AMDGPU::S_CMP_U_F16;
    case CmpInst::FCMP_UEQ: return AMDGPU::S_CMP_NLG_F16;
    case CmpInst::FCMP_UGT: return AMDGPU::S_CMP_NLE_F16;
    case CmpInst::FCMP_UGE: return AMDGPU::S_CMP_NLT_F16;
    case CmpInst::FCMP_ULT: return AMDGPU::S_CMP_NGE_F16;
    case CmpInst::FCMP_ULE: return AMDGPU::S_CMP_NGT_F16;
    case CmpInst::FCMP_UNE: return AMDGPU::S_CMP_NEQ_F16;
    default:                return -1;
    }
  }

  return -1;
}
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
        .addReg(AMDGPU::SCC);
    // ...
    I.eraseFromParent();
    return Ret;
  }

  if (I.getOpcode() == AMDGPU::G_FCMP)
    return false;
  // ...
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
      .add(I.getOperand(2))
      .add(I.getOperand(3));
  // ...
  I.eraseFromParent();
  return Ret;
}
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
    return false;

  LLT DstTy = MRI->getType(Dst);
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  // For an invalid predicate the result can simply be undefined.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
  I.eraseFromParent();
  return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  // ...
  MachineInstrBuilder SelectedMI;
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
  Register Src0Reg =
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
  Register Src1Reg =
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  // ...
  I.eraseFromParent();
  return true;
}
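
// amdgcn.ballot: a constant-false argument folds to zero, constant-true folds
// to a copy of EXEC, and anything else copies the source wave mask; a wave32
// ballot producing a 64-bit result is zero-extended with a REG_SEQUENCE.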
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  // ...
  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);

  const auto BuildCopy = [&](Register SrcReg) {
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg);
    // ... (wave32 into a 64-bit result: zero-extend)
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);
  };

  if (Arg) {
    const int64_t Value = Arg->Value.getSExtValue();
    if (Value == 0) {
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      // ...
    } else if (Value == -1) // all ones
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    else
      return false;
  } else
    BuildCopy(I.getOperand(2).getReg());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register MaskReg = I.getOperand(2).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(MaskReg);
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  auto RelocSymbol = cast<GlobalVariable>(
      M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(Ctx)));
  // ...
  BuildMI(*BB, &I, DL,
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
      .addGlobalAddress(RelocSymbol, 0, SIInstrInfo::MO_ABS32_LO);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  // ...
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  // ...
  unsigned Depth = I.getOperand(2).getImm();
  // ...
  // A non-zero depth, or a frame with no return address, produces zero.
  I.eraseFromParent();
  return true;
  // ...
  // Get the return address register and mark it live-in.
  Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
                                             AMDGPU::SReg_64RegClass, DL);
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // ...
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  Register Reg = MI.getOperand(1).getReg();
  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  return true;
}
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  // ...
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease)
    report_fatal_error("ds_ordered_count: wave_done requires wave_release");

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
      report_fatal_error(
          "ds_ordered_count: dword count must be between 1 and 4");
    }
  }
  // ...
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  // ...
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
    Offset1 |= (CountDw - 1) << 6;

  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
      /* value/offset operands elided */;
  // ...
  MI.eraseFromParent();
  return Ret;
}
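
// Map a GWS intrinsic ID onto the corresponding DS_GWS_* machine opcode.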
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
    return false;

  // intrinsic ID, vsrc, offset
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  // ...
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;
  // ...
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    // ...
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // ...
  } else {
    std::tie(BaseOffset, ImmOffset) =
        AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);

    if (Readfirstlane) {
      // ...
    }
    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))
      return false;

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  // ...
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  // TODO: Should this try to look through readfirstlane like GWS?
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
    Offset = 0;
  }
  // ...
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  // A barrier in a workgroup that fits in a single wave is a no-op.
  // ...
  MI.eraseFromParent();
  return true;
  // ...
  // On targets with split barriers, expand into signal + wait.
  // ...
  MI.eraseFromParent();
  return true;
  // ...
}
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
                         bool &IsTexFail) {
  // ...
  TFE = (TexFailCtrl & 0x1) != 0;
  // ...
  LWE = (TexFailCtrl & 0x2) != 0;
  // ...
  return TexFailCtrl == 0;
}
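
// selectImageIntrinsic lowers the G_AMDGPU_INTRIN_IMAGE_* pseudos to concrete
// MIMG instructions: it derives dmask and data dword counts from the base
// opcode and result type, switches to _g16 opcodes for 16-bit gradients,
// chooses NSA vs. contiguous VADDR encodings, and appends the
// unorm/cpol/r128/a16/tfe/lwe/d16 flag operands.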
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  // ...
  unsigned IntrOpcode = Intr->BaseOpcode;
  // ...
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  // ...
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  // ...
  bool Unorm;
  if (!BaseOpcode->Sampler)
    Unorm = true;
  else
    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  // ...
  bool IsTexFail = false;
  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
                    TFE, LWE, IsTexFail))
    return false;

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  // A16 implies 16-bit gradients if the subtarget doesn't support G16.
  if (IsA16 && !STI.hasG16() && !IsG16)
    return false;

  unsigned DMask = 0;
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    // Be careful to allow atomic swap on 16-bit element vectors.
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      Ty.getSizeInBits() == 128 :
      Ty.getSizeInBits() == 64;

    if (BaseOpcode->AtomicX2) {
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    // ...
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      // ...
    } else {
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
    }
  }

  // Set G16 opcode
  if (Subtarget->hasG16() && IsG16) {
    // ...
    IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
  }

  // TFE is disallowed with no lanes enabled.
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  // ...
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    // Skip the $noregs and 0s inserted during legalization.
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())
      continue;
    // ...
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
  }
  // ...
  const bool UseNSA =
      NumVAddrRegs != 1 &&
      /* ... */ NumVAddrDwords == NumVAddrRegs;
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
    // ...
    return false;
  }

  int Opcode = -1;
  if (/* GFX12+ */ false) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, /* gfx12 encoding */ 0,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else {
    // Pre-gfx10 fallback encodings (gfx90a, then gfx8, then gfx6).
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                   NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1) {
      LLVM_DEBUG(
          dbgs()
          << "requested image instruction is not supported on this GPU\n");
      return false;
    }
    // ...
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
                                   NumVDataDwords, NumVAddrDwords);
    // ...
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
                                   NumVDataDwords, NumVAddrDwords);
  }
  // ...
  if (VDataOut) {
    if (BaseOpcode->AtomicX2) {
      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      MIB.addDef(TmpReg);
      if (!MRI->use_empty(VDataOut)) {
        BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
            .addReg(TmpReg, RegState::Kill, SubReg);
      }
    } else {
      MIB.addDef(VDataOut); // vdata output
    }
  }

  if (VDataIn)
    MIB.addReg(VDataIn); // vdata input

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
      assert(SrcOp.getReg() != 0);
      MIB.addReg(SrcOp.getReg());
    }
  }

  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
  if (BaseOpcode->Sampler)
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

  // ...
  MIB.addImm(STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
  if (IsGFX10Plus)
    MIB.addImm(IsA16 ? -1 : 0);
  // ...
  MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  // ...
  unsigned Offset = MI.getOperand(6).getImm();
  // ...
  auto MIB =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
          /* addr/data/offset operands elided */;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (/* subtarget lacks compressed exports */ false) {
      // ...
      DiagnosticInfoUnsupported NoFpRet(
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
      return false;
    }
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
  }
  return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // ... (copy the condition into SCC first)
    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, &AMDGPU::SReg_32RegClass);

    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    // ...
    I.eraseFromParent();
    return Ret;
  }

  // ...
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  // ...
  I.eraseFromParent();
  return Ret;
}
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    // ...
  }
}
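
// G_TRUNC is mostly a subregister copy; the interesting case is truncating a
// two-element 32-bit vector to v2s16, which is done with an SDWA move or a
// shift/and/or sequence packing the two halves into one 32-bit register.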
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (!SrcRC || !DstRC)
    return false;
  // ...
  // v2s32 -> v2s16 truncate: pack the two low halves.
  if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
    // ...
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
        .addReg(SrcReg, 0, AMDGPU::sub0);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      // Write the low 16 bits of the high element into the high 16 bits of
      // the low element.
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
          /* sdwa operands elided */;
    } else {
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
          .addImm(16)
          .addReg(HiReg);
      // ...
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
      // ...
      if (!IsVALU) {
        And.setOperandDead(3); // Dead scc
        Or.setOperandDead(3); // Dead scc
      }
    }

    I.eraseFromParent();
    return true;
  }
  // ...
  int SubRegIdx = sizeToSubRegIndex(DstSize);
  if (SubRegIdx == -1)
    return false;
  // ...
  const TargetRegisterClass *SrcWithSubRC
    = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  // ...
  if (SrcWithSubRC != SrcRC) {
    // ...
  }
  I.getOperand(1).setSubReg(SubRegIdx);
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
// NOTE: This should only be used for 16-bit element types.
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    Register Reg, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI) const {
  // ...
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
    I.getOperand(2).getImm() : SrcTy.getSizeInBits();
  // ...
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
    if (DstSize <= 32)
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    // ...
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    // ... (REG_SEQUENCE of SrcReg and UndefReg)
    I.eraseFromParent();
    // ...
  }

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // Try an AND with an inline-immediate mask if it saves code size.
    // ...
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    // ...
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    // ...
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    // ...
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass,
                                          *MRI);
    }

    // Using a single 32-bit SALU to calculate the high half is smaller than
    // S_BFE with a literal constant operand.
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      // ... (REG_SEQUENCE, then BFE64 on ExtReg)
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }
    // ... (32-bit AND-mask or BFE32 fallback)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  }

  return false;
}
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  // Only handles the SGPR case; the VALU patterns come from tablegen.
  // ...
  Register Dst = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register Src = I.getOperand(1).getReg();
  // ... (fpext of the high half of a 32-bit register)
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
      .addReg(Src);
  I.eraseFromParent();
  return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
  // ...
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  // ...
  if (ImmOp.isFPImm()) {
    // Canonicalize FP immediates to plain integer immediates.
    // ...
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
  }
  // ...
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;

  unsigned Opcode;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
             /* ... a 64-bit move pseudo can encode the immediate ... */) {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  } else {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  }

  if (Size == 32) {
    // ...
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit case: either a single S_MOV_B64 of an inline constant, or two
  // 32-bit moves combined with a REG_SEQUENCE.
  MachineInstr *ResInst;
  if (/* inline constant */ true) {
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
        .addImm(I.getOperand(1).getImm());
  } else {
    const TargetRegisterClass *RC = IsSgpr ?
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    // ...
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        /* lo/hi halves elided */;
  }

  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
  // ...
}
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  // Only handle the 64-bit SGPR case; everything else comes from patterns.
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x80000000);

  // Set or toggle the sign bit of the high half.
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);
  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x7fffffff);

  // Clear the sign bit of the high half.
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);
  MI.eraseFromParent();
  return true;
}
static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  // ...
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return;
  // ...
  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    // ...
    if (i == 2 && isConstant(*OpDef)) {
      assert(GEPInfo.Imm == 0);
      // ... (record the constant offset and continue)
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }
  // ...
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;
  // ...
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers. If Ptr is null, then
  // that means this mem operand contains a PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;
  // ...
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

static bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
  // ...
  // If DS instructions require M0 initialization, set it to the maximum.
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
}

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  if (Reg.isPhysical())
    return false;

  MachineInstr &MI = *MRI.getVRegDef(Reg);
  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI);

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
           isVCmpResult(MI.getOperand(2).getReg(), MRI);

  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  // ...
  if (!isVCC(CondReg, *MRI)) {
    // ...
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
  } else {
    // Unless CondReg is the result of a V_CMP*, its bits are not masked with
    // EXEC and must be masked explicitly before branching on them.
    if (!isVCmpResult(CondReg, *MRI)) {
      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
          .addReg(CondReg)
          .addReg(Exec)
          .setOperandDead(3); // Dead scc
      CondReg = TmpReg;
    }

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  }

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
    MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  // ...
  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  // Try to avoid emitting a bit operation when we only need to touch half of
  // the 64-bit pointer.
  // ...
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg)
        .setOperandDead(3); // Dead scc
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  // ...
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
  // ...
  if (Ty.getSizeInBits() == 32) {
    assert(MaskTy.getSizeInBits() == 32 &&
           "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    // ...
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
  // ...
  if (CanCopyLow32) {
    // If all bits in the low half are 1, only a copy is needed.
    MaskedLo = LoReg;
  } else {
    // Extract the mask subregister and apply the and.
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        .addReg(LoReg)
        .addReg(MaskLo);
  }

  if (CanCopyHi32) {
    // If all bits in the high half are 1, only a copy is needed.
    MaskedHi = HiReg;
  } else {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        .addReg(HiReg)
        .addReg(MaskHi);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(MaskedLo)
      .addImm(AMDGPU::sub0)
      .addReg(MaskedHi)
      .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}
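
// Return the register to use as the index into a vector register, plus the
// subregister index selected by any constant part of the index. A constant
// offset beyond the number of subregisters falls back to the full index
// register with sub0.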
static std::pair<Register, unsigned>
computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
                        const TargetRegisterClass *SuperRC, Register IdxReg,
                        unsigned EltSize, GISelKnownBits &KnownBits) {
  Register IdxBaseReg;
  int Offset;

  std::tie(IdxBaseReg, Offset) =
      AMDGPU::getBaseWithConstantOffset(MRI, IdxReg, &KnownBits);
  if (IdxBaseReg == AMDGPU::NoRegister) {
    // This will happen if the index is a known constant. This should
    // ordinarily be legalized out, but handle it as a register just in case.
    assert(Offset == 0);
    IdxBaseReg = IdxReg;
  }

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);

  // Skip out-of-bounds offsets, or else we would end up using an undefined
  // register.
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
    return false;
  // ...
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    // ...
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
        .addReg(SrcReg, 0, SubReg);
    MI.eraseFromParent();
    return true;
  }

  // ... (VGPR source without index mode: M0 + V_MOVRELS)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(IdxReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
      .addReg(SrcReg, 0, SubReg);
  MI.eraseFromParent();
  return true;
  // ... (VGPR index-mode path)
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  // ...
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
    return false;
  // ...
  std::tie(IdxReg, SubReg) =
      computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg, ValSize / 8, *KB);

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();
  // ...
  if (!IndexMode) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    // ...
    MI.eraseFromParent();
    return true;
  }
  // ... (index-mode path)
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  // The struct intrinsic variants add one additional operand over raw.
  const bool HasVIndex = MI.getNumOperands() == 9;
  Register VIndex;
  int OpOffset = 0;
  if (HasVIndex) {
    VIndex = MI.getOperand(4).getReg();
    OpOffset = 1;
  }

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
      getIConstantVRegValWithLookThrough(VOffset, *MRI);
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    break;
  case 2:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    break;
  case 4:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    break;
  }
  // ...
  // The LDS destination base address goes through M0.
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .add(MI.getOperand(2));
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
        .addReg(VIndex)
        .addImm(AMDGPU::sub0)
        .addReg(VOffset)
        .addImm(AMDGPU::sub1);
    MIB.addReg(IdxReg);
  } else if (HasVIndex) {
    MIB.addReg(VIndex);
  } else if (HasVOffset) {
    MIB.addReg(VOffset);
  }

  MIB.add(MI.getOperand(1));            // rsrc
  MIB.add(MI.getOperand(5 + OpOffset)); // soffset
  MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  // ...
  MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;
  StorePtrI.V = nullptr;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
/// Match a zero extend from a 32-bit value to 64 bits, returning the 32-bit
/// source register.
static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
  // ...
  // Match the legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0).
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Register();

  assert(Def->getNumOperands() == 3 &&
         MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));
  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt()))
    return Def->getOperand(1).getReg();

  return Register();
}
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    break;
  case 2:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    break;
  case 4:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    break;
  }
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .add(MI.getOperand(2));
  // ...
  // Try to split the address into an SGPR base and a VGPR offset.
  if (!isSGPR(Addr)) {
    auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      Register SAddr =
          getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
          Addr = SAddr;
          VOffset = Off;
        }
      }
    }
  }

  if (isSGPR(Addr) && !VOffset) {
    // The SADDR form requires a VGPR offset operand; use zero.
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
        .addImm(0);
  }
  // ...
  MIB.add(MI.getOperand(4))  // offset
     .add(MI.getOperand(5)); // cpol
  // ...
  MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  // ...
  MachineMemOperand *StoreMMO =
      MF->getMachineMemOperand(StorePtrI, MachineMemOperand::MOStore,
                               sizeof(int32_t), Align(4));
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
    break;
  default:
    llvm_unreachable("unhandled smfmac intrinsic");
  }

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
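
// G_AMDGPU_WAVE_ADDRESS materializes the wave-level stack address by shifting
// the per-lane stack pointer value right by the wavefront size log2.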
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addReg(SrcReg);
  } else {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .setOperandDead(3); // Dead scc
  }

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  // ...
  Register WaveAddr = getWaveAddress(DefMI);
  if (!WaveAddr) {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ... (shift the restored SP value down to a wave address)
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_PTR_ADD:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_PTR_ADD(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    // This is a workaround. For extension from type i1, `selectImpl()` uses
    // patterns from the TD file and generates an illegal VGPR-to-SGPR COPY,
    // as an i1 value can only be held in an SGPR class.
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
        selectImpl(I, *CoverageInfo))
      return true;
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    const AMDGPU::ImageDimIntrinsicInfo *Intr =
        AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I));
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
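
// The remaining methods implement the ComplexPattern renderers used by the
// tablegen-generated matcher: each returns a set of operand-render closures
// (a source register plus modifier/offset immediates) for one addressing or
// source-modifier form.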
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  Register Src = Root.getReg();
  unsigned Mods = 0;
  MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = getDefIgnoringCopies(Src, *MRI);
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    // Fold fsub [+-]0 into fneg. This may not have folded depending on the
    // denormal mode, but we're implicitly canonicalizing in a source operand.
    const ConstantFP *LHS =
        getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI);
    if (LHS && LHS->isZero()) {
      Mods |= SISrcMods::NEG;
      Src = MI->getOperand(2).getReg();
    }
  }

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  if (OpSel)
    Mods |= SISrcMods::OP_SEL_0;

  return std::pair(Src, Mods);
}
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
    // If we looked through copies to find source modifiers on an SGPR
    // operand, we now have an SGPR source. To avoid potentially violating the
    // constant bus restriction, insert a copy to a VGPR.
    Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg());
    BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
            TII.get(AMDGPU::COPY), VGPRSrc)
        .addReg(Src);
    Src = VGPRSrc;
  }

  return Src;
}

/// This will select either an SGPR or VGPR operand, saving us from having to
/// write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false);
  // ... (same renderer shape as selectVOP3Mods0)
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);
  // ... (same renderer shape as selectVOP3Mods)
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false);
  // ... (same renderer shape as selectVOP3Mods)
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  Register Reg = Root.getReg();
  const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
  if (Def->getOpcode() == AMDGPU::G_FNEG ||
      Def->getOpcode() == AMDGPU::G_FABS)
    return {};
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
3835std::pair<Register, unsigned>
3836AMDGPUInstructionSelector::selectVOP3PModsImpl(
3841 if (
MI &&
MI->getOpcode() == AMDGPU::G_FNEG &&
3846 Src =
MI->getOperand(1).getReg();
3847 MI =
MRI.getVRegDef(Src);
3858 return std::pair(Src, Mods);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
      = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
      = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI,
                                            /*IsDOT=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
  // Literal i1 value set in intrinsic, represents SrcMods for the next
  // operand. The value is in the Imm operand as an i1 sign extended to
  // int64_t: 1 (-1) promotes packed values to signed, 0 treats them as
  // unsigned.
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");

  unsigned Mods = SISrcMods::OP_SEL_1;
  if (Root.getImm() == -1)
    Mods ^= SISrcMods::NEG;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
    MachineOperand &Root) const {
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");

  unsigned Mods = SISrcMods::OP_SEL_1;
  if (Root.getImm() != 0)
    Mods |= SISrcMods::OP_SEL_0;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  const TargetRegisterClass *DstRegClass;
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  default:
    llvm_unreachable("unhandled Reg sequence size");
  }

  MachineIRBuilder B(*InsertPt);
  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {
    MIB.addReg(Elts[i]);
    MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
  }
  return MIB->getOperand(0).getReg();
}
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    Mods |= SISrcMods::NEG;
    // Check if all elements also have abs modifier.
    SmallVector<Register, 8> NegAbsElts;
    for (auto El : Elts) {
      Register FabsSrc;
      if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
        break;
      NegAbsElts.push_back(FabsSrc);
    }
    if (Elts.size() != NegAbsElts.size()) {
      // Neg
      Src = buildRegSequence(Elts, InsertPt, MRI);
    } else {
      // Neg and Abs
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
    }
  } else {
    assert(ModOpcode == TargetOpcode::G_FABS);
    // Abs
    Mods |= SISrcMods::NEG_HI;
    Src = buildRegSequence(Elts, InsertPt, MRI);
  }
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
    MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsF32;

  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Based on the first element decide which modifier we match, neg or abs.
    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      if (ElF32->getOpcode() != ModOpcode)
        break;
      EltsF32.push_back(ElF32->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (BV->getNumSources() == EltsF32.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      Register FNegSrc;
      if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
        break;
      EltsV2F16.push_back(FNegSrc);
    }

    // All elements had fneg modifier.
    if (CV->getNumSources() == EltsV2F16.size()) {
      Mods |= SISrcMods::NEG;
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
    MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    assert(CV->getNumSources() > 0);
    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
    // Based on the first element decide which modifier we match, neg or abs.
    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;

    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
      if (ElV2F16->getOpcode() != ModOpcode)
        break;
      EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (CV->getNumSources() == EltsV2F16.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
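// The three WMMA matchers above fold element-wise fneg/fabs on a wide matrix
// operand only when *every* element carries the same modifier; the stripped
// elements are then reassembled with a REG_SEQUENCE so that a single
// NEG/NEG_HI pair on the WMMA instruction applies to the whole operand.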
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
    if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
      }}};
    }
  }
  // ...

  return std::nullopt;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}
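// E.g. an index computed as (lshr %v, 16) names byte 2 of %v, so the matcher
// drops the shift and encodes index_key = 2 on the SWMMAC instruction instead
// of emitting the shift.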
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Key = 1;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  // FIXME: Handle op_sel.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
  }};
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false);

  if (SOffset && Offset) {
    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
        AddrInfo.size() > 1) {
      const GEPInfo &GEPI2 = AddrInfo[1];
      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        if (Register OffsetReg =
                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
          Base = GEPI2.SgprParts[0];
          *SOffset = OffsetReg;
          *Offset = *EncodedImm;
          return true;
        }
      }
    }
    return false;
  }

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR offset is unsigned.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      GEPI.Imm != 0) {
    // If we make it this far we have a load with a 32-bit immediate offset.
    // It is OK to select this using an sgpr offset, because we have already
    // failed trying to select this load into one of the _IMM variants since
    // the _IMM Patterns are considered before the _SGPR patterns.
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        .addImm(GEPI.Imm);
    return true;
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
      return true;
    }
  }

  return false;
}
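// SMRD loads thus come in three addressing flavors: sgpr base + encoded
// immediate (_IMM), sgpr base + 32-bit sgpr offset (_SGPR), and, on
// subtargets that support it, both at once (_SGPR_IMM). The selectSmrd*
// wrappers below each request one of these combinations by passing the
// corresponding subset of SOffset/Offset out-parameters.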
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /*SOffset=*/nullptr, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /*Offset=*/nullptr))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  auto Default = std::pair(Root.getReg(), 0);

  if (!STI.hasFlatInstOffsets())
    return Default;

  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return Default;

  return std::pair(PtrBase, ConstOffset);
}
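// In effect this folds
//   %base:vgpr(p1) = ...
//   %c:vgpr(s64) = G_CONSTANT i64 16
//   %addr:vgpr(p1) = G_PTR_ADD %base, %c
// into the pair (%base, 16) whenever 16 fits the FLAT variant's immediate
// field, letting the addressing mode absorb the add. (Illustrative MIR only;
// the legality check is TII.isLegalFLATOffset above.)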
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset).
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    } else {
      auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // Offset is too large.
          //
          // saddr + large_offset -> saddr +
          //                         (voffset = large_offset & ~MaxOffset) +
          //                         (large_offset & MaxOffset)
          int64_t SplitImmOffset, RemainderOffset;
          std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
              ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

          if (isUInt<32>(RemainderOffset)) {
            MachineInstr *MI = Root.getParent();
            MachineBasicBlock *MBB = MI->getParent();
            Register HighBits =
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(),
                    TII.get(AMDGPU::V_MOV_B32_e32), HighBits)
                .addImm(RemainderOffset);

            return {{
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },  // saddr
                [=](MachineInstrBuilder &MIB) { MIB.addReg(HighBits); }, // voffset
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
            }};
          }
        }

        // Adding a 64-bit SGPR and a constant: if the constant halves are not
        // inline constants, a scalar add may be cheaper than VALU adds.
        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) +
            !TII.isInlineConstant(APInt(32, ConstOffset >> 32));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;
      }
    }
  }

  // Match the variable offset.
  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // Look through the SGPR->VGPR copy.
    Register SAddr =
        getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      // It's possible voffset is an SGPR here, but the copy to VGPR will be
      // inserted later.
      if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
        return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
                 [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
      }
    }
  }

  // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
  // drop this.
  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },      // voffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }     // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      MachineInstr &I = *Root.getParent();
      MachineBasicBlock *BB = I.getParent();
      const DebugLoc &DL = I.getDebugLoc();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          .addFrameIndex(FI)
          .addReg(RHSDef->Reg)
          .setOperandDead(3); // Dead scc
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },    // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  if (!STI.hasFlatScratchSVSSwizzleBug())
    return false;

  // The bug affects the swizzling of SVS accesses if there is any carry out
  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
  // voffset to (soffset + inst_offset).
  auto VKnown = KB->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
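// Sketch of the check: only the two low bits of each addend can produce a
// carry out of bit 1. The worst case is (VMax & 3) + (SMax & 3); if that sum
// can reach 4, some pair of runtime values may carry into bit 2 and trip the
// hardware swizzle bug, so the SVS addressing form must be rejected.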
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  Register OrigAddr = Addr;
  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },       // vaddr
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  if (!isSGPR(LHS))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },      // vaddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },      // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
      Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // The immediate is split: the bits above the MUBUF offset field are
    // materialized into a VGPR used as vaddr, and the low bits stay in the
    // instruction's offset field.
    const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(STI);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        .addImm(Offset & ~MaxOffset);

    // ... (renderers for rsrc, vaddr = HighBits, soffset, offset & MaxOffset)
  }

  // (add n0, c1)
  std::optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    Register PtrBase;
    int64_t ConstOffset;
    std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
    if (ConstOffset != 0) {
      if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
          (!STI.privateMemoryResourceIsRangeChecked() ||
           KB->signBitIsZero(PtrBase))) {
        const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
        if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
          FI = PtrBaseDef->getOperand(1).getIndex();
        else
          VAddr = PtrBase;
        Offset = ConstOffset;
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // ... (renderers for rsrc, vaddr = FI or VAddr, soffset, offset)
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}
bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}
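// DS read2/write2 instructions encode two 8-bit offsets in units of the
// element size, so e.g. a 4-byte-element pair at byte offsets 8 and 12 is
// encoded as offset0 = 2, offset1 = 3. Anything that is not a multiple of
// Size, or whose scaled value exceeds 255, fails this check and falls back to
// a separate address computation.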
// Return whether the operation embedded in Addr is never expected to wrap the
// unsigned base address.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}
// Check whether the base address in a flat scratch access of the form
// `base + offset` is legal to put in an SGPR/VGPR, i.e. effectively unsigned
// per the hardware requirement.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (isNoUnsignedWrap(MRI->getVRegDef(Addr)))
    return true;

  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = MRI->getVRegDef(Addr);
  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg = getIConstantVRegValWithLookThrough(
        AddrMI->getOperand(2).getReg(), *MRI);
    // If the immediate offset is negative and within a certain range, the
    // base address cannot also be negative: if it were, the sum would wrap
    // far outside the usable scratch range. It is therefore enough to show
    // that the base is non-negative.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      return KB->signBitIsZero(AddrMI->getOperand(1).getReg());
  }

  return false;
}
// Check that the address in an SGPR + VGPR flat scratch access (SV form) is
// legal, i.e. both components can be treated as unsigned.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (isNoUnsignedWrap(MRI->getVRegDef(Addr)))
    return true;

  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = MRI->getVRegDef(Addr);
  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();
  return KB->signBitIsZero(LHS) && KB->signBitIsZero(RHS);
}
// Check that the address in an SGPR + VGPR + immediate flat scratch access is
// legal.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = MRI->getVRegDef(Addr);
  Register Base = AddrMI->getOperand(1).getReg();
  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Base, *MRI);
  std::optional<ValueAndVReg> RHSOffset = getIConstantVRegValWithLookThrough(
      AddrMI->getOperand(2).getReg(), *MRI);
  assert(RHSOffset);

  // If the immediate offset is negative and within a certain range, the base
  // cannot also be negative, so the access is legal without further
  // knowledge of the base's sign.
  if ((isNoUnsignedWrap(BaseDef->MI) &&
       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))
    return true;

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return KB->signBitIsZero(LHS) && KB->signBitIsZero(RHS);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
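// E.g. for a 32-bit shift, ShAmtBits is 5, so the mask in
//   %amt = G_AND %x, 31
//   %r   = G_SHL %y, %amt
// is redundant: the hardware already ignores all but the low 5 bits of the
// shift amount, and the G_AND can be skipped when selecting the shift.
// (Illustrative MIR only.)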
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  Register Reg = Root.getReg();

  std::optional<DefinitionAndSourceRegister> Def =
      getDefSrcRegIgnoringCopies(Reg, *MRI);
  assert(Def && "this shouldn't be an optional result");

  // ... (matches a wave address or a legal constant offset against the
  // scratch resource descriptor)
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
    MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
/// the base value with the constant offset. There may be intervening copies
/// between \p Root and the identified constant. Returns \p Root, 0 if this
/// does not match the pattern.
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}
static void addZeroImm(MachineInstrBuilder &MIB) { MIB.addImm(0); }

/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If
/// \p BasePtr is not valid, a null base pointer will be used.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc2)
      .addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc3)
      .addImm(FormatHi);

  // Build the half of the subregister with the constants before building the
  // full 128-bit register. If we are building multiple resource descriptors,
  // this will allow CSEing of the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2)
      .addImm(AMDGPU::sub0)
      .addReg(RSrc3)
      .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)
        .addDef(RSrcLo)
        .addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi)
      .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}
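// The resulting 128-bit descriptor is laid out as { base, format }: sub0_sub1
// holds the (possibly null) 64-bit base pointer, and sub2_sub3 holds the two
// 32-bit format words passed in as FormatLo/FormatHi.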
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd =
          getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

    // TODO: Remove this when we have copy folding optimizations after
    // RegBankSelect.
    Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
    Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
  }

  return Data;
}
/// Return if the addr64 mubuf mode should be used for the given address.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}
/// Split an immediate offset \p ImmOffset depending on whether it fits in the
/// immediate field; if not, it is moved into a materialized \p SOffset.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(SOffset)
      .addImm(ImmOffset);
  ImmOffset = 0;
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // FIXME: Predicates should stop this from reaching here.
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the default resource from a 0 address.
        VAddr = N0;
      } else {
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  // FIXME: Pattern should not reach here.
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or
  // (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // TODO: Look through extensions for 32-bit soffset.
  MachineIRBuilder B(*Root.getParent());

  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  // FIXME: Use defaulted operands for trailing 0s and remove from the complex
  // pattern.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },   // vaddr
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, /*IsBuffer=*/true);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), KB, /*CheckNUW=*/true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(STI, Offset, /*IsBuffer=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
  if (MI->getOpcode() == AMDGPU::G_BITCAST)
    return MRI.getVRegDef(MI->getOperand(1).getReg());
  return MI;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(MachineInstr *Inst, MachineInstr *&Out,
                           MachineRegisterInfo &MRI) {
  Inst = stripBitCast(Inst, MRI);

  if (Inst->getOpcode() != AMDGPU::G_TRUNC)
    return false;

  MachineInstr *TruncOp = MRI.getVRegDef(Inst->getOperand(1).getReg());
  TruncOp = stripBitCast(TruncOp, MRI);

  // G_LSHR x, (G_CONSTANT i32 16)
  if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
    auto SrlAmount = getIConstantVRegValWithLookThrough(
        TruncOp->getOperand(2).getReg(), MRI);
    if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
      MachineInstr *SrlOp =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      Out = stripBitCast(SrlOp, MRI);
      return true;
    }
  }

  // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
  //    1, 0 swaps the low/high 16 bits.
  //    1, 1 sets the high 16 bits to be the same as the low 16.
  // In any case, it selects the high elts.
  if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
    assert(Mask.size() == 2);

    if (Mask[0] == 1 && Mask[1] <= 1) {
      MachineInstr *LHS =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      Out = stripBitCast(LHS, MRI);
      return true;
    }
  }

  return false;
}
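// Both patterns recognize the same idea: a 16-bit value that is really the
// high half of a 32-bit register, e.g.
//   %hi:_(s16) = G_TRUNC (G_LSHR %x:_(s32), 16)
// which mad-mix style instructions can consume directly via the op_sel bit
// instead of an explicit shift. (Illustrative MIR only.)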
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  MachineInstr *MI = MRI->getVRegDef(Src);
  if (MI->getOpcode() == AMDGPU::G_FPEXT) {
    MachineOperand *MO = &MI->getOperand(1);
    Src = MO->getReg();
    MI = MRI->getVRegDef(Src);

    assert(MRI->getType(Src) == LLT::scalar(16));

    // See through bitcasts.
    if (MI->getOpcode() == AMDGPU::G_BITCAST) {
      MO = &MI->getOperand(1);
      Src = MO->getReg();
      MI = MRI->getVRegDef(Src);
    }

    const auto CheckAbsNeg = [&]() {
      // Be careful about folding modifiers if we already have an abs. fneg is
      // applied last, so we don't want to apply an earlier fneg.
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);

        if ((ModsTmp & SISrcMods::NEG) != 0)
          Mods ^= SISrcMods::NEG;
        if ((ModsTmp & SISrcMods::ABS) != 0)
          Mods |= SISrcMods::ABS;
      }
    };

    CheckAbsNeg();

    // op_sel/op_sel_hi decide the source type and source. If the source's
    // op_sel_hi is set, it indicates to do a conversion from fp16. If the
    // source's op_sel is set, it picks the high half of the source register.
    Mods |= SISrcMods::OP_SEL_1;

    MachineInstr *ExtractHiEltMI;
    if (isExtractHiElt(MI, ExtractHiEltMI, *MRI)) {
      Mods |= SISrcMods::OP_SEL_0;
      MI = ExtractHiEltMI;
      MO = &MI->getOperand(0);
      Src = MO->getReg();

      CheckAbsNeg();
    }

    Matched = true;
  }

  return {Src, Mods};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
      return false;
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
  Register TmpReg0;

  // For S_BARRIER_INIT, member count will always be read from M0[16:22].
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // TODO: This should be expanded during legalization so that the S_LSHL
    // and COPY can be fused.
    auto MIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
                   .addImm(16)
                   .addReg(MemberCount);
    if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
      return false;
  }

  // If the barrier id is not an inline constant, it must be referenced via
  // M0. For S_BARRIER_INIT it is OR'd with the shifted member count to
  // complete the M0 register.
  if (!BarValImm) {
    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      // ... (OR TmpReg0 with the barrier id into TmpReg1)
    }
    // ... (copy the barrier id / combined value into M0)
  }

  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());

  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
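// Note: both barrier selectors above read SCC immediately after the barrier
// instruction, so the generic i1 result is copied out of SCC and constrained
// to a 32-bit scalar register class rather than being treated as a VCC-bank
// value.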
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
    MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  else {
    assert(MI.getOpcode() == TargetOpcode::G_CONSTANT &&
           "Expected G_CONSTANT");
    MIB.addImm(Op.getCImm()->getSExtValue());
  }
}

void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}

/// This only really exists to satisfy DAG type checking machinery, so is a
/// no-op here.
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  MIB.addImm(MI.getOperand(OpIdx).getImm());
}
void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
}

void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &
             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                       : AMDGPU::CPol::ALL_pregfx12));
}

void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
                                                 : AMDGPU::CPol::SWZ_pregfx12);
  MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

void AMDGPUInstructionSelector::renderFPPow2ToExponent(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
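// These render*() hooks are invoked from the TableGen-generated patterns in
// AMDGPUGenGlobalISel.inc to turn a matched G_CONSTANT/G_FCONSTANT or
// immediate operand into the exact encoding the selected instruction expects;
// e.g. renderFPPow2ToExponent emits log2 of a power-of-two FP constant so the
// instruction can take an exponent operand instead of a full literal.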
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}