#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
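// GET_GLOBALISEL_IMPL pulls the TableGen-generated pattern matcher into this
// file; redefining AMDGPUSubtarget as GCNSubtarget lets that generated code
// query GCN-specific subtarget features directly.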
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
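// selectCOPY: an ordinary COPY is kept as-is, but a copy whose destination
// lives in the VCC (lane-mask) bank must produce a real wave-wide condition,
// e.g. by masking the source to one bit and comparing with V_CMP_NE_U32.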
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
          .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        And.setOperandDead(3);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                                      unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
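// selectG_AND_OR_XOR handles only the uniform cases: results in the SGPR bank
// use the scalar ALU, and 1-bit results in the VCC bank use the wave-sized
// lane-mask form of the same opcodes.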
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc =
        Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
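// G_UADDO/G_USUBO produce a value plus a carry-out; G_UADDE/G_USUBE also
// consume a carry-in. On the scalar path the carry travels through SCC, on
// the vector path through a VCC lane mask.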
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
  if (!MRI->getRegClassOrNull(Dst1Reg))
    MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                    *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    MI.eraseFromParent();
    MI.eraseFromParent();
    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
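  // On the SGPR path a v2i16 build-vector becomes one of the S_PACK_*_B32_B16
  // forms; which one depends on whether each 16-bit source already sits in
  // the high half of its 32-bit register (i.e. comes from a shift-right by 16).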
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  if (ConstSrc1 && ConstSrc1->Value == 0) {
    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
    MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_inverse_ballot:
    return selectInverseBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
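// getV_CMPOpcode: maps an IR predicate and operand size onto a VALU compare.
// The Select lambda picks between the legacy and "true16" 16-bit encodings
// based on the subtarget, with 32- and 64-bit forms for wider operands.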
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
                                                unsigned Size) const {
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
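// selectG_ICMP_or_FCMP: a compare with a uniform result is selected to an
// S_CMP_* that writes SCC (then copied out), while a divergent compare
// becomes a V_CMP_* that writes a lane mask.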
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();
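// amdgcn.ballot: roughly, a known-constant zero condition folds to an S_MOV
// of 0, a known-true condition becomes a copy of EXEC, and anything else
// copies the mask operand through.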
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  std::optional<ValueAndVReg> Arg =
  const auto BuildCopy = [&](Register SrcReg) {
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  const Register DstReg = I.getOperand(0).getReg();
  const Register MaskReg = I.getOperand(2).getReg();
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
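// ds_ordered_count: the wave-release/wave-done flags, instruction kind,
// dword count, and shader-type bits are all packed into the 16-bit offset
// field of the DS_ORDERED_COUNT instruction, as computed below.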
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
  std::tie(BaseOffset, ImmOffset) =
  if (Readfirstlane) {
                                   AMDGPU::SReg_32RegClass, *MRI))
  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  MI.eraseFromParent();
  MI.eraseFromParent();
  TFE = (TexFailCtrl & 0x1) != 0;
  LWE = (TexFailCtrl & 0x2) != 0;
  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                    TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
    IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
      : NumVAddrDwords == NumVAddrRegs);
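  // NSA (non-sequential address) encodings let the address operands live in
  // scattered VGPRs; without NSA support they must be packed into one
  // contiguous register tuple, so the opcode choice below depends on the
  // subtarget generation.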
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
        << "requested image instruction is not supported on this GPU\n");
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
    if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
    MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
        F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
    F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
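// G_SELECT: a uniform select becomes S_CSELECT (reading SCC), while a
// divergent select becomes V_CNDMASK_B32 driven by a VCC lane mask.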
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
    return AMDGPU::sub0;
    return AMDGPU::sub0_sub1;
    return AMDGPU::sub0_sub1_sub2;
    return AMDGPU::sub0_sub1_sub2_sub3;
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    return AMDGPU::sub0;
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  if (SubRegIdx == -1)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
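// A mask of trailing ones whose signed value lies in [-16, 64] fits the
// inline-constant field, so an AND with that mask is cheaper than a BFE
// when zero-extending.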
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  } else if (ImmOp.isCImm()) {
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
                  .addImm(I.getOperand(1).getImm());
        &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
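// 64-bit scalar fneg/fabs only touch the sign bit in the high word: split
// the value, apply S_XOR (fneg), S_OR (fneg of fabs), or S_AND (fabs) with
// the sign-bit constant on the high half, then recombine with REG_SEQUENCE.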
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
         AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
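// G_BRCOND: a uniform condition is branched on via SCC (S_CBRANCH_SCC1); a
// divergent condition is ANDed with EXEC into VCC and branched on with
// S_CBRANCH_VCCNZ so inactive lanes cannot take the branch.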
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  MI.eraseFromParent();
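// Buffer-load-to-LDS: the MUBUF opcode is picked by load size (ubyte, ushort,
// dword) and by which of vindex/voffset are present, selecting the BOTHEN,
// IDXEN, OFFEN, or OFFSET addressing variant.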
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
      .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
      .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
      sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
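// select() is the top-level InstructionSelector hook: instructions that are
// not generic (pre-isel) opcodes fall through to the TableGen-generated
// matcher or a plain COPY, and the switch below routes each generic opcode
// to its hand-written selection routine.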
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
      return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
      return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
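// selectVOP3ModsImpl looks through G_FNEG/G_FABS (and a canonical 0-x G_FSUB)
// feeding an operand, folding them into VOP3 source-modifier bits instead of
// emitting separate instructions.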
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
                TII.get(AMDGPU::COPY), VGPRSrc)
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, true,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  return std::pair(Src, Mods);
3855AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
3861 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI);
3870AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
3876 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true);
3885AMDGPUInstructionSelector::selectVOP3PModsNeg(
MachineOperand &Root)
const {
3890 "expected i1 value");
3900AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
3903 "expected i1 value");
static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  const TargetRegisterClass *DstRegClass;
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  default:
    llvm_unreachable("unhandled Reg sequence size");
  }

  MachineIRBuilder B(*InsertPt);
  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {
    MIB.addReg(Elts[i]);
    MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
  }
  return MIB->getOperand(0).getReg();
}

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    Mods |= SISrcMods::NEG;
    // Check if all elements also have abs modifier.
    SmallVector<Register, 8> NegAbsElts;
    for (auto El : Elts) {
      Register FabsSrc;
      if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
        break;
      NegAbsElts.push_back(FabsSrc);
    }
    if (Elts.size() != NegAbsElts.size()) {
      // Neg
      Src = buildRegSequence(Elts, InsertPt, MRI);
    } else {
      // Neg and Abs
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
    }
  } else {
    assert(ModOpcode == TargetOpcode::G_FABS);
    // Abs
    Mods |= SISrcMods::NEG_HI;
    Src = buildRegSequence(Elts, InsertPt, MRI);
  }
}
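// Example (illustrative): four elements recombined for a WMMA source become
//   %src:vreg_128 = REG_SEQUENCE %e0, %subreg.sub0, %e1, %subreg.sub1,
//                                %e2, %subreg.sub2, %e3, %subreg.sub3
// while the common G_FNEG/G_FABS wrappers are folded into NEG / NEG_HI
// source modifiers by selectWMMAModsNegAbs above.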
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsF32;

  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Based on first element decide which mod we match, neg or abs.
    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      if (ElF32->getOpcode() != ModOpcode)
        break;
      EltsF32.push_back(ElF32->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (BV->getNumSources() == EltsF32.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      Register FNegSrc;
      if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
        break;
      EltsV2F16.push_back(FNegSrc);
    }

    // All elements had G_FNEG modifier.
    if (CV->getNumSources() == EltsV2F16.size()) {
      Mods |= SISrcMods::NEG;
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    assert(CV->getNumSources() > 0);
    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
    // Based on first element decide which mod we match, neg or abs.
    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;

    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
      if (ElV2F16->getOpcode() != ModOpcode)
        break;
      EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (CV->getNumSources() == EltsV2F16.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
    if (TII.isInlineConstant(FPValReg->Value)) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
      }}};
    }
    // Non-inlineable splat floats should not fall-through for integer
    // immediate checks.
    return {};
  }

  APInt ICst;
  if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) {
    if (TII.isInlineConstant(ICst))
      return {
          {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}};
  }

  return {};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Src = ShiftSrc;
    Key = 1;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}
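// Example (illustrative): for a 16-bit index extracted as
//   %idx = G_LSHR %packed, 16
// selectSWMMACIndex16 returns {%packed, index_key=1}, so the SWMMAC
// instruction selects the high half in hardware and the shift is dropped.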
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets.
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

  if (SOffset && Offset) {
    EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                              /*HasSOffset=*/true);
    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
        AddrInfo.size() > 1) {
      const GEPInfo &GEPI2 = AddrInfo[1];
      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        if (Register OffsetReg =
                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
          Base = GEPI2.SgprParts[0];
          *SOffset = OffsetReg;
          *Offset = *EncodedImm;
          if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
            return true;

          // For unbuffered smem loads, it is illegal for the Immediate Offset
          // to be negative if the resulting (Offset + (M0 or SOffset or zero))
          // is negative. Handle the case where the Immediate Offset + SOffset
          // is negative.
          auto SKnown = KB->getKnownBits(*SOffset);
          if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
            return false;

          return true;
        }
      }
    }
    return false;
  }

  EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                            /*HasSOffset=*/false);
  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR offset is unsigned.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      GEPI.Imm != 0) {
    // If we make it this far we have a load with a 32-bit immediate offset.
    // It is OK to select this using an sgpr offset, because we have already
    // failed trying to select this load into one of the _IMM variants since
    // the _IMM Patterns are considered before the _SGPR patterns.
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        .addImm(GEPI.Imm);
    return true;
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
      return true;
    }
  }

  return false;
}
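// Note (illustrative): SMRD loads come in three addressing flavors, and the
// wrappers below pick one per pattern:
//   s_load_dword s0, s[2:3], 0x20            ; base + encoded imm (Offset)
//   s_load_dword s0, s[2:3], s4              ; base + sgpr        (SOffset)
//   s_load_dword s0, s[2:3], s4 offset:0x20  ; base + sgpr + imm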
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /*SOffset=*/nullptr, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /*Offset=*/nullptr))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  auto Default = std::pair(Root.getReg(), 0);

  if (!STI.hasFlatInstOffsets())
    return Default;

  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return Default;

  return std::pair(PtrBase, ConstOffset);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  Register PtrReg;
  int64_t Offset;
  std::tie(PtrReg, Offset) =
      selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
  }};
}
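// Note (illustrative): for all three variants a pointer of the form
//   %p = G_PTR_ADD %base, G_CONSTANT <imm>
// only folds to {%base, <imm>} when TII.isLegalFLATOffset(<imm>, AS, variant)
// accepts the immediate for that address space; otherwise the add survives
// and the offset field stays 0.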
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    } else {
      auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // Offset is too large.
          //
          // saddr + large_offset -> saddr +
          //                         (voffset = large_offset & ~MaxOffset) +
          //                         (large_offset & MaxOffset)
          int64_t SplitImmOffset, RemainderOffset;
          std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
              ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

          if (isUInt<32>(RemainderOffset)) {
            MachineInstr *MI = Root.getParent();
            MachineBasicBlock *MBB = MI->getParent();
            Register HighBits =
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(),
                    TII.get(AMDGPU::V_MOV_B32_e32), HighBits)
                .addImm(RemainderOffset);

            return {{
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
                [=](MachineInstrBuilder &MIB) {
                  MIB.addReg(HighBits);
                }, // voffset
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
            }};
          }
        }

        // We are adding a 64 bit SGPR and a constant. Check the constant bus
        // limit before keeping the fold.
        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
            !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;
      }
    }
  }

  // Match the variable offset.
  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // Look through the SGPR->VGPR copy.
    Register SAddr =
        getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      // It's possible voffset is an SGPR here, but the copy to VGPR will be
      // inserted later.
      if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
        return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
                 [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
      }
    }
  }

  // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
  // drop this.
  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },      // voffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }     // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      MachineInstr &I = *Root.getParent();
      MachineBasicBlock *BB = I.getParent();
      const DebugLoc &DL = I.getDebugLoc();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          .addFrameIndex(FI)
          .addReg(RHSDef->Reg)
          .setOperandDead(3); // Dead scc
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },    // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
// Check whether the flat scratch SVS swizzle bug affects this access.
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
    return false;

  // The bug affects the swizzling of SVS accesses if there is any carry out
  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
  // voffset to (soffset + inst_offset).
  auto VKnown = KB->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
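// Example (illustrative): with VAddr known to end in 0b11 and
// (SAddr + inst_offset) ending in 0b01, (3 + 1) >= 4 reports a possible carry
// from bit 1 into bit 2, so the SVS form is rejected and a safer addressing
// mode is chosen instead.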
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  Register OrigAddr = Addr;
  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },       // vaddr
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  if (!isSGPR(LHS))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },      // vaddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },      // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
      Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // Materialize the high bits of the offset; the low bits go in the
    // instruction's immediate offset field.
    const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        .addImm(Offset & ~MaxOffset);

    // ...
  }

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  std::optional<int> FI;
  Register VAddr = Root.getReg();

  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    // ...
    const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
    if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
      FI = PtrBaseDef->getOperand(1).getIndex();
    else
      VAddr = PtrBase;
    Offset = ConstOffset;
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    FI = RootDef->getOperand(1).getIndex();
  }

  // ...
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return KB->signBitIsZero(Base);
}

// Return whether the operation has the NoUnsignedWrap property.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}
// Check that the address value of a flat scratch access in the form
// SGPR + IMM is legal (i.e. provably non-negative where required).
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  if (isNoUnsignedWrap(AddrMI))
    return true;

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg = getIConstantVRegValWithLookThrough(
        AddrMI->getOperand(2).getReg(), *MRI);
    // If the immediate offset is negative and within a certain range, the
    // base address must be non-negative for the sum to stay legal.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      return KB->signBitIsZero(AddrMI->getOperand(1).getReg());
  }

  return KB->signBitIsZero(Addr);
}

// Check address values in SGPR/VGPR are legal for flat scratch in the form
// of: SGPR + VGPR.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  if (isNoUnsignedWrap(AddrMI))
    return true;

  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}

// Check address values in SGPR/VGPR are legal for flat scratch in the form
// of: SGPR + VGPR + Imm.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  Register Base = AddrMI->getOperand(1).getReg();
  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Base, *MRI);
  std::optional<ValueAndVReg> RHSOffset = getIConstantVRegValWithLookThrough(
      AddrMI->getOperand(2).getReg(), *MRI);
  assert(RHSOffset);

  // If the immediate offset is negative and within a certain range, the base
  // address cannot also be negative, so the sum stays legal.
  if (isNoUnsignedWrap(BaseDef->MI) &&
      (isNoUnsignedWrap(AddrMI) ||
       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))
    return true;

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
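// Example (illustrative): for a 32-bit shift only the low 5 bits of the
// amount matter, so in
//   %m = G_AND %amt, 31
//   %r = G_SHL %x, %m
// the mask is redundant: countr_one(31) == 5 >= ShAmtBits, and the selector
// may use %amt directly.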
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  Register Reg = Root.getReg();

  std::optional<DefinitionAndSourceRegister> Def =
      getDefSrcRegIgnoringCopies(Reg, *MRI);
  assert(Def && "this shouldn't be an optional result");
  Reg = Def->Reg;

  // ...
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
    // ...
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
    // ...
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right-hand side, return
/// the base register and constant offset.
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}
/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If
/// \p BasePtr is not valid, a null resource is used.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(RSrc2)
    .addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(RSrc3)
    .addImm(FormatHi);

  // Build the half of the subregister with the constants before building the
  // full 128-bit register. If we are building multiple resource descriptors,
  // this will allow CSEing of the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(RSrcHi)
    .addReg(RSrc2)
    .addImm(AMDGPU::sub0)
    .addReg(RSrc3)
    .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)
      .addDef(RSrcLo)
      .addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(RSrc)
    .addReg(RSrcLo)
    .addImm(AMDGPU::sub0_sub1)
    .addReg(RSrcHi)
    .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd
      = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

    // FIXME: Need to fix extra SGPR->VGPR copies in this case.
    // ...
  }

  return Data;
}

/// Return if the addr64 mubuf mode should be used for the given address.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}
/// Split an immediate offset \p ImmOffset depending on whether it fits in the
/// MUBUF instruction's offset field, moving any excess into \p SOffset.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
    .addDef(SOffset)
    .addImm(ImmOffset);
  ImmOffset = 0;
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // FIXME: Predicates should stop this from reaching here.
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the default resource from a 0 address.
        VAddr = N0;
      } else {
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or
  // (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // TODO: Look through extensions for 32-bit soffset.
  MachineIRBuilder B(*Root.getParent());

  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  // FIXME: Use defaulted operands for trailing 0s and remove from the complex
  // pattern.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },   // vaddr
      [=](MachineInstrBuilder &MIB) {                         // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) {                         // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm, //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(
      STI, *OffsetVal, /*IsBuffer=*/true, /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), KB, /*CheckNUW=*/true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
      STI, Offset, /*IsBuffer=*/true, /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
  if (MI->getOpcode() == AMDGPU::G_BITCAST)
    return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
  return MI;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
                           Register &Out) {
  In = getSrcRegIgnoringCopies(In, MRI);
  MachineInstr *Inst = MRI.getVRegDef(In);
  if (Inst->getOpcode() != AMDGPU::G_TRUNC)
    return false;

  MachineInstr *TruncOp =
      getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI);

  if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
    std::optional<ValueAndVReg> SrlAmount = getIConstantVRegValWithLookThrough(
        TruncOp->getOperand(2).getReg(), MRI);
    if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
      Out = getSrcRegIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      return true;
    }
  }

  // shuffle((2 x s16) %a, (2 x s16) %b, (1, ?)) -> hi elt of %a
  if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
    assert(Mask.size() == 2);

    if (Mask[0] == 1 && Mask[1] <= 1) {
      Out = TruncOp->getOperand(1).getReg();
      return true;
    }
  }

  return false;
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  MachineInstr *MI = MRI->getVRegDef(Src);
  if (MI->getOpcode() == AMDGPU::G_FPEXT) {
    MachineOperand *MO = &MI->getOperand(1);
    Src = MO->getReg();
    MI = MRI->getVRegDef(Src);

    // See through bitcasts.
    if (MI->getOpcode() == AMDGPU::G_BITCAST) {
      MO = &MI->getOperand(1);
      Src = MO->getReg();
      MI = MRI->getVRegDef(Src);
    }

    const auto CheckAbsNeg = [&]() {
      // Be careful about folding modifiers if we already have an abs. fneg is
      // applied last, so we don't want to apply an earlier fneg.
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
        // ...
      }
    };

    CheckAbsNeg();

    // op_sel/op_sel_hi decide the source type and source. If the source's
    // op_sel_hi is set, it indicates a conversion from fp16; if the source's
    // op_sel is set, it picks the high half of the source register.
    Mods |= SISrcMods::OP_SEL_1;

    Register ExtractHiEltReg;
    if (isExtractHiElt(*MRI, Src, ExtractHiEltReg)) {
      Src = ExtractHiEltReg;
      Mods |= SISrcMods::OP_SEL_0;

      MachineInstr *ExtractHiEltMI = MRI->getVRegDef(Src);
      MI = ExtractHiEltMI;
      MO = &MI->getOperand(0);
      CheckAbsNeg();
    }

    Matched = true;
  }

  return std::pair(Src, Mods);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
      return false;
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
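// Example (illustrative): @llvm.amdgcn.s.barrier.join with an inline-constant
// barrier id selects to S_BARRIER_JOIN_IMM, while a variable id is first
// copied into M0 and selects to S_BARRIER_JOIN_M0.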
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
  Register M0Val;
  Register TmpReg0;

  // For S_BARRIER_INIT, the member count is always read from M0[16:22].
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
        .addReg(MemberCount)
        .addImm(16);
    M0Val = TmpReg0;
  }

  // If the barrier id is not an inlinable constant, it must go through M0.
  if (!BarValImm) {
    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
      // OR the barrier id (M0[4:0]) with the member count for S_BARRIER_INIT.
      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
          .addReg(BarOp.getReg())
          .addReg(TmpReg0);
      M0Val = TmpReg1;
    } else {
      M0Val = BarOp.getReg();
    }
  }

  // Build a copy to M0 if needed.
  if (M0Val) {
    auto CopyMIB =
        BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());

  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
    MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  else {
    assert(MI.getOpcode() == TargetOpcode::G_CONSTANT &&
           "Expected G_CONSTANT");
    MIB.addImm(Op.getCImm()->getSExtValue());
  }
}

void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}

/// This only really exists to satisfy the DAG type-checking machinery, so is a
/// no-op here.
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  MIB.addImm(MI.getOperand(OpIdx).getImm());
}

void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
}

void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &
             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                       : AMDGPU::CPol::ALL_pregfx12));
}

void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
                                                 : AMDGPU::CPol::SWZ_pregfx12);
  MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}