#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
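// Note: the GET_GLOBALISEL_IMPL include above pulls in the TableGen-emitted
// GlobalISel pattern-matching tables; AMDGPUSubtarget is temporarily aliased
// to GCNSubtarget so the generated code queries the correct subtarget class.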
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  // ...
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  // ...
  const LLT Ty = MRI.getType(Reg);
  // ...
  // G_TRUNC s1 result is never vcc.
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
         RC->hasSuperClassEq(TRI.getBoolRC());
  // ...
  return RB->getID() == AMDGPU::VCCRegBankID;
}
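// Note: the "VCC bank" means a condition lane mask (one bit per lane), which
// is 64 bits wide on wave64 subtargets and 32 bits wide on wave32.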
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  // ...
  if (!DstRC || DstRC != SrcRC)
    return false;
  // ...
}
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  // ...
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      // ...
    }

    if (!isVCC(SrcReg, *MRI)) {
      // ...
      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, true);
      if (ConstVal) {
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        // ...
        const unsigned AndOpc =
            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        // ...
        if (IsSGPR)
          And.setOperandDead(3); // Dead scc.
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
            .addImm(0)
            .addReg(MaskedReg);
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      // ...
    }
    // ...
  }

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
      continue;
    // ...
  }
}
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
  // ...
  const RegClassOrRegBank &RegClassOrBank =
      MRI->getRegClassOrRegBank(DefReg);
  // ...
  I.setDesc(TII.get(TargetOpcode::PHI));
  // ...
}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  // ...
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  // ...
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
  // ...
  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);
  // ...
}
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
  // ...
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  LLT Ty = MRI->getType(DstReg);
  // ...
  unsigned Size = Ty.getSizeInBits();
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
          BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
              .add(I.getOperand(1))
              .add(I.getOperand(2))
              .setOperandDead(3); // Dead scc
      // ...
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      // ...
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    // ...
    MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
            .add(I.getOperand(1))
            .add(I.getOperand(2))
            /* ... */;
    // ...
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
        .add(Lo1)
        .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
        .add(Hi1)
        .add(Hi2);
  } else {
    // ...
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
        .addDef(CarryReg)
        .add(Lo1)
        .add(Lo2)
        .addImm(0);
    // ...
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);
  // ...
}
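// Illustrative expansion (derived from the SALU path above): a 64-bit scalar
// add is split into a low add that defines SCC and a high add that consumes
// it, e.g.
//   %dstlo:sreg_32 = S_ADD_U32  %lo1, %lo2   ; sets SCC = carry-out
//   %dsthi:sreg_32 = S_ADDC_U32 %hi1, %hi2   ; adds SCC as carry-in
//   %dst:sreg_64   = REG_SEQUENCE %dstlo, %subreg.sub0, %dsthi, %subreg.sub1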
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  // ...
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    // ...
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
    // ...
  }

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    // ...
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
        .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }
  // ...
  if (!RBI.constrainGenericRegister(Src1Reg,
                                    AMDGPU::SReg_32RegClass, *MRI))
    return false;
  // ...
}
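// Note: on the SALU path the carry bit lives in SCC, so a carry-in is
// materialized by copying the incoming i1 into SCC right before
// S_ADDC/S_SUBB, and a used carry-out is read back with a copy from SCC.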
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  // ...
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  // ...
  I.addImplicitDefUseOperands(*MF);
  // ...
}
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;
  // ...
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  // ...
  SrcRC = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                   *SrcRC, I.getOperand(1));
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, 0, SubReg);
  // ...
}
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
  // ...
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  // ...
  const int NumDst = MI.getNumOperands() - 1;
  // ...
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    // Make sure the subregister index is valid for the source register.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    // ...
  }

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  // ...
  LLT SrcTy = MRI->getType(Src0);
  // ...
  // BUILD_VECTOR with >= 32-bit sources is handled by MERGE_VALUES.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }
  // ...
  if (MRI->getType(Dst) != LLT::fixed_vector(2, 16) ||
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
       SrcTy != LLT::scalar(32)))
    return selectImpl(MI, *CoverageInfo);
  // ...
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (ConstSrc0 && ConstSrc1) {
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    // ...
    MI.eraseFromParent();
    // ...
    MI.eraseFromParent();
    // ...
  }
  // ...
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    // ...
    const TargetRegisterClass &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    // ...
  }
  // ...
  if (IsVector) {
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
                   /* ... */;
    // ...
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
              /* ... */;
    // ...
    MI.eraseFromParent();
    // ...
  }
  // ...
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift1) {
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift0) {
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      // ...
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
                     /* ... */;
      // ...
      MI.eraseFromParent();
      // ...
    }
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  }

  MI.setDesc(TII.get(Opc));
  // ...
}
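// Note: the S_PACK_{LL,LH,HL,HH}_B32_B16 choice encodes which half of each
// 32-bit source supplies a packed 16-bit lane: L takes bits [15:0], H takes
// bits [31:16]. So build_vector_trunc(x, lshr(y, 16)) selects S_PACK_LH.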
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  // ...
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }
  // ...
}
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  // ...
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  // ...
  int64_t Offset = I.getOperand(3).getImm();
  // ...
  if (Offset % 32 != 0 || InsSize % 32 != 0)
    return false;
  // ...
  if (SubReg == AMDGPU::NoSubRegister)
    return false;
  // ...
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
      /* ... */;
  // ...
}
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  // ...
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  // ...
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  // ...
  MI.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  // ...
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      /* ... */;
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  // ...
  Register LaneSelect = MI.getOperand(3).getReg();
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
      getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
  if (ConstSelect) {
    // The lane select must fit the wavefront size, so mask off the high bits.
    MIB.addImm(ConstSelect->Value.getSExtValue() &
               maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
  } else {
    std::optional<ValueAndVReg> ConstVal =
        getIConstantVRegValWithLookThrough(Val, *MRI);
    // ...
    MIB.addImm(ConstVal->Value.getSExtValue());
    // ...
    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(LaneSelect);
    // ...
  }

  MI.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  // ...
  LLT Ty = MRI->getType(Dst0);

  unsigned Opc;
  if (Ty == LLT::scalar(32))
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
  else if (Ty == LLT::scalar(64))
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  else
    return false;
  // ...
  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  // ...
  MI.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    // ...
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
int AMDGPUInstructionSelector::getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
                                              const GCNSubtarget &ST) const {
  if (Size == 16 && !ST.has16BitInsts())
    return -1;

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
                          unsigned S64Opc) {
    if (Size == 16)
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    if (Size == 32)
      return S32Opc;
    return S64Opc;
  };

  switch (P) {
  case CmpInst::ICMP_NE:
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
  case CmpInst::ICMP_SGT:
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
  case CmpInst::ICMP_UGT:
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);

  case CmpInst::FCMP_OEQ:
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
  case CmpInst::FCMP_OGT:
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
  case CmpInst::FCMP_OGE:
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
  case CmpInst::FCMP_OLT:
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
  case CmpInst::FCMP_OLE:
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
  case CmpInst::FCMP_ONE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_ORD:
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
  case CmpInst::FCMP_UNO:
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
  case CmpInst::FCMP_UEQ:
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
  case CmpInst::FCMP_UGT:
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
  case CmpInst::FCMP_UGE:
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
  case CmpInst::FCMP_ULT:
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
  case CmpInst::FCMP_ULE:
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
  case CmpInst::FCMP_UNE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_TRUE:
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
  case CmpInst::FCMP_FALSE:
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
  default:
    return -1;
  }
}
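// Note: VALU compares follow the V_CMP_<cond>_<type> naming scheme and write
// a per-lane mask to VCC or an SGPR pair; the _t16 variants are selected when
// the subtarget implements true 16-bit instructions.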
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    // ...
    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size == 32) {
    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U32;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U32;
    case CmpInst::ICMP_SGT:
      return AMDGPU::S_CMP_GT_I32;
    case CmpInst::ICMP_SGE:
      return AMDGPU::S_CMP_GE_I32;
    case CmpInst::ICMP_SLT:
      return AMDGPU::S_CMP_LT_I32;
    case CmpInst::ICMP_SLE:
      return AMDGPU::S_CMP_LE_I32;
    case CmpInst::ICMP_UGT:
      return AMDGPU::S_CMP_GT_U32;
    case CmpInst::ICMP_UGE:
      return AMDGPU::S_CMP_GE_U32;
    case CmpInst::ICMP_ULT:
      return AMDGPU::S_CMP_LT_U32;
    case CmpInst::ICMP_ULE:
      return AMDGPU::S_CMP_LE_U32;
    case CmpInst::FCMP_OEQ:
      return AMDGPU::S_CMP_EQ_F32;
    case CmpInst::FCMP_OGT:
      return AMDGPU::S_CMP_GT_F32;
    case CmpInst::FCMP_OGE:
      return AMDGPU::S_CMP_GE_F32;
    case CmpInst::FCMP_OLT:
      return AMDGPU::S_CMP_LT_F32;
    case CmpInst::FCMP_OLE:
      return AMDGPU::S_CMP_LE_F32;
    case CmpInst::FCMP_ONE:
      return AMDGPU::S_CMP_LG_F32;
    case CmpInst::FCMP_ORD:
      return AMDGPU::S_CMP_O_F32;
    case CmpInst::FCMP_UNO:
      return AMDGPU::S_CMP_U_F32;
    case CmpInst::FCMP_UEQ:
      return AMDGPU::S_CMP_NLG_F32;
    case CmpInst::FCMP_UGT:
      return AMDGPU::S_CMP_NLE_F32;
    case CmpInst::FCMP_UGE:
      return AMDGPU::S_CMP_NLT_F32;
    case CmpInst::FCMP_ULT:
      return AMDGPU::S_CMP_NGE_F32;
    case CmpInst::FCMP_ULE:
      return AMDGPU::S_CMP_NGT_F32;
    case CmpInst::FCMP_UNE:
      return AMDGPU::S_CMP_NEQ_F32;
    default:
      return -1;
    }
  }

  if (Size == 16) {
    // ...
    switch (P) {
    case CmpInst::FCMP_OEQ:
      return AMDGPU::S_CMP_EQ_F16;
    case CmpInst::FCMP_OGT:
      return AMDGPU::S_CMP_GT_F16;
    case CmpInst::FCMP_OGE:
      return AMDGPU::S_CMP_GE_F16;
    case CmpInst::FCMP_OLT:
      return AMDGPU::S_CMP_LT_F16;
    case CmpInst::FCMP_OLE:
      return AMDGPU::S_CMP_LE_F16;
    case CmpInst::FCMP_ONE:
      return AMDGPU::S_CMP_LG_F16;
    case CmpInst::FCMP_ORD:
      return AMDGPU::S_CMP_O_F16;
    case CmpInst::FCMP_UNO:
      return AMDGPU::S_CMP_U_F16;
    case CmpInst::FCMP_UEQ:
      return AMDGPU::S_CMP_NLG_F16;
    case CmpInst::FCMP_UGT:
      return AMDGPU::S_CMP_NLE_F16;
    case CmpInst::FCMP_UGE:
      return AMDGPU::S_CMP_NLT_F16;
    case CmpInst::FCMP_ULT:
      return AMDGPU::S_CMP_NGE_F16;
    case CmpInst::FCMP_ULE:
      return AMDGPU::S_CMP_NGT_F16;
    case CmpInst::FCMP_UNE:
      return AMDGPU::S_CMP_NEQ_F16;
    default:
      return -1;
    }
  }

  return -1;
}
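// Note: scalar (SALU) compares do not produce a lane mask; they set the
// single SCC bit, which a later copy or S_CSELECT consumes.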
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    // ...
    BuildMI(*BB, &I, DL, TII.get(Opcode))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
        .addReg(AMDGPU::SCC);
    // ...
    I.eraseFromParent();
    // ...
  }

  if (I.getOpcode() == AMDGPU::G_FCMP)
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
      .add(I.getOperand(2))
      .add(I.getOperand(3));
  // ...
  I.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
    return false;

  LLT DstTy = MRI->getType(Dst);
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  if (!CmpInst::isIntPredicate(Pred) && !CmpInst::isFPPredicate(Pred)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    // ...
  }
  // ...
  MachineInstrBuilder SelectedMI;
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
  Register Src0Reg =
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR=*/true);
  Register Src1Reg =
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR=*/true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  // ...
  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);

  const auto BuildCopy = [&](Register SrcReg) {
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg);
    // ...
    // If emitting an i64 ballot in wave32, zero the upper half.
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        /* ... */;
  };

  if (Arg) {
    const int64_t Value = Arg->Value.getSExtValue();
    if (Value == 0) {
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      // ...
    } else if (Value == -1) // all ones
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    // ...
  } else {
    BuildCopy(I.getOperand(2).getReg());
  }

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  auto RelocSymbol = cast<GlobalVariable>(
      M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(Ctx)));
  // ...
  BuildMI(*BB, &I, DL,
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
      /* ... */;

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  // ...
  unsigned Depth = I.getOperand(2).getImm();
  // ...
  // A non-zero depth has no callee-saved frame to inspect; return zero.
  I.eraseFromParent();
  return true;
  // ...
  // Mark the return address register pair as a live-in.
  Register LiveIn = getFunctionLiveInPhysReg(*MF, TII, ReturnAddrReg,
                                             AMDGPU::SReg_64RegClass, DL);
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // ...
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  Register Reg = MI.getOperand(1).getReg();
  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  return true;
}
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  // ...
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease)
    report_fatal_error("ds_ordered_count: wave_done requires wave_release");

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
      report_fatal_error(
          "ds_ordered_count: dword count must be between 1 and 4");
    }
  }
  // ...
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  // ...
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
    Offset1 |= (CountDw - 1) << 6;

  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
      /* ... */;
  // ...
  MI.eraseFromParent();
  // ...
}
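// Illustrative encoding walkthrough (derived from the packing above): for
// index 3, wave_release=1, wave_done=1, an "add" op (Instruction = 0) and a
// pre-GFX10 target, Offset0 = 3 << 2 = 0xc and
// Offset1 = 1 | (1 << 1) | (ShaderType << 2), giving the final immediate
// Offset = Offset0 | (Offset1 << 8).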
static unsigned gwsIntrinToOpcode(Intrinsic::ID IID) {
  switch (IID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
    return false;

  // Intrinsic ID, vsrc, offset.
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  // ...
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;
  // ...
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    // ...
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // ...
  }
  // ...
  std::tie(BaseOffset, ImmOffset) =
      AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);

  if (Readfirstlane) {
    // ...
  } else {
    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))
      return false;
  }

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  // ...
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  // TODO: Should this try to look through readfirstlane like GWS?
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
    Offset = 0;
  }
  // ...
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  // ...
  // A barrier in a workgroup that fits in a single wave is a no-op.
  MI.eraseFromParent();
  return true;
  // ...
  MI.eraseFromParent();
  return true;
}
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
                         bool &IsTexFail) {
  if (TexFailCtrl)
    IsTexFail = true;

  TFE = (TexFailCtrl & 0x1) ? true : false;
  TexFailCtrl &= ~(uint64_t)0x1;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  TexFailCtrl &= ~(uint64_t)0x2;

  return TexFailCtrl == 0;
}
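// Note: TFE (texture-fail enable) and LWE (LDS write error) are the two low
// bits of the texfailctrl immediate; any other set bit makes the control word
// invalid, hence the TexFailCtrl == 0 check after masking both bits off.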
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  // ...
  unsigned IntrOpcode = Intr->BaseOpcode;
  // ...
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  // ...
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  // ...
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  // ...
  bool IsTexFail = false;
  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
                    TFE, LWE, IsTexFail))
    return false;

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  // A16 implies 16-bit gradients if the subtarget doesn't support G16.
  if (IsA16 && !STI.hasG16() && !IsG16)
    return false;
  // ...
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    // Be careful to allow atomic swap on 16-bit element vectors.
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      Ty.getSizeInBits() == 128 :
      Ty.getSizeInBits() == 64;

    if (BaseOpcode->AtomicX2) {
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    // ...
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      // ...
    } else {
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
    }
  }

  // Set G16 opcode.
  if (Subtarget->hasG16() && IsG16) {
    // ...
    IntrOpcode = G16MappingInfo->G16; // Set opcode to variant with _g16.
  }
  // ...
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  // ...
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    // ...
    if (!AddrOp.isReg())
      continue;
    // ...
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
  }

  // The legalizer preprocessed the intrinsic arguments. If we aren't using
  // NSA, these should have been packed into a single value in the first
  // address register.
  const bool UseNSA =
      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
    // ...
  }
  // ...
  if (IsGFX12Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else {
    if (Subtarget->hasGFX90AInsts()) {
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                     NumVDataDwords, NumVAddrDwords);
      if (Opcode == -1) {
        LLVM_DEBUG(
            dbgs()
            << "requested image instruction is not supported on this GPU\n");
        return false;
      }
    }
    if (Opcode == -1 &&
        STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
                                     NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
                                     NumVDataDwords, NumVAddrDwords);
  }
  // ...
  if (VDataOut) {
    if (BaseOpcode->AtomicX2) {
      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      MIB.addDef(TmpReg);
      if (!MRI->use_empty(VDataOut)) {
        BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
            .addReg(TmpReg, RegState::Kill, SubReg);
      }
    } else {
      MIB.addDef(VDataOut); // vdata output
    }
  }
  // ...
  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
      // ...
      MIB.addReg(SrcOp.getReg());
    }
  }

  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
  if (BaseOpcode->Sampler)
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
  // ...
  MIB.addImm(IsA16 && // a16 or r128
             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
  if (IsGFX10Plus)
    MIB.addImm(IsA16 ? -1 : 0);
  // ...
  if (IsD16)
    MIB.addImm(IsD16 ? -1 : 0);
  // ...
  MI.eraseFromParent();
  // ...
}
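// Note: NumVDataDwords/NumVAddrDwords drive the MIMG opcode lookup above; D16
// results pack two lanes per dword ((DMaskLanes + 1) / 2), and the NSA
// encodings let the address dwords live in non-sequential VGPRs.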
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  // ...
  unsigned Offset = MI.getOperand(6).getImm();
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
                 /* ... */;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      Function &F = I.getMF()->getFunction();
      DiagnosticInfoUnsupported NoFpRet(
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
    }
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
  default:
    break;
  }
  return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // ...
    if (!MRI->getRegClassOrNull(CCReg))
      // ...
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    // ...
    I.eraseFromParent();
    // ...
  }
  // ...
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  // ...
  I.eraseFromParent();
  // ...
}
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    // ...
  }
}
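// Note: the index covers as many 32-bit sub0..subN lanes as the value is
// dwords wide, so a 96-bit value maps to sub0_sub1_sub2, and anything smaller
// than 32 bits still occupies a full sub0 lane.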
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (!SrcRC || !DstRC)
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(DstRC);
  Register HiReg = MRI->createVirtualRegister(DstRC);
  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  if (IsVALU && STI.hasSDWA()) {
    // Write the low 16 bits of the high element into the high 16 bits of
    // the low element.
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
        /* ... */;
    // ...
  } else {
    Register TmpReg0 = MRI->createVirtualRegister(DstRC);
    Register TmpReg1 = MRI->createVirtualRegister(DstRC);
    Register ImmReg = MRI->createVirtualRegister(DstRC);
    if (IsVALU) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
          .addImm(16)
          .addReg(HiReg);
    }
    // ...
    unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
    unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
    // ...
    if (!IsVALU) {
      And.setOperandDead(3); // Dead scc
      Or.setOperandDead(3);  // Dead scc
    }
  }

  I.eraseFromParent();
  // ...
  int SubRegIdx = sizeToSubRegIndex(DstSize);
  if (SubRegIdx == -1)
    return false;
  // ...
  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  // ...
  if (SrcWithSubRC != SrcRC) {
    // ...
  }

  I.getOperand(1).setSubReg(SubRegIdx);
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  // ...
}
/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
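// Note: -16..64 is exactly the range of AMDGPU integer inline constants, so
// masks in that range can be encoded directly in the instruction instead of
// requiring a literal or a BFE.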
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    Register Reg, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI) const {
  // ...
}
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
    I.getOperand(2).getImm() : SrcTy.getSizeInBits();
  // ...
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  // ...
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
    if (DstSize <= 32)
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    // ...
    I.eraseFromParent();
    // ...
  }

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // Try an AND with an inline-constant mask if it saves code size.
    // ...
    I.eraseFromParent();
    // ...
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    // ...
    I.eraseFromParent();
    // ...
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    // ...
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      // ...
      I.eraseFromParent();
      // ...
    }

    // Extend a 32-bit value to 64 bits by building the high half explicitly.
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      // ...
      I.eraseFromParent();
      // ...
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      // ...
      I.eraseFromParent();
      // ...
    }
    // ...
    I.eraseFromParent();
    // ...
  }
  // ...
}
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  // ...
  Register Dst = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register Src = I.getOperand(1).getReg();
  // ...
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
      .addReg(Src);
  I.eraseFromParent();
  // ...
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  // ...
  if (ImmOp.isFPImm()) {
    // ...
  } else if (ImmOp.isCImm()) {
    // ...
  }
  // ...
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;

  unsigned Opcode;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
             AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    // ...
  } else {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    // ...
  }

  if (Size != 64) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    // ...
  }
  // ...
  if (IsSgpr && TII.isInlineConstant(APInt(64, Imm))) {
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
        .addImm(I.getOperand(1).getImm());
  } else {
    const TargetRegisterClass *RC = IsSgpr ?
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    // ...
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        /* ... */;
  }
  // ...
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
  // ...
}
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x80000000);

  // Set or toggle the sign bit.
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);

  MI.eraseFromParent();
  return true;
}
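// Note: a 64-bit scalar fneg only touches the sign bit of the high dword:
// XOR with 0x80000000 flips it, while fneg(fabs x) sets it unconditionally,
// so the combined operation folds into a single S_OR_B32.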
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x7fffffff);

  // Clear the sign bit.
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);

  MI.eraseFromParent();
  return true;
}
static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  // ...
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return;
  // ...
  for (unsigned i = 1; i != 3; ++i) {
    // ...
    if (isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      // ...
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }
  // ...
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
}
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;
  // ...
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;
  // ...
  return I && I->getMetadata("amdgpu.uniform");
}
static bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
  // ...
  // If DS instructions require M0 initialization, set it to -1.
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
}
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  if (Reg.isPhysical())
    return false;

  MachineInstr &MI = *MRI.getVRegDef(Reg);
  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI);

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
           isVCmpResult(MI.getOperand(2).getReg(), MRI);

  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  // ...
  if (!isVCC(CondReg, *MRI)) {
    // ...
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
  } else {
    // ...
    const bool Is64 = STI.isWave64();
    const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
    // ...
    BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
        .addReg(CondReg)
        .addReg(Exec);
    // ...
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  }

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
    MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  // ...
  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  // If a 32-bit half of the mask is known to be all ones, that half can be
  // copied through unchanged.
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
                   /* ... */;
    // ...
    I.eraseFromParent();
    // ...
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  // ...
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
  // ...
  assert(MaskTy.getSizeInBits() == 32 &&
         "ptrmask should have been narrowed during legalize");

  auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
                   /* ... */;
  // ...
  I.eraseFromParent();
  // ...

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
  // ...
  if (!CanCopyLow32) {
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        /* ... */;
  }
  // ...
  if (!CanCopyHi32) {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        /* ... */;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      /* ... */;

  I.eraseFromParent();
  return true;
}
/// Return the register to use for the index value, and the subregister to use
/// for the indirectly accessed register.
static std::pair<Register, unsigned>
computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
                        const TargetRegisterClass *SuperRC, Register IdxReg,
                        unsigned EltSize) {
  Register IdxBaseReg;
  int Offset;

  std::tie(IdxBaseReg, Offset) =
      AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
  if (IdxBaseReg == AMDGPU::NoRegister) {
    // This will happen if the index is a known constant. This should
    // ordinarily be legalized out, but handle it as a register just in case.
    // ...
    IdxBaseReg = IdxReg;
  }
  // ...
  // Skip out-of-bounds offsets, or else we would end up using an undefined
  // register.
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
    return false;
  // ...
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    // ...
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    // ...
    MI.eraseFromParent();
    return true;
  }
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(IdxReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
      /* ... */;
  MI.eraseFromParent();
  return true;
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  // ...
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
    return false;
  // ...
  std::tie(IdxReg, SubReg) =
      computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg, ValSize / 8);

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(IdxReg);
  // ...
  const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
  // ...
  MI.eraseFromParent();
  return true;
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  // The struct intrinsic variants add one additional operand over raw.
  const bool HasVIndex = MI.getNumOperands() == 9;
  Register VIndex;
  int OpOffset = 0;
  if (HasVIndex) {
    VIndex = MI.getOperand(4).getReg();
    OpOffset = 1;
  }

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
      getIConstantVRegValWithLookThrough(VOffset, *MRI);
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    break;
  case 2:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    break;
  case 4:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    break;
  }
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .add(MI.getOperand(2));
  // ...
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
        .addReg(VIndex)
        .addImm(AMDGPU::sub0)
        .addReg(VOffset)
        .addImm(AMDGPU::sub1);
    MIB.addReg(IdxReg);
  } else if (HasVIndex) {
    MIB.addReg(VIndex);
  } else if (HasVOffset) {
    MIB.addReg(VOffset);
  }

  MIB.add(MI.getOperand(1));            // rsrc
  MIB.add(MI.getOperand(5 + OpOffset)); // soffset
  MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  // ...
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  // ...
  StorePtrI.V = nullptr;
  // ...
  MI.eraseFromParent();
  return true;
}
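// Note: the _BOTHEN/_IDXEN/_OFFEN/_OFFSET suffix selects which VGPR address
// operands the MUBUF instruction consumes (index+offset, index only, offset
// only, or neither), which is why a combined index needs the REG_SEQUENCE.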
/// Match a zero extend from a 32-bit value to 64 bits, possibly in the
/// legalized G_MERGE_VALUES (s32 %x, s32 0) form.
static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
  // ...
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Register();

  assert(Def->getNumOperands() == 3 &&
         MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));
  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt()))
    return Def->getOperand(1).getReg();

  return Register();
}
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    break;
  case 2:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    break;
  case 4:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    break;
  }
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .add(MI.getOperand(2));
  // ...
  // Try to split the address into an SGPR base and a VGPR offset.
  if (!isSGPR(Addr)) {
    auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      // ...
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        // ...
      }
    }
  }

  if (isSGPR(Addr)) {
    // ...
    if (!VOffset) {
      VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      // ...
    }
  }
  // ...
  MIB.add(MI.getOperand(4))  // offset
     .add(MI.getOperand(5)); // cpol
  // ...
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  // ...
  MachineMemOperand *StoreMMO = MF->getMachineMemOperand(
      StorePtrI, /* ... */ sizeof(int32_t), Align(4));
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
    break;
  default:
    llvm_unreachable("unhandled smfmac intrinsic");
  }

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        /* ... */;
  }
  // ...
  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  // ...
  WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    const AMDGPU::ImageDimIntrinsicInfo *Intr =
        AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I));
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  Register Src = Root.getReg();
  unsigned Mods = 0;
  MachineInstr *MI = MRI->getVRegDef(Src);

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI->getVRegDef(Src);
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    // Fold fsub [+-]0 into fneg.
    // ...
    if (LHS && LHS->isZero()) {
      Mods |= SISrcMods::NEG;
      Src = MI->getOperand(2).getReg();
    }
  }

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }
  // ...
  return std::pair(Src, Mods);
}
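// Illustrative folding (derived from the logic above): for a source defined
// as %s = G_FNEG (G_FABS %x), selectVOP3ModsImpl returns %x with both NEG and
// ABS set, so the VALU instruction encodes -|x| with no extra instructions.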
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
    // ...
    BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
            TII.get(AMDGPU::COPY), VGPRSrc)
        .addReg(Src);
    Src = VGPRSrc;
  }
  return Src;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  Register Reg = Root.getReg();
  const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
  if (Def->getOpcode() == AMDGPU::G_FNEG ||
      Def->getOpcode() == AMDGPU::G_FABS)
    return {};
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
    Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
      // It's possible to see an f32 fneg here, but unlikely.
      // TODO: Treat f32 fneg as only high bit.
      MRI.getType(Src) == LLT::fixed_vector(2, 16)) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  }
  // ...
  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  return std::pair(Src, Mods);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
      = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
      = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, /*IsDOT=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
  // The literal i1 operand selects the source modifier for the next operand;
  // it is sign-extended to int64_t, so it is either 0 or -1.
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
    MachineOperand &Root) const {
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");
  // ...
}
static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  const TargetRegisterClass *DstRegClass;
  switch (Elts.size()) {
  case 8:
    DstRegClass = &AMDGPU::VReg_256RegClass;
    break;
  case 4:
    DstRegClass = &AMDGPU::VReg_128RegClass;
    break;
  case 2:
    DstRegClass = &AMDGPU::VReg_64RegClass;
    break;
  default:
    llvm_unreachable("unhandled Reg sequence size");
  }

  MachineIRBuilder B(*InsertPt);
  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {
    MIB.addReg(Elts[i]);
    MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
  }
  return MIB->getOperand(0).getReg();
}

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 SmallVectorImpl<Register> &Elts, Register &Src,
                                 MachineInstr *InsertPt,
                                 MachineRegisterInfo &MRI) {
  if (ModOpcode == TargetOpcode::G_FNEG) {
    Mods |= SISrcMods::NEG;
    // Check if all elements also have abs modifier.
    SmallVector<Register, 8> NegAbsElts;
    for (auto El : Elts) {
      Register FabsSrc;
      if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
        break;
      NegAbsElts.push_back(FabsSrc);
    }
    if (Elts.size() != NegAbsElts.size()) {
      // Neg
      Src = buildRegSequence(Elts, InsertPt, MRI);
    } else {
      // Neg and Abs
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
    }
  } else {
    assert(ModOpcode == TargetOpcode::G_FABS);
    // Abs
    Mods |= SISrcMods::NEG_HI;
    Src = buildRegSequence(Elts, InsertPt, MRI);
  }
}
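// Example (illustrative): if every element of a build_vector is
// G_FNEG(G_FABS(%x_i)), the caller collects the G_FNEG operands into Elts,
// and here NegAbsElts matches every element, so Mods gets NEG | NEG_HI and
// the REG_SEQUENCE is rebuilt directly from the %x_i registers; a whole
// column of fneg/fabs instructions collapses into two modifier bits.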
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsF32;

  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Based on first element decide which mod we match, neg or abs.
    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      if (ElF32->getOpcode() != ModOpcode)
        break;
      EltsF32.push_back(ElF32->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (BV->getNumSources() == EltsF32.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      Register FNegSrc;
      if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
        break;
      EltsV2F16.push_back(FNegSrc);
    }

    // All elements had fneg modifier.
    if (CV->getNumSources() == EltsV2F16.size()) {
      Mods |= SISrcMods::NEG;
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    assert(CV->getNumSources() > 0);
    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
    // Based on first element decide which mod we match, neg or abs.
    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;

    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
      if (ElV2F16->getOpcode() != ModOpcode)
        break;
      EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
    }

    // All elements had ModOpcode modifier.
    if (CV->getNumSources() == EltsV2F16.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
    if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
      }}};
    }
    // Non-inlineable splat floats should not fall through to the integer
    // immediate check.
    return {};
  }

  APInt ICst;
  if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) {
    if (TII.isInlineConstant(ICst))
      return {{
          [=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}};
  }

  return {};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Key = 1;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
  }};
}
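// Example (illustrative): for an index operand defined as
//   %idx:vgpr(s32) = G_LSHR %packed, 16
// selectSWMMACIndex16 folds the shift away, emitting %packed with
// index_key = 1 so the instruction reads the high 16-bit half directly;
// selectSWMMACIndex8 does the same for byte lanes, with keys 0 through 3.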
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
  }};
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

  if (SOffset && Offset) {
    EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                              /*HasSOffset=*/true);
    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
        AddrInfo.size() > 1) {
      const GEPInfo &GEPI2 = AddrInfo[1];
      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        if (Register OffsetReg =
                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
          Base = GEPI2.SgprParts[0];
          *SOffset = OffsetReg;
          *Offset = *EncodedImm;
          if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
            return true;

          // A negative immediate offset is only legal if the resulting
          // (soffset + immediate) cannot be negative.
          auto SKnown = KB->getKnownBits(*SOffset);
          if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
            return false;

          return true;
        }
      }
    }
  }

  EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                            /*HasSOffset=*/false);
  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR offset is unsigned.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      GEPI.Imm != 0) {
    // Selecting via an SGPR offset is OK here: the _IMM patterns are tried
    // before the _SGPR patterns, so the immediate forms already failed.
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        .addImm(GEPI.Imm);
    return true;
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
      return true;
    }
  }

  return false;
}
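// The SMRD helpers below each request one addressing combination from
// selectSmrdOffset by passing null for the pieces they cannot encode:
// selectSmrdImm wants sbase + encoded immediate (_IMM forms), selectSmrdSgpr
// wants sbase + SGPR soffset (_SGPR forms), and selectSmrdSgprImm wants
// sbase + soffset + immediate (_SGPR_IMM forms).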
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /*SOffset=*/nullptr, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /*Offset=*/nullptr))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  auto Default = std::pair(Root.getReg(), 0);

  if (!STI.hasFlatInstOffsets())
    return Default;

  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return Default;

  return std::pair(PtrBase, ConstOffset);
}
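// Example (illustrative): for a flat access at %base + 4092, the constant is
// folded into the instruction only if TII.isLegalFLATOffset(4092, AS,
// FlatVariant) holds for this subtarget's offset field; otherwise the
// untouched (Root, 0) pair is returned and the pointer add stays a separate
// instruction.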
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  Register Reg;
  int Offset;
  std::tie(Reg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  Register Reg;
  int Offset;
  std::tie(Reg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  Register Reg;
  int Offset;
  std::tie(Reg, Offset) = selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset).
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    } else {
      auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // Offset is too large.
          //
          // saddr + large_offset -> saddr +
          //                         (voffset = large_offset & ~MaxOffset) +
          //                         (large_offset & MaxOffset)
          int64_t SplitImmOffset, RemainderOffset;
          std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
              ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

          if (isUInt<32>(RemainderOffset)) {
            MachineInstr *MI = Root.getParent();
            MachineBasicBlock *MBB = MI->getParent();
            Register HighBits =
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(),
                    TII.get(AMDGPU::V_MOV_B32_e32), HighBits)
                .addImm(RemainderOffset);

            return {{
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
                [=](MachineInstrBuilder &MIB) { MIB.addReg(HighBits); },
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
            }};
          }
        }

        // If the plain 64-bit add of the constant can be selected within the
        // constant bus limit, prefer that to the saddr form.
        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
            !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;
      }
    }
  }

  // Match the variable offset.
  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // Look through the SGPR->VGPR copy.
    Register SAddr =
        getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      // It's possible voffset is an SGPR here, but the copy to VGPR will be
      // inserted later.
      if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
        return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
                 [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
      }
    }
  }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves needed to copy a 64-bit SGPR to VGPR.
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
}
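// The SADDR form selected above computes the address as
// sgpr64 base + zext(vgpr32 voffset) + imm, roughly (illustrative syntax):
//   global_load_dword v1, v0, s[4:5] offset:16
// Keeping the uniform part of the address in SGPRs saves VGPRs and avoids a
// VALU add on the divergent path.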
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      MachineInstr &I = *Root.getParent();
      MachineBasicBlock *BB = I.getParent();
      const DebugLoc &DL = I.getDebugLoc();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          .addFrameIndex(FI)
          .addReg(RHSDef->Reg)
          .setOperandDead(3); // Dead scc
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },    // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
// Check whether the flat scratch SVS swizzle bug affects this access.
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  if (!STI.hasFlatScratchSVSSwizzleBug())
    return false;

  // The bug affects the swizzling of SVS accesses if there is any carry out
  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
  // voffset to (soffset + inst_offset).
  auto VKnown = KB->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
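// Worked example of the check above (illustrative): if the low two bits of
// vaddr may be 3 and the low two bits of (saddr + inst_offset) may also be
// 3, then (VMax & 3) + (SMax & 3) == 6 >= 4, meaning a carry out of bit 1
// is possible, so the SVS form is rejected on subtargets with the swizzle
// bug. Only when no such carry is possible is the swizzled access known safe.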
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  Register OrigAddr = Addr;
  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },       // vaddr
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  if (!isSGPR(LHS))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },      // vaddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },      // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
      Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        .addImm(Offset & ~SIInstrInfo::getMaxMUBUFImmOffset(STI));

    // ... (render {rsrc, HighBits as vaddr, soffset, low offset bits})
  }

  // (%vaddr = G_PTR_ADD %base, %const) with a legal MUBUF immediate, or a
  // bare frame index.
  Register VAddr = Root.getReg();
  std::optional<int> FI;
  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    Register PtrBase;
    int64_t ConstOffset;
    std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
    if (ConstOffset != 0) {
      if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
          (!STI.privateMemoryResourceIsRangeChecked() ||
           KB->signBitIsZero(PtrBase))) {
        const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
        if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
          FI = PtrBaseDef->getOperand(1).getIndex();
        else
          VAddr = PtrBase;
        Offset = ConstOffset;
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // ... (render {rsrc, VAddr or FI as vaddr, soffset, Offset})
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an
  // offset don't seem to work.
  return KB->signBitIsZero(Base);
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an
  // offset don't seem to work.
  return KB->signBitIsZero(Base);
}

// G_OR is treated as an add whose operands have disjoint bits, so it cannot
// wrap unsigned; G_PTR_ADD qualifies when it carries the nuw flag.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}

// Check that the address is legal for flat scratch in the form SGPR + Imm.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);

  if (isNoUnsignedWrap(AddrMI))
    return true;

  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =
        getIConstantVRegValWithLookThrough(RHS, *MRI);
    // If the immediate offset is negative but within a limited range, the
    // base address cannot also be negative, or the sum would wrap.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      return true;
  }

  return KB->signBitIsZero(LHS);
}

// Check that the address is legal for flat scratch in the form SGPR + VGPR.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);

  if (isNoUnsignedWrap(AddrMI))
    return true;

  if (STI.hasSignedScratchOffsets())
    return true;

  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}

// Check that the address is legal for flat scratch in the form
// SGPR + VGPR + Imm.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
  // values.
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  Register Base = AddrMI->getOperand(1).getReg();
  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Base, *MRI);
  std::optional<ValueAndVReg> RHSOffset =
      getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI);
  assert(RHSOffset);

  // If the immediate offset is negative but within a limited range, the base
  // address cannot also be negative.
  if (isNoUnsignedWrap(BaseDef->MI) &&
      (isNoUnsignedWrap(AddrMI) ||
       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))
    return true;

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
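// Example (illustrative): a 32-bit shift reads only the low 5 bits of its
// shift amount, so for
//   %amt:_(s32) = G_AND %x, 31
//   %r:_(s32)   = G_SHL %y, %amt
// isUnneededShiftMask(the G_AND, /*ShAmtBits=*/5) returns true because
// countr_one(31) == 5, and selection can use %x as the shift amount
// directly, dropping the mask.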
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  Register Reg = Root.getReg();

  std::optional<DefinitionAndSourceRegister> Def =
      getDefSrcRegIgnoringCopies(Reg, *MRI);
  assert(Def && "this shouldn't be an optional result");
  // ... (match a wave address or small constant and render {rsrc, soffset,
  //      offset})
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
    MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
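// ds_read2/ds_write2 encode two 8-bit offsets, each in units of the element
// size. For example (illustrative), a pair of 32-bit slots at byte offsets
// 40 and 44 with Size == 4 is encoded as offset0 = 10, offset1 = 11; byte
// offsets that are not multiples of Size, or whose scaled values do not fit
// in 8 bits, fail isDSOffset2Legal and fall back to the single-offset form.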
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}

static void addZeroImm(MachineInstrBuilder &MIB) { MIB.addImm(0); }

/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If
/// BasePtr is not valid, a null resource descriptor is used.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc2)
      .addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc3)
      .addImm(FormatHi);

  // Build the half of the subregister with the constants before building the
  // full 128-bit register. If we are building multiple resource descriptors,
  // this will allow CSEing of the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2)
      .addImm(AMDGPU::sub0)
      .addReg(RSrc3)
      .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)
        .addDef(RSrcLo)
        .addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi)
      .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}
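// The 128-bit buffer resource built above packs the 64-bit base pointer into
// sub0_sub1 and the two constant format/flags dwords into sub2_sub3. Keeping
// the constant half in its own 64-bit REG_SEQUENCE lets CSE share it between
// multiple descriptors that differ only in base pointer.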
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}

AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd
      = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

    // FIXME: Need to fix extra SGPR->VGPR copy when split addressing is used.
    //
    // TODO: Remove this when we have copy folding optimizations after
    // RegBankSelect.
    Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
    Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
  }

  return Data;
}

/// Return true if the MUBUF addr64 addressing mode should be used: i.e. the
/// variable part of the address must live in the VGPR vaddr operand rather
/// than in the SGPR resource descriptor.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

/// Split an immediate offset \p ImmOffset depending on whether it fits in
/// the immediate field of the MUBUF instruction, moving the overflow into
/// the soffset register.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(SOffset)
      .addImm(ImmOffset);
  ImmOffset = 0;
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // FIXME: Predicates should stop this from reaching here.
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the default resource from a 0 address.
        VAddr = N0;
      } else {
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  // FIXME: Pattern should not reach here.
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or
  // (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // TODO: Look through extensions for 32-bit soffset.
  MachineIRBuilder B(*Root.getParent());

  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  // FIXME: Use defaulted operands for trailing 0s and remove from the complex
  // pattern.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },   // vaddr
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, // cpol
      addZeroImm, // tfe
      addZeroImm  // swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, // cpol
      addZeroImm, // tfe
      addZeroImm  // swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(
      STI, *OffsetVal, /*IsBuffer=*/true, /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);

  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), KB, /*CheckNUW*/ true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
      STI, Offset, /*IsBuffer=*/true, /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  assert(MRI->getType(SOffset) == LLT::scalar(32));
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
  if (MI->getOpcode() == AMDGPU::G_BITCAST)
    return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
  return MI;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
                           Register &Out) {
  In = getSrcRegIgnoringCopies(In, MRI);
  MachineInstr *Inst = MRI.getVRegDef(In);
  if (Inst->getOpcode() != AMDGPU::G_TRUNC)
    return false;

  MachineInstr *TruncOp =
      getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI);
  TruncOp = stripBitCast(TruncOp, MRI);

  // G_LSHR x, (G_CONSTANT i32 16)
  if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
    auto SrlAmount = getIConstantVRegValWithLookThrough(
        TruncOp->getOperand(2).getReg(), MRI);
    if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
      MachineInstr *SrlOp =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      Out = stripBitCast(SrlOp, MRI)->getOperand(0).getReg();
      return true;
    }
  }

  // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|-1)
  if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
    assert(MRI.getType(TruncOp->getOperand(0).getReg()) ==
           LLT::fixed_vector(2, 16));

    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
    assert(Mask.size() == 2);

    if (Mask[0] == 1 && Mask[1] <= 1) {
      MachineInstr *LHS =
          getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
      Out = stripBitCast(LHS, MRI)->getOperand(0).getReg();
      return true;
    }
  }

  return false;
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  MachineInstr *MI = MRI->getVRegDef(Src);
  if (MI->getOpcode() == AMDGPU::G_FPEXT) {
    MachineOperand *MO = &MI->getOperand(1);
    Src = MO->getReg();
    MI = MRI->getVRegDef(Src);

    assert(MRI->getType(Src) == LLT::scalar(16));

    // See through bitcasts.
    // FIXME: Would be nice to use stripBitCast here.
    if (MI->getOpcode() == AMDGPU::G_BITCAST) {
      MO = &MI->getOperand(1);
      Src = MO->getReg();
      MI = MRI->getVRegDef(Src);
    }

    const auto CheckAbsNeg = [&]() {
      // Be careful about folding modifiers if we already have an abs. fneg is
      // applied last, so we don't want to apply an earlier fneg.
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
        MI = MRI->getVRegDef(Src);
        // ... (merge ModsTmp's NEG/ABS bits into Mods)
      }
    };

    CheckAbsNeg();

    // op_sel/op_sel_hi decide the source type and source. If the source's
    // op_sel_hi is set, it indicates a conversion from fp16; if the source's
    // op_sel is set, it picks the high half of the source register.
    Mods |= SISrcMods::OP_SEL_1;

    Register ExtractHiEltReg;
    if (isExtractHiElt(*MRI, Src, ExtractHiEltReg)) {
      Mods |= SISrcMods::OP_SEL_0;
      Src = ExtractHiEltReg;
      MachineInstr *ExtractHiEltMI = MRI->getVRegDef(Src);
      MI = ExtractHiEltMI;
      MO = &MI->getOperand(0);
      CheckAbsNeg();
    }

    Matched = true;
  }

  return std::pair(Src, Mods);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
      return false;
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
  Register TmpReg0;

  // For S_BARRIER_INIT, the member count is read from M0[16:22], so shift it
  // into place before combining it with the barrier id.
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ... (S_LSHL_B32 of MemberCount into TmpReg0)
  }

  // If the barrier id is not an inline constant, it has to go through M0.
  if (!BarValImm) {
    // ...
    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      // ... (S_OR_B32 of the barrier id and TmpReg0 into TmpReg1, then copy
      //      to M0)
    }
    // ...
  }

  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());

  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
    MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  else {
    assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
    MIB.addImm(Op.getCImm()->getSExtValue());
  }
}

void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}

/// This only really exists to satisfy DAG type checking machinery, so is a
/// no-op here.
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  MIB.addImm(MI.getOperand(OpIdx).getImm());
}

void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
}

void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &
             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                       : AMDGPU::CPol::ALL_pregfx12));
}

void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
                                                 : AMDGPU::CPol::SWZ_pregfx12);
  MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

// ...

void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}