#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
  MRI = &MF.getRegInfo();
static Register getWaveAddress(const MachineInstr *Def) {
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
}
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    if (!Ty.isValid() || Ty.getSizeInBits() != 1)
      return false;
    // An s1 G_TRUNC result is never treated as VCC.
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
           RC->hasSuperClassEq(TRI.getBoolRC());
  }

  const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
  return RB->getID() == AMDGPU::VCCRegBankID;
}

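// Rewrite a copy-like intrinsic (WQM, WWM and friends) into its target
// opcode, keeping source and destination in one common register class.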
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC =
      TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
    return false;

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
    return false;

  const MCInstrDesc &MCID = MI.getDesc();
  (void)MCID;
  MI.getOperand(0).setIsEarlyClobber(true);
  return true;
}

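// COPY selection. The interesting case is an s1 "boolean" destination in the
// VCC bank: constant sources fold to a wave-mask S_MOV, anything else is
// masked down to bit 0 and compared against zero.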
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC =
          TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
        return false;

      const TargetRegisterClass *SrcRC =
          TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, true, true);
      if (ConstVal) {
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        // We can't trust the high bits at this point, so clear them before
        // comparing against zero.
        if (MRI->getType(SrcReg) == LLT::scalar(16)) {
          assert(Subtarget->useRealTrue16Insts());
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
              .addImm(NoMods)
              .addImm(1)
              .addImm(NoMods)
              .addReg(SrcReg)
              .addImm(NoMods);
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
              .addImm(NoMods)
              .addImm(0)
              .addImm(NoMods)
              .addReg(MaskedReg)
              .addImm(NoMods);
        } else {
          bool IsSGPR = TRI.isSGPRClass(SrcRC);
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
          auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
                         .addImm(1)
                         .addReg(SrcReg);
          if (IsSGPR)
            And.setOperandDead(3); // Dead scc
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
              .addImm(0)
              .addReg(MaskedReg);
        }
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      return false;

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }

  return true;
}

bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();

  Register VCCReg = I.getOperand(1).getReg();
  MachineInstr *Cmp;
  if (STI.hasScalarCompareEq64()) {
    unsigned CmpOpc =
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
    Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
  } else {
    // Without a 64-bit scalar compare, OR the mask with itself to set SCC.
    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
              .addReg(VCCReg)
              .addReg(VCCReg);
  }

  Register DstReg = I.getOperand(0).getReg();
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(AMDGPU::SCC);

  if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
    return false;
  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
}

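// Materialize a wave mask (VCC bank value) from an SCC bit. Constant inputs
// fold to 0 or the exec mask; otherwise SCC selects between exec and zero.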
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(SrcReg, *MRI);

  if (Arg) {
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(Value ? -1 : 0);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC).addReg(SrcReg);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
  MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                             .addImm(-1)
                             .addImm(0);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
}

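// A read-any-lane is selected as V_READFIRSTLANE_B32; for a wave-uniform
// value the first active lane is as good as any other.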
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
                 .addReg(SrcReg);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*RFL, TII, TRI, RBI);
}

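// G_PHI: constrain the def and each incoming value to a class matching its
// register bank, then rewrite to TargetOpcode::PHI.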
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

  const RegClassOrRegBank &RegClassOrBank =
      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
      dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
  if (!DefRC) {
    const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
    if (!DefRC)
      return false;
  }

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
    if (RB) {
      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
        return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MI->getParent();
  Register DstReg = MRI->createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(MO.getReg(), 0, ComposedSubIdx);
    return MachineOperand::CreateReg(DstReg, false);
  }

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

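// Scalar and VCC-bank logical ops map onto the S_AND/S_OR/S_XOR family; the
// 64-bit forms handle wave64 lane masks.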
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
  I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64)));

  // Dead implicit-def of scc
  I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
                                         true, // isImp
                                         false, // isKill
                                         true)); // isDead
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

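// 32-bit add/sub maps directly onto one SALU or VALU instruction; 64-bit
// operations are split into lo/hi halves chained through carry and
// reassembled with REG_SEQUENCE.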
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  LLT Ty = MRI->getType(DstReg);
  if (Ty.isVector())
    return false;

  unsigned Size = Ty.getSizeInBits();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
          BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
              .add(I.getOperand(1))
              .add(I.getOperand(2))
              .setOperandDead(3); // Dead scc
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarryInsts()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
            .addDef(UnusedCarry, RegState::Dead)
            .add(I.getOperand(1))
            .add(I.getOperand(2))
            .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
        .add(Lo1)
        .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
        .add(Hi1)
        .add(Hi2)
        .setOperandDead(3); // Dead scc
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
        .addDef(CarryReg)
        .add(Lo1)
        .add(Lo2)
        .addImm(0);
    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
            .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
            .add(Hi1)
            .add(Hi2)
            .addReg(CarryReg, RegState::Kill)
            .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

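// Carry-in/carry-out arithmetic. VALU forms carry through VCC; SALU forms
// route the carry bit through copies to and from SCC.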
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
    I.addOperand(*MF, MachineOperand::CreateImm(0));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  }

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3); // Dead scc
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
        .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  if (HasCarryIn &&
      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  MachineFunction *MF = I.getMF();
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else if (UseNoCarry)
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  if (UseNoCarry)
    I.removeOperand(1);

  I.setDesc(TII.get(Opc));
  I.addOperand(*MF, MachineOperand::CreateImm(0));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

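// A G_EXTRACT of a 32-bit aligned slice is just a subregister-indexed COPY.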
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  unsigned DstSize = DstTy.getSizeInBits();

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;

  // 16-bit operations really use 32-bit registers.
  if (DstSize == 16)
    DstSize = 32;

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC)
    return false;
  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, DstSize / 32);
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  if (!SrcRC)
    return false;

  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                    *SrcRC, I.getOperand(1));
  const DebugLoc &DL = I.getDebugLoc();
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, {}, SubReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg());
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;

  // Destination banks may be mixed for an SGPR source; this relies on the
  // same subregister indices being valid for every destination class.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, {}, SubRegs[I]);

    // Make sure the subregister index is valid for the source register.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
      return false;

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

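// v2s16 build_vector: constant pairs fold into one 32-bit move, VALU cases
// use V_AND + V_LSHL_OR, and SALU cases pick an S_PACK_* variant.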
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT SrcTy = MRI->getType(Src0);
  const unsigned SrcSize = SrcTy.getSizeInBits();

  // BUILD_VECTOR with >=32-bit sources is handled by MERGE_VALUES.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }

  // The selection logic below is for v2s16 only.
  Register Dst = MI.getOperand(0).getReg();
  if (MRI->getType(Dst) != LLT::fixed_vector(2, 16) ||
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
       SrcTy != LLT::scalar(32)))
    return selectImpl(MI, *CoverageInfo);

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *BB = MI.getParent();

  auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true);
  if (ConstSrc1) {
    auto ConstSrc0 =
        getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true);
    if (ConstSrc0) {
      const int64_t K0 = ConstSrc0->Value.getSExtValue();
      const int64_t K1 = ConstSrc1->Value.getSExtValue();
      uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
      uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
      uint32_t Imm = Lo16 | (Hi16 << 16);

      // VALU
      if (IsVector) {
        BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm);
        MI.eraseFromParent();
        return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
      }

      // SALU
      BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm);
      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
    }
  }

  // (build_vector $src0, undef) -> copy $src0
  MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    MI.removeOperand(2);
    const auto &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);
  }

  if (IsVector) {
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
                   .addImm(0xFFFF)
                   .addReg(Src0);
    if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
      return false;

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
              .addReg(Src1)
              .addImm(16)
              .addReg(TmpReg);
    if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
      return false;

    MI.eraseFromParent();
    return true;
  }

  Register ShiftSrc0;
  Register ShiftSrc1;

  // With multiple uses of the shift, this will duplicate the shift and
  // increase register pressure.
  bool Shift0 = mi_match(
      Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16))));
  bool Shift1 = mi_match(
      Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16))));

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift1) {
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift0) {
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
                     .addReg(ShiftSrc0)
                     .addImm(16)
                     .setOperandDead(3); // Dead scc

      MI.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    }
    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);
    }
  }

  MI.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  unsigned InsSize = Src1Ty.getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  // FIXME: These cases should have been illegal and unnecessary to check here.
  if (Offset % 32 != 0 || InsSize % 32 != 0)
    return false;

  // Currently not handled by getSubRegFromChannel.
  if (InsSize > 128)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)
    return false;

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  if (!DstRC)
    return false;

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  // Deal with weird cases where the class only partially supports the subreg
  // index.
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
    return false;

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg)
      .addImm(SubReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg)
                 .addReg(SrcReg)
                 .addReg(OffsetReg)
                 .addReg(WidthReg);
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)
    return selectImpl(MI, *CoverageInfo);

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(2).getReg();
  Register M0Val = MI.getOperand(6).getReg();
  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
    return false;

  // This requires two instructions; the second one (V_INTERP_P1LV_F16) is
  // elided in this excerpt.
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0Val);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(2)
      .addImm(MI.getOperand(4).getImm())  // $attr
      .addImm(MI.getOperand(3).getImm()); // $attrchan

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
    return selectImpl(MI, *CoverageInfo);

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  Register VDst = MI.getOperand(0).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register LaneSelect = MI.getOperand(3).getReg();
  Register VDstIn = MI.getOperand(4).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
      getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
  if (ConstSelect) {
    // The selector has to be an inline immediate.
    MIB.addReg(Val);
    MIB.addImm(ConstSelect->Value.getSExtValue() &
               maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
  } else {
    std::optional<ValueAndVReg> ConstVal =
        getIConstantVRegValWithLookThrough(Val, *MRI);

    // An inline-immediate value avoids the copy to m0.
    if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value.getSExtValue(),
                                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());
      MIB.addReg(LaneSelect);
    } else {
      MIB.addReg(Val);

      // Avoid a VALU hazard on the lane select by keeping it out of M0.
      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass,
                                   *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
          .addReg(LaneSelect);
      MIB.addReg(AMDGPU::M0);
    }
  }

  MIB.addReg(VDstIn);

  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();

  LLT Ty = MRI->getType(Dst0);
  unsigned Opc;
  if (Ty == LLT::scalar(32))
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
  else if (Ty == LLT::scalar(64))
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  else
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();

  Register Numer = MI.getOperand(3).getReg();
  Register Denom = MI.getOperand(4).getReg();
  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
                 .addDef(Dst1)
                 .addImm(0).addUse(Src0)   // src0_modifiers, src0
                 .addImm(0).addReg(Denom)  // src1_modifiers, src1
                 .addImm(0).addReg(Numer)  // src2_modifiers, src2
                 .addImm(0).addImm(0);     // clamp, omod

  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

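// Manual selection for intrinsics without side effects that the generated
// matcher does not handle.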
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : {DstReg, Src0Reg, Src1Reg})
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  case Intrinsic::amdgcn_wave_shuffle:
    return selectWaveShuffleIntrin(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

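// Map a predicate and operand type to a VALU compare opcode, choosing among
// the 16-bit (true16/fake16), 32-bit and 64-bit encodings.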
static int getV_CMPOpcode(CmpInst::Predicate P, LLT Ty,
                          const GCNSubtarget &ST) {
  unsigned Size = Ty.getSizeInBits();
  if (Size == 16 && !ST.has16BitInsts())
    return -1;

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
                          unsigned S64Opc) {
    if (Size == 16)
      return ST.hasTrue16BitInsts()
                 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
                 : S16Opc;
    if (Size == 32)
      return S32Opc;
    return S64Opc;
  };

  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
  case CmpInst::ICMP_SGT:
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
  case CmpInst::ICMP_UGT:
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

  case CmpInst::FCMP_OEQ:
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
  case CmpInst::FCMP_OGT:
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
  case CmpInst::FCMP_OGE:
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
  case CmpInst::FCMP_OLT:
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
  case CmpInst::FCMP_OLE:
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
  case CmpInst::FCMP_ONE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_ORD:
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
  case CmpInst::FCMP_UNO:
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
  case CmpInst::FCMP_UEQ:
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
  case CmpInst::FCMP_UGT:
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
  case CmpInst::FCMP_UGE:
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
  case CmpInst::FCMP_ULT:
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
  case CmpInst::FCMP_ULE:
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
  case CmpInst::FCMP_UNE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_TRUE:
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
  case CmpInst::FCMP_FALSE:
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
  }
}

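// Map a predicate to a scalar compare opcode. 64-bit integer compares require
// hasScalarCompareEq64(); 16-bit float compares require SALU float support.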
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE: return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ: return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size == 32) {
    switch (P) {
    case CmpInst::ICMP_NE:  return AMDGPU::S_CMP_LG_U32;
    case CmpInst::ICMP_EQ:  return AMDGPU::S_CMP_EQ_U32;
    case CmpInst::ICMP_SGT: return AMDGPU::S_CMP_GT_I32;
    case CmpInst::ICMP_SGE: return AMDGPU::S_CMP_GE_I32;
    case CmpInst::ICMP_SLT: return AMDGPU::S_CMP_LT_I32;
    case CmpInst::ICMP_SLE: return AMDGPU::S_CMP_LE_I32;
    case CmpInst::ICMP_UGT: return AMDGPU::S_CMP_GT_U32;
    case CmpInst::ICMP_UGE: return AMDGPU::S_CMP_GE_U32;
    case CmpInst::ICMP_ULT: return AMDGPU::S_CMP_LT_U32;
    case CmpInst::ICMP_ULE: return AMDGPU::S_CMP_LE_U32;
    case CmpInst::FCMP_OEQ: return AMDGPU::S_CMP_EQ_F32;
    case CmpInst::FCMP_OGT: return AMDGPU::S_CMP_GT_F32;
    case CmpInst::FCMP_OGE: return AMDGPU::S_CMP_GE_F32;
    case CmpInst::FCMP_OLT: return AMDGPU::S_CMP_LT_F32;
    case CmpInst::FCMP_OLE: return AMDGPU::S_CMP_LE_F32;
    case CmpInst::FCMP_ONE: return AMDGPU::S_CMP_LG_F32;
    case CmpInst::FCMP_ORD: return AMDGPU::S_CMP_O_F32;
    case CmpInst::FCMP_UNO: return AMDGPU::S_CMP_U_F32;
    case CmpInst::FCMP_UEQ: return AMDGPU::S_CMP_NLG_F32;
    case CmpInst::FCMP_UGT: return AMDGPU::S_CMP_NLE_F32;
    case CmpInst::FCMP_UGE: return AMDGPU::S_CMP_NLT_F32;
    case CmpInst::FCMP_ULT: return AMDGPU::S_CMP_NGE_F32;
    case CmpInst::FCMP_ULE: return AMDGPU::S_CMP_NGT_F32;
    case CmpInst::FCMP_UNE: return AMDGPU::S_CMP_NEQ_F32;
    default:
      return -1;
    }
  }

  if (Size == 16) {
    if (!STI.hasSALUFloatInsts())
      return -1;

    switch (P) {
    case CmpInst::FCMP_OEQ: return AMDGPU::S_CMP_EQ_F16;
    case CmpInst::FCMP_OGT: return AMDGPU::S_CMP_GT_F16;
    case CmpInst::FCMP_OGE: return AMDGPU::S_CMP_GE_F16;
    case CmpInst::FCMP_OLT: return AMDGPU::S_CMP_LT_F16;
    case CmpInst::FCMP_OLE: return AMDGPU::S_CMP_LE_F16;
    case CmpInst::FCMP_ONE: return AMDGPU::S_CMP_LG_F16;
    case CmpInst::FCMP_ORD: return AMDGPU::S_CMP_O_F16;
    case CmpInst::FCMP_UNO: return AMDGPU::S_CMP_U_F16;
    case CmpInst::FCMP_UEQ: return AMDGPU::S_CMP_NLG_F16;
    case CmpInst::FCMP_UGT: return AMDGPU::S_CMP_NLE_F16;
    case CmpInst::FCMP_UGE: return AMDGPU::S_CMP_NLT_F16;
    case CmpInst::FCMP_ULT: return AMDGPU::S_CMP_NGE_F16;
    case CmpInst::FCMP_ULE: return AMDGPU::S_CMP_NGT_F16;
    case CmpInst::FCMP_UNE: return AMDGPU::S_CMP_NEQ_F16;
    default:
      return -1;
    }
  }

  return -1;
}

bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
        .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();
    return Ret;
  }

  if (I.getOpcode() == AMDGPU::G_FCMP)
    return false;

  int Opcode = getV_CMPOpcode(Pred, MRI->getType(SrcReg), *Subtarget);
  if (Opcode == -1)
    return false;

  MachineInstrBuilder ICmp;
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers)) {
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .addImm(0)
               .add(I.getOperand(2))
               .addImm(0)
               .add(I.getOperand(3));
  } else {
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));
  }

  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), *MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
    return false;

  LLT DstTy = MRI->getType(Dst);
  if (DstTy.getSizeInBits() != STI.getWavefrontSize())
    return false;

  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  // i1 inputs are not supported in GlobalISel.
  if (Size == 1)
    return false;

  auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
  if (!CmpInst::isIntPredicate(Pred) && !CmpInst::isFPPredicate(Pred)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  }

  const int Opcode = getV_CMPOpcode(Pred, MRI->getType(SrcReg), *Subtarget);
  if (Opcode == -1)
    return false;

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
  Register Src0Reg =
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
  Register Src1Reg =
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  if (!constrainSelectedInstRegOperands(*SelectedMI, TII, TRI, RBI))
    return false;

  I.eraseFromParent();
  return true;
}

static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI,
                                    MachineBasicBlock *MBB) {
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (MI->getParent() != MBB)
    return false;

  // Lane mask generated by an SCC-to-VCC copy.
  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
      return true;
  }

  return false;
}

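// Ballot must clear input lane-mask bits that are zero in exec. That is an
// AND with exec in general, but a plain copy when the producer already used
// the same exec (see isLaneMaskFromSameBlock above).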
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  // The return type usually matches the wave size; i64 ballots are also
  // supported in wave32 mode.
  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
    return false;

  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(SrcReg, *MRI);

  Register Dst = DstReg;
  // i64 ballot on wave32: compute the low 32 bits into a fresh register.
  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());
  }

  if (Arg) {
    const int64_t Value = Arg->Value.getZExtValue();
    if (Value == 0) {
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      BuildMI(*BB, &I, DL, TII.get(Opcode), Dst).addImm(0);
    } else {
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst).addReg(TRI.getExec());
    }
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
      return false;
  } else if (isLaneMaskFromSameBlock(SrcReg, *MRI, BB)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst).addReg(SrcReg);
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
      return false;
  } else {
    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), Dst)
                   .addReg(SrcReg)
                   .addReg(TRI.getExec())
                   .setOperandDead(3); // Dead scc
    if (!constrainSelectedInstRegOperands(*And, TII, TRI, RBI))
      return false;
  }

  // i64 ballot on wave32: zero-extend the i32 result to i64.
  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(Dst)
        .addImm(AMDGPU::sub0)
        .addReg(HiReg)
        .addImm(AMDGPU::sub1);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();
  auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
  auto *RelocSymbol = cast<GlobalVariable>(
      M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));

  MachineBasicBlock *BB = I.getParent();
  BuildMI(*BB, &I, I.getDebugLoc(),
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
      .addGlobalAddress(RelocSymbol, 0, SIInstrInfo::MO_ABS32_LO);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Triple::OSType OS = MF->getTarget().getTargetTriple().getOS();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);

  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    MIB.addImm(MFI->getLDSSize());
  } else {
    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =
        Intrinsic::getOrInsertDeclaration(M, Intrinsic::amdgcn_groupstaticsize);
    MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
  }

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineBasicBlock *MBB = I.getParent();
  MachineFunction &MF = *MBB->getParent();
  const DebugLoc &DL = I.getDebugLoc();

  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
    return false;

  // Check for kernel and shader functions.
  if (Depth != 0 ||
      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
        .addImm(0);
    I.eraseFromParent();
    return true;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  // There is a call to @llvm.returnaddress in this function.
  MFI.setReturnAddressIsTaken(true);

  // Get the return address reg and mark it as an implicit live-in.
  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
  Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
                                             AMDGPU::SReg_64RegClass, DL);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      .addReg(LiveIn);
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
  // SelectionDAG uses for wave32 vs wave64.
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  Register Reg = MI.getOperand(1).getReg();
  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  return true;
}

bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
    // TODO: Move this to the IR verifier.
    const Function &Fn = MF->getFunction();
    Fn.getContext().diagnose(DiagnosticInfoUnsupported(
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));
  }

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
      const Function &Fn = MF->getFunction();
      Fn.getContext().diagnose(DiagnosticInfoUnsupported(
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));
      CountDw = 1;
    }
  }

  if (IndexOperand) {
    const Function &Fn = MF->getFunction();
    Fn.getContext().diagnose(DiagnosticInfoUnsupported(
        Fn, "ds_ordered_count: bad index operand", DL));
  }

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
    Offset1 |= (CountDw - 1) << 6;

  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  Register M0Val = MI.getOperand(2).getReg();
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0Val);

  Register DstReg = MI.getOperand(0).getReg();
  Register ValReg = MI.getOperand(3).getReg();
  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
          .addReg(ValReg)
          .addImm(Offset)
          .cloneMemRefs(MI);

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI);
  MI.eraseFromParent();
  return Ret;
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
    return false;

  // intrinsic ID, vsrc, offset
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
  unsigned ImmOffset;

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  MachineInstr *Readfirstlane = nullptr;

  // If the VGPR input was legalized, strip out the readfirstlane to analyze
  // the incoming offset, in case there's an add of a constant.
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    BaseOffset = OffsetDef->getOperand(1).getReg();
    OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // With a constant offset, use the 0 in m0 as the base.
    ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addImm(0);
  } else {
    std::tie(BaseOffset, ImmOffset) =
        AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset, VT);

    if (Readfirstlane) {
      // Put the readfirstlane back on the variable component.
      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass,
                                        *MRI))
        return false;

      Readfirstlane->getOperand(1).setReg(BaseOffset);
      BaseOffset = Readfirstlane->getOperand(0).getReg();
    } else {
      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))
        return false;
    }

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
        .addReg(BaseOffset)
        .addImm(16)
        .setOperandDead(3); // Dead scc
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(M0Base);
  }

  unsigned Opc = gwsIntrinToOpcode(IID);
  const MCInstrDesc &InstrDesc = TII.get(Opc);
  auto MIB = BuildMI(*MBB, &MI, DL, InstrDesc);

  if (HasVSrc) {
    Register VSrc = MI.getOperand(1).getReg();

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

    if (!SubRC) {
      MIB.addReg(VSrc);
      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
        return false;
    } else {
      // Only the low half of data is read; widen with an undef high half.
      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
        return false;

      Register UndefReg = MRI->createVirtualRegister(SubRC);
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), DataReg)
          .addReg(VSrc)
          .addImm(AMDGPU::sub0)
          .addReg(UndefReg)
          .addImm(AMDGPU::sub1);
      MIB.addReg(DataReg);
    }
  }

  MIB.addImm(ImmOffset)
     .cloneMemRefs(MI);

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS;

  unsigned Offset;
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  // TODO: Should this try to look through readfirstlane like GWS?
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
    Offset = 0;
  }

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(PtrBase);
  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
                 .addImm(Offset)
                 .addImm(IsGDS ? -1 : 0)
                 .cloneMemRefs(MI);
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  MFInfo->setInitWholeWave();
  return selectImpl(MI, *CoverageInfo);
}

static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
                         bool &IsTexFail) {
  if (TexFailCtrl)
    IsTexFail = true;

  TFE = TexFailCtrl & 0x1;
  TexFailCtrl &= ~(uint64_t)0x1;
  LWE = TexFailCtrl & 0x2;
  TexFailCtrl &= ~(uint64_t)0x2;

  return TexFailCtrl == 0;
}

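// Image intrinsic selection: derive dmask and vdata/vaddr dword counts,
// choose a MIMG encoding for the subtarget generation, then assemble the
// operand list in encoding order.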
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  unsigned IntrOpcode = Intr->BaseOpcode;

  Register ResultDef = MI.getOperand(0).getReg();
  if (MRI->use_nodbg_empty(ResultDef)) {
    // The result is unused; a no-return opcode variant can be substituted
    // (handling elided in this excerpt).
  }

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);

  const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
  const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
  const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;
  LLT VDataTy;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

  bool Unorm;
  if (!BaseOpcode->Sampler)
    Unorm = true;
  else
    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool TFE;
  bool LWE;
  bool IsTexFail = false;
  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
                    TFE, LWE, IsTexFail))
    return false;

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  // A16 implies 16-bit gradients if the subtarget doesn't support G16.
  if (IsA16 && !STI.hasG16() && !IsG16)
    return false;

  unsigned DMask = 0;
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    // Be careful to allow atomic swap on 16-bit element vectors.
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      Ty.getSizeInBits() == 128 :
      Ty.getSizeInBits() == 64;

    if (BaseOpcode->AtomicX2) {
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    DMaskLanes = BaseOpcode->Gather4 ? 4 : llvm::popcount(DMask);

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
    } else {
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
    }
  }

  // Set G16 opcode.
  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
        AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
    assert(G16MappingInfo);
    IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
  }

  // TODO: Check this in verifier.
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    // Skip the $noregs and 0s inserted during legalization.
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())
      continue;

    Register Addr = AddrOp.getReg();
    if (!Addr)
      break;

    ++NumVAddrRegs;
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
  }

  // The legalizer preprocessed the intrinsic arguments. If we aren't using
  // NSA, these should have been packed into a single value in the first
  // address register.
  const bool UseNSA =
      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
    LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
    return false;
  }

  if (IsTexFail)
    ++NumVDataDwords;

  int Opcode = -1;
  if (IsGFX12Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else {
    if (Subtarget->hasGFX90AInsts()) {
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                     NumVDataDwords, NumVAddrDwords);
      if (Opcode == -1) {
        LLVM_DEBUG(
            dbgs()
            << "requested image instruction is not supported on this GPU\n");
        return false;
      }
    }
    if (Opcode == -1 &&
        STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
                                     NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
                                     NumVDataDwords, NumVAddrDwords);
  }
  if (Opcode == -1)
    return false;

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode))
                 .cloneMemRefs(MI);

  if (VDataOut) {
    if (BaseOpcode->AtomicX2) {
      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      MIB.addDef(TmpReg);
      if (!MRI->use_empty(VDataOut)) {
        BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
            .addReg(TmpReg, RegState::Kill, SubReg);
      }
    } else {
      MIB.addDef(VDataOut); // vdata output
    }
  }

  if (VDataIn)
    MIB.addReg(VDataIn); // vdata input

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
      assert(SrcOp.getReg() != 0);
      MIB.addReg(SrcOp.getReg());
    }
  }

  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
  if (BaseOpcode->Sampler)
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

  MIB.addImm(DMask); // dmask

  if (IsGFX10Plus)
    MIB.addImm(DimInfo->Encoding);
  MIB.addImm(Unorm);

  MIB.addImm(CPol);
  MIB.addImm(IsA16 && // a16 or r128
             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
  if (IsGFX10Plus)
    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {
    MIB.addImm(TFE); // tfe
  } else if (TFE) {
    LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");
    return false;
  }

  MIB.addImm(LWE); // lwe
  if (!IsGFX10Plus)
    MIB.addImm(DimInfo->DA ? -1 : 0);
  if (BaseOpcode->HasD16)
    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();
  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
  return true;
}

bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  unsigned Offset = MI.getOperand(6).getImm();

  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
    break;
  default:
    llvm_unreachable("not a ds_bvh_stack intrinsic");
  }

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
                 .addImm(Offset)
                 .cloneMemRefs(MI);

  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_buffer_load_async_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_load_async_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
  case Intrinsic::amdgcn_global_load_async_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_tensor_load_to_lds:
  case Intrinsic::amdgcn_tensor_store_from_lds:
    return selectTensorLoadStore(I, IntrinsicID);
  case Intrinsic::amdgcn_asyncmark:
  case Intrinsic::amdgcn_wait_asyncmark:
    if (!Subtarget->hasAsyncMark())
      return false;
    break;
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      Function &F = I.getMF()->getFunction();
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
                                    I.getDebugLoc(), DS_Error));
    }
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_alloc_vgpr: {
    MachineBasicBlock *MBB = I.getParent();
    const DebugLoc &DL = I.getDebugLoc();
    Register ResReg = I.getOperand(0).getReg();

    MachineInstr *AllocMI = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_ALLOC_VGPR))
                                .add(I.getOperand(2));
    if (!constrainSelectedInstRegOperands(*AllocMI, TII, TRI, RBI))
      return false;
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), ResReg).addReg(AMDGPU::SCC);
    I.eraseFromParent();

    return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
  }
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {
      Function &F = I.getMF()->getFunction();
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
                                    I.getDebugLoc(), DS_Error));
      return false;
    }
    return selectNamedBarrierInst(I, IntrinsicID);
  }
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  default:
    break;
  }
  return selectImpl(I, *CoverageInfo);
}

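// G_SELECT lowers to S_CSELECT with the condition copied into SCC on the
// scalar path, or to V_CNDMASK_B32 with a VCC condition on the vector path.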
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't cover a copy to
    // scc, so constrain the source register class manually.
    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR selects should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

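// G_TRUNC is mostly a COPY (possibly subregister-indexed); v2s32 -> v2s16
// additionally packs the two low halves with SDWA or a shift-and-or sequence.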
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const LLT S1 = LLT::scalar(1);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
  if (DstTy == S1) {
    // s1 legalization artifacts are not treated as vcc booleans.
    DstRB = SrcRB;
  } else {
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
    if (SrcRB != DstRB)
      return false;
  }

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());
    const DebugLoc &DL = I.getDebugLoc();
    MachineBasicBlock *MBB = I.getParent();
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg, {}, AMDGPU::lo16);
    I.eraseFromParent();
    return true;
  }

  if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
    MachineBasicBlock *MBB = I.getParent();
    const DebugLoc &DL = I.getDebugLoc();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
        .addReg(SrcReg, {}, AMDGPU::sub0);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
        .addReg(SrcReg, {}, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      // Write the low 16 bits of the high element into the high 16 bits of
      // the low element.
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
              .addImm(0)                             // $src0_modifiers
              .addReg(HiReg)                         // $src0
              .addImm(0)                             // $clamp
              .addImm(AMDGPU::SDWA::WORD_1)          // $dst_sel
              .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
              .addImm(AMDGPU::SDWA::WORD_0)          // $src0_sel
              .addReg(LoReg);
      MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
    } else {
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      if (IsVALU) {
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
            .addImm(16)
            .addReg(HiReg);
      } else {
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
            .addReg(HiReg)
            .addImm(16)
            .setOperandDead(3); // Dead scc
      }

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

      BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg)
          .addImm(0xffff);
      auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
                     .addReg(LoReg)
                     .addReg(ImmReg);
      auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
                    .addReg(TmpReg0)
                    .addReg(TmpReg1);

      if (!IsVALU) {
        And.setOperandDead(3); // Dead scc
        Or.setOperandDead(3); // Dead scc
      }
    }

    I.eraseFromParent();
    return true;
  }

  if (!DstTy.isScalar())
    return false;

  if (SrcSize > 32) {
    unsigned SubRegIdx = DstSize < 32
                             ? static_cast<unsigned>(AMDGPU::sub0)
                             : TRI.getSubRegFromChannel(0, DstSize / 32);
    if (SubRegIdx == AMDGPU::NoSubRegister)
      return false;

    // Deal with weird cases where the class only partially supports the
    // subreg index.
    const TargetRegisterClass *SrcWithSubRC
        = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcWithSubRC)
      return false;

    if (SrcWithSubRC != SrcRC) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
        return false;
    }

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// Return true when a trailing-ones mask of \p Size bits fits in an inline
/// immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    Register Reg, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI) const {
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  if (auto *RB = dyn_cast_if_present<const RegisterBank *>(RegClassOrBank))
    return RB;

  // Ignore the type, since we don't use vcc in artifacts.
  if (auto *RC =
          dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank))
    return &RBI.getRegBankFromRegClass(*RC, LLT());
  return nullptr;
}

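// G_SEXT/G_ZEXT/G_ANYEXT/G_SEXT_INREG: selected as BFE, an AND mask when the
// mask fits in an inline immediate, or a REG_SEQUENCE for 32 -> 64 widening.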
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
    I.getOperand(2).getImm() : SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  // Artifact casts should never use vcc.
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
    if (DstSize <= 32)
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);
    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
  }

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect.

    // Try to use an AND with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
          BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
              .addImm(Mask)
              .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
            .addReg(SrcReg)
            .addImm(0)        // Offset
            .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
          .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
    }

    // Using one 32-bit SALU op to compute the high half is smaller than
    // S_BFE with a literal constant operand.
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      if (Signed) {
        BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)
            .addReg(SrcReg, {}, SubReg)
            .addImm(31)
            .setOperandDead(3); // Dead scc
      } else {
        BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
            .addImm(0);
      }
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)
          .addReg(HiReg)
          .addImm(AMDGPU::sub1);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)
          .addReg(UndefReg)
          .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
          .addReg(ExtReg)
          .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
          .addReg(SrcReg)
          .addImm(Mask)
          .setOperandDead(3); // Dead scc
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
          .addReg(SrcReg)
          .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  }

  return false;
}

  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
    return false;

  ArrayRef<int> Mask = Shuffle->getOperand(3).getShuffleMask();
  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1)
    return true;

  return false;
}

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())
    return false;

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register Src = I.getOperand(1).getReg();

  if (MRI->getType(Dst) == LLT::scalar(32) &&
      MRI->getType(Src) == LLT::scalar(16)) {
    if (isExtractHiElt(*MRI, Src, Src)) {
      MachineBasicBlock *BB = I.getParent();
      BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
          .addUse(Src);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
    }
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;

  Register Src = MI.getOperand(1).getReg();
  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
  if (Fabs)
    Src = Fabs->getOperand(1).getReg();

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
    return false;

  MachineBasicBlock *BB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x80000000);

  // Set or toggle the sign bit.
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg)
      .setOperandDead(3); // Dead scc
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);
  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      MRI->getType(Dst) != LLT::scalar(64))
    return false;

  Register Src = MI.getOperand(1).getReg();
  MachineBasicBlock *BB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
    return false;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x7fffffff);

  // Clear the sign bit.
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg)
      .setOperandDead(3); // Dead scc
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);

  MI.eraseFromParent();
  return true;
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return;

  GEPInfo GEPInfo;

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
}

3032bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3033 if (!
MI.hasOneMemOperand())
3036 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3049 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3050 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3051 AMDGPU::SGPRRegBankID;
3054 return I &&
I->getMetadata(
"amdgpu.uniform");
3058 for (
const GEPInfo &GEPInfo : AddrInfo) {
3059 if (!GEPInfo.VgprParts.empty())
3065void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3066 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3069 STI.ldsRequiresM0Init()) {
3073 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3078bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3085 if (
Reg.isPhysical())
3089 const unsigned Opcode =
MI.getOpcode();
3091 if (Opcode == AMDGPU::COPY)
3094 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3095 Opcode == AMDGPU::G_XOR)
3100 return GI->is(Intrinsic::amdgcn_class);
3102 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3105bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3107 MachineOperand &CondOp =
I.getOperand(0);
3113 const TargetRegisterClass *ConstrainRC;
3120 if (!isVCC(CondReg, *MRI)) {
3124 CondPhysReg = AMDGPU::SCC;
3125 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3126 ConstrainRC = &AMDGPU::SReg_32RegClass;
3133 const bool Is64 = STI.isWave64();
3134 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3135 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3137 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3138 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3145 CondPhysReg = TRI.getVCC();
3146 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3147 ConstrainRC = TRI.getBoolRC();
3150 if (!MRI->getRegClassOrNull(CondReg))
3151 MRI->setRegClass(CondReg, ConstrainRC);
3153 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3156 .
addMBB(
I.getOperand(1).getMBB());
3158 I.eraseFromParent();
3162bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3164 Register DstReg =
I.getOperand(0).getReg();
3165 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3166 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3167 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3171 return RBI.constrainGenericRegister(
3172 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3175bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3176 Register DstReg =
I.getOperand(0).getReg();
3177 Register SrcReg =
I.getOperand(1).getReg();
3178 Register MaskReg =
I.getOperand(2).getReg();
3179 LLT Ty = MRI->getType(DstReg);
3180 LLT MaskTy = MRI->getType(MaskReg);
3184 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3185 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3186 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3187 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3193 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3197 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3198 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3201 !CanCopyLow32 && !CanCopyHi32) {
3202 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3206 I.eraseFromParent();
3211 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3212 const TargetRegisterClass &RegRC
3213 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3215 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3216 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3217 const TargetRegisterClass *MaskRC =
3218 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3220 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3221 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3222 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3227 "ptrmask should have been narrowed during legalize");
3229 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3235 I.eraseFromParent();
3239 Register HiReg = MRI->createVirtualRegister(&RegRC);
3240 Register LoReg = MRI->createVirtualRegister(&RegRC);
3243 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3244 .
addReg(SrcReg, {}, AMDGPU::sub0);
3245 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3246 .
addReg(SrcReg, {}, AMDGPU::sub1);
3255 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3256 MaskedLo = MRI->createVirtualRegister(&RegRC);
3258 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3259 .
addReg(MaskReg, {}, AMDGPU::sub0);
3260 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3269 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3270 MaskedHi = MRI->createVirtualRegister(&RegRC);
3272 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3273 .
addReg(MaskReg, {}, AMDGPU::sub1);
3274 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3279 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3284 I.eraseFromParent();
3290static std::pair<Register, unsigned>
3297 std::tie(IdxBaseReg,
Offset) =
3299 if (IdxBaseReg == AMDGPU::NoRegister) {
3303 IdxBaseReg = IdxReg;
3310 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3311 return std::pair(IdxReg, SubRegs[0]);
3312 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3315bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3321 LLT DstTy = MRI->getType(DstReg);
3322 LLT SrcTy = MRI->getType(SrcReg);
3324 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3325 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3326 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3330 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3333 const TargetRegisterClass *SrcRC =
3334 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3335 const TargetRegisterClass *DstRC =
3336 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3337 if (!SrcRC || !DstRC)
3339 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3340 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3341 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3344 MachineBasicBlock *BB =
MI.getParent();
3352 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3356 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3359 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3361 .
addReg(SrcReg, {}, SubReg)
3363 MI.eraseFromParent();
3370 if (!STI.useVGPRIndexMode()) {
3371 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3373 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3374 .
addReg(SrcReg, {}, SubReg)
3376 MI.eraseFromParent();
3380 const MCInstrDesc &GPRIDXDesc =
3381 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3387 MI.eraseFromParent();
3392bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3399 LLT VecTy = MRI->getType(DstReg);
3400 LLT ValTy = MRI->getType(ValReg);
3404 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3405 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3406 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3412 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3415 const TargetRegisterClass *VecRC =
3416 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3417 const TargetRegisterClass *ValRC =
3418 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3420 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3421 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3422 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3423 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3426 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3430 std::tie(IdxReg, SubReg) =
3433 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3434 STI.useVGPRIndexMode();
3436 MachineBasicBlock *BB =
MI.getParent();
3440 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3443 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3444 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3449 MI.eraseFromParent();
3453 const MCInstrDesc &GPRIDXDesc =
3454 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3461 MI.eraseFromParent();
3467 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3468 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3469 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3470 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3471 case Intrinsic::amdgcn_load_async_to_lds:
3472 case Intrinsic::amdgcn_global_load_async_lds:
3478bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3479 if (!Subtarget->hasVMemToLDSLoad())
3482 unsigned Size =
MI.getOperand(3).getImm();
3486 const bool HasVIndex =
MI.getNumOperands() == 9;
3490 VIndex =
MI.getOperand(4).getReg();
3494 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3495 std::optional<ValueAndVReg> MaybeVOffset =
3497 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3503 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3504 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3505 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3506 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3509 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3510 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3511 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3512 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3515 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3516 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3517 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3518 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3521 if (!Subtarget->hasLDSLoadB96_B128())
3524 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3525 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3526 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3527 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3530 if (!Subtarget->hasLDSLoadB96_B128())
3533 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3534 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3535 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3536 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3540 MachineBasicBlock *
MBB =
MI.getParent();
3543 .
add(
MI.getOperand(2));
3547 if (HasVIndex && HasVOffset) {
3548 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3549 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3556 }
else if (HasVIndex) {
3558 }
else if (HasVOffset) {
3562 MIB.
add(
MI.getOperand(1));
3563 MIB.
add(
MI.getOperand(5 + OpOffset));
3564 MIB.
add(
MI.getOperand(6 + OpOffset));
3566 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3575 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3580 MachinePointerInfo StorePtrI = LoadPtrI;
3591 MachineMemOperand *StoreMMO =
3597 MI.eraseFromParent();
3610 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3616 return Def->getOperand(1).getReg();
3630 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3638 return Def->getOperand(1).getReg();
3640 if (
VT->signBitIsZero(
Reg))
3641 return matchZeroExtendFromS32(
Reg);
3649AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3651 : matchZeroExtendFromS32(
Reg);
3657AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3659 : matchSignExtendFromS32(
Reg);
3663AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3664 bool IsSigned)
const {
3666 return matchSignExtendFromS32OrS32(
Reg);
3668 return matchZeroExtendFromS32OrS32(
Reg);
3678 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3685 return Def->getOperand(1).getReg();
3690bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3691 if (!Subtarget->hasVMemToLDSLoad())
3695 unsigned Size =
MI.getOperand(3).getImm();
3702 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3705 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3708 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3711 if (!Subtarget->hasLDSLoadB96_B128())
3713 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3716 if (!Subtarget->hasLDSLoadB96_B128())
3718 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3722 MachineBasicBlock *
MBB =
MI.getParent();
3725 .
add(
MI.getOperand(2));
3731 if (!isSGPR(Addr)) {
3733 if (isSGPR(AddrDef->Reg)) {
3734 Addr = AddrDef->Reg;
3735 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3738 if (isSGPR(SAddr)) {
3739 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3740 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3751 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3763 MIB.
add(
MI.getOperand(4));
3765 unsigned Aux =
MI.getOperand(5).getImm();
3769 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3771 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3772 MachinePointerInfo StorePtrI = LoadPtrI;
3781 MachineMemOperand *StoreMMO =
3783 sizeof(int32_t),
Align(4));
3787 MI.eraseFromParent();
3792bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3794 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3796 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3800 const auto isAllZeros = [&](MachineOperand &Opnd) {
3801 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3810 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3811 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3816 MachineBasicBlock *
MBB =
MI.getParent();
3818 .
add(
MI.getOperand(1))
3819 .
add(
MI.getOperand(2));
3821 if (NumGroups >= 4) {
3822 MIB.
add(
MI.getOperand(3))
3823 .
add(
MI.getOperand(4));
3827 .
add(
MI.getOperand(6));
3829 MI.eraseFromParent();
3833bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3835 unsigned OpcodeOpIdx =
3836 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3837 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3838 MI.removeOperand(OpcodeOpIdx);
3839 MI.addImplicitDefUseOperands(*
MI.getMF());
3846bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3849 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3850 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3852 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3853 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3855 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3856 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3858 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3859 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3861 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3862 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3864 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3865 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3867 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3868 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3870 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3871 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3873 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3874 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3876 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3877 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3879 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3880 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3882 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3883 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3885 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3886 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3888 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3889 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3891 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3892 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3894 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3895 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3897 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3898 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3900 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3901 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3903 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3904 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3906 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3907 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3909 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3910 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3912 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3913 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3915 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3916 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3918 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3919 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3921 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3922 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3924 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3925 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3927 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3928 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3930 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3931 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3937 auto VDst_In =
MI.getOperand(4);
3939 MI.setDesc(TII.get(
Opc));
3940 MI.removeOperand(4);
3941 MI.removeOperand(1);
3942 MI.addOperand(VDst_In);
3943 MI.addImplicitDefUseOperands(*
MI.getMF());
3944 const MCInstrDesc &MCID =
MI.getDesc();
3946 MI.getOperand(0).setIsEarlyClobber(
true);
3951bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3953 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3954 !Subtarget->hasPermlane16Swap())
3956 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3957 !Subtarget->hasPermlane32Swap())
3960 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3961 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3962 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3964 MI.removeOperand(2);
3965 MI.setDesc(TII.get(Opcode));
3968 MachineOperand &FI =
MI.getOperand(4);
3975bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3978 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3979 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3980 MachineBasicBlock *
MBB =
MI.getParent();
3984 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3985 .
addImm(Subtarget->getWavefrontSizeLog2())
3990 .
addImm(Subtarget->getWavefrontSizeLog2())
3994 const TargetRegisterClass &RC =
3995 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3996 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
3999 MI.eraseFromParent();
4003bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4006 MachineBasicBlock *
MBB =
MI.getParent();
4013 const LLT DstTy = MRI->getType(DstReg);
4015 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4016 const TargetRegisterClass *DstRC =
4017 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4022 if (!Subtarget->supportsBPermute())
4026 if (Subtarget->supportsWaveWideBPermute()) {
4027 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4028 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4038 assert(Subtarget->isWave64());
4042 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4043 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4045 Register UndefExecReg = MRI->createVirtualRegister(
4046 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4047 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4049 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4050 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4058 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4059 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4063 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4064 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4072 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4073 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4078 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4079 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4082 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4083 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4088 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4089 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4096 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4097 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4101 Register XORReg = MRI->createVirtualRegister(DstRC);
4106 Register ANDReg = MRI->createVirtualRegister(DstRC);
4111 Register CompareReg = MRI->createVirtualRegister(
4112 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4113 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4118 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4126 MI.eraseFromParent();
4135 unsigned NumOpcodes = 0;
4148 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4159 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4173 if (Src.size() == 3) {
4180 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4181 if (Src[
I] ==
LHS) {
4191 Bits = SrcBits[Src.size()];
4197 switch (
MI->getOpcode()) {
4198 case TargetOpcode::G_AND:
4199 case TargetOpcode::G_OR:
4200 case TargetOpcode::G_XOR: {
4205 if (!getOperandBits(
LHS, LHSBits) ||
4206 !getOperandBits(
RHS, RHSBits)) {
4207 Src = std::move(Backup);
4208 return std::make_pair(0, 0);
4214 NumOpcodes +=
Op.first;
4215 LHSBits =
Op.second;
4220 NumOpcodes +=
Op.first;
4221 RHSBits =
Op.second;
4226 return std::make_pair(0, 0);
4230 switch (
MI->getOpcode()) {
4231 case TargetOpcode::G_AND:
4232 TTbl = LHSBits & RHSBits;
4234 case TargetOpcode::G_OR:
4235 TTbl = LHSBits | RHSBits;
4237 case TargetOpcode::G_XOR:
4238 TTbl = LHSBits ^ RHSBits;
4244 return std::make_pair(NumOpcodes + 1, TTbl);
4247bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4248 if (!Subtarget->hasBitOp3Insts())
4252 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4253 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4259 unsigned NumOpcodes;
4261 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4265 if (NumOpcodes < 2 || Src.empty())
4268 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4269 if (NumOpcodes == 2 && IsB32) {
4277 }
else if (NumOpcodes < 4) {
4284 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4285 if (!IsB32 && STI.hasTrue16BitInsts())
4286 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4287 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4288 unsigned CBL = STI.getConstantBusLimit(
Opc);
4289 MachineBasicBlock *
MBB =
MI.getParent();
4292 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4293 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4294 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4300 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4311 while (Src.size() < 3)
4312 Src.push_back(Src[0]);
4329 MI.eraseFromParent();
4334bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4336 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4339 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4341 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4343 MachineBasicBlock *
MBB =
MI.getParent();
4347 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4350 .
addImm(Subtarget->getWavefrontSizeLog2())
4357 MI.eraseFromParent();
4363 if (!
I.isPreISelOpcode()) {
4365 return selectCOPY(
I);
4369 switch (
I.getOpcode()) {
4370 case TargetOpcode::G_AND:
4371 case TargetOpcode::G_OR:
4372 case TargetOpcode::G_XOR:
4373 if (selectBITOP3(
I))
4377 return selectG_AND_OR_XOR(
I);
4378 case TargetOpcode::G_ADD:
4379 case TargetOpcode::G_SUB:
4380 case TargetOpcode::G_PTR_ADD:
4383 return selectG_ADD_SUB(
I);
4384 case TargetOpcode::G_UADDO:
4385 case TargetOpcode::G_USUBO:
4386 case TargetOpcode::G_UADDE:
4387 case TargetOpcode::G_USUBE:
4388 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4389 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4390 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4391 return selectG_AMDGPU_MAD_64_32(
I);
4392 case TargetOpcode::G_INTTOPTR:
4393 case TargetOpcode::G_BITCAST:
4394 case TargetOpcode::G_PTRTOINT:
4395 case TargetOpcode::G_FREEZE:
4396 return selectCOPY(
I);
4397 case TargetOpcode::G_FNEG:
4400 return selectG_FNEG(
I);
4401 case TargetOpcode::G_FABS:
4404 return selectG_FABS(
I);
4405 case TargetOpcode::G_EXTRACT:
4406 return selectG_EXTRACT(
I);
4407 case TargetOpcode::G_MERGE_VALUES:
4408 case TargetOpcode::G_CONCAT_VECTORS:
4409 return selectG_MERGE_VALUES(
I);
4410 case TargetOpcode::G_UNMERGE_VALUES:
4411 return selectG_UNMERGE_VALUES(
I);
4412 case TargetOpcode::G_BUILD_VECTOR:
4413 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4414 return selectG_BUILD_VECTOR(
I);
4415 case TargetOpcode::G_IMPLICIT_DEF:
4416 return selectG_IMPLICIT_DEF(
I);
4417 case TargetOpcode::G_INSERT:
4418 return selectG_INSERT(
I);
4419 case TargetOpcode::G_INTRINSIC:
4420 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4421 return selectG_INTRINSIC(
I);
4422 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4423 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4424 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4425 case TargetOpcode::G_ICMP:
4426 case TargetOpcode::G_FCMP:
4427 if (selectG_ICMP_or_FCMP(
I))
4430 case TargetOpcode::G_LOAD:
4431 case TargetOpcode::G_ZEXTLOAD:
4432 case TargetOpcode::G_SEXTLOAD:
4433 case TargetOpcode::G_STORE:
4434 case TargetOpcode::G_ATOMIC_CMPXCHG:
4435 case TargetOpcode::G_ATOMICRMW_XCHG:
4436 case TargetOpcode::G_ATOMICRMW_ADD:
4437 case TargetOpcode::G_ATOMICRMW_SUB:
4438 case TargetOpcode::G_ATOMICRMW_AND:
4439 case TargetOpcode::G_ATOMICRMW_OR:
4440 case TargetOpcode::G_ATOMICRMW_XOR:
4441 case TargetOpcode::G_ATOMICRMW_MIN:
4442 case TargetOpcode::G_ATOMICRMW_MAX:
4443 case TargetOpcode::G_ATOMICRMW_UMIN:
4444 case TargetOpcode::G_ATOMICRMW_UMAX:
4445 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4446 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4447 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4448 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4449 case TargetOpcode::G_ATOMICRMW_FADD:
4450 case TargetOpcode::G_ATOMICRMW_FMIN:
4451 case TargetOpcode::G_ATOMICRMW_FMAX:
4452 return selectG_LOAD_STORE_ATOMICRMW(
I);
4453 case TargetOpcode::G_SELECT:
4454 return selectG_SELECT(
I);
4455 case TargetOpcode::G_TRUNC:
4456 return selectG_TRUNC(
I);
4457 case TargetOpcode::G_SEXT:
4458 case TargetOpcode::G_ZEXT:
4459 case TargetOpcode::G_ANYEXT:
4460 case TargetOpcode::G_SEXT_INREG:
4464 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4467 return selectG_SZA_EXT(
I);
4468 case TargetOpcode::G_FPEXT:
4469 if (selectG_FPEXT(
I))
4472 case TargetOpcode::G_BRCOND:
4473 return selectG_BRCOND(
I);
4474 case TargetOpcode::G_GLOBAL_VALUE:
4475 return selectG_GLOBAL_VALUE(
I);
4476 case TargetOpcode::G_PTRMASK:
4477 return selectG_PTRMASK(
I);
4478 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4479 return selectG_EXTRACT_VECTOR_ELT(
I);
4480 case TargetOpcode::G_INSERT_VECTOR_ELT:
4481 return selectG_INSERT_VECTOR_ELT(
I);
4482 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4483 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4484 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4485 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4486 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4489 assert(Intr &&
"not an image intrinsic with image pseudo");
4490 return selectImageIntrinsic(
I, Intr);
4492 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4493 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4494 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4495 return selectBVHIntersectRayIntrinsic(
I);
4496 case AMDGPU::G_SBFX:
4497 case AMDGPU::G_UBFX:
4498 return selectG_SBFX_UBFX(
I);
4499 case AMDGPU::G_SI_CALL:
4500 I.setDesc(TII.get(AMDGPU::SI_CALL));
4502 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4503 return selectWaveAddress(
I);
4504 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4505 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4508 case AMDGPU::G_STACKRESTORE:
4509 return selectStackRestore(
I);
4511 return selectPHI(
I);
4512 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4513 return selectCOPY_SCC_VCC(
I);
4514 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4515 return selectCOPY_VCC_SCC(
I);
4516 case AMDGPU::G_AMDGPU_READANYLANE:
4517 return selectReadAnyLane(
I);
4518 case TargetOpcode::G_CONSTANT:
4519 case TargetOpcode::G_FCONSTANT:
4527AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4534std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4535 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4539 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4540 Src =
MI->getOperand(1).getReg();
4543 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4548 if (
LHS &&
LHS->isZero()) {
4550 Src =
MI->getOperand(2).getReg();
4554 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4555 Src =
MI->getOperand(1).getReg();
4562 return std::pair(Src, Mods);
4565std::pair<Register, unsigned>
4566AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4568 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4570 return std::pair(Src, Mods);
4573Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4575 bool ForceVGPR)
const {
4576 if ((Mods != 0 || ForceVGPR) &&
4577 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4584 TII.
get(AMDGPU::COPY), VGPRSrc)
4596AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4598 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4603AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4606 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4609 [=](MachineInstrBuilder &MIB) {
4610 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4612 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4613 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4614 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4619AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4622 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4627 [=](MachineInstrBuilder &MIB) {
4628 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4630 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4631 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4632 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4637AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4639 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4640 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4641 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4646AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4649 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4652 [=](MachineInstrBuilder &MIB) {
4653 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4655 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4660AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4664 std::tie(Src, Mods) =
4665 selectVOP3ModsImpl(Root.
getReg(),
false);
4668 [=](MachineInstrBuilder &MIB) {
4669 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4671 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4676AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4679 std::tie(Src, Mods) =
4680 selectVOP3ModsImpl(Root.
getReg(),
true,
4684 [=](MachineInstrBuilder &MIB) {
4685 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4687 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4692AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4695 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4698 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4723 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4728 return DstSize * 2 == SrcSize;
4734 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4738 std::optional<ValueAndVReg> ShiftAmt;
4739 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4742 unsigned Shift = ShiftAmt->Value.getZExtValue();
4743 return Shift * 2 == SrcSize;
4751 if (
MI->getOpcode() != AMDGPU::G_SHL)
4755 std::optional<ValueAndVReg> ShiftAmt;
4756 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4759 unsigned Shift = ShiftAmt->Value.getZExtValue();
4760 return Shift * 2 == SrcSize;
4768 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4770 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4771 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4941static std::optional<std::pair<Register, SrcStatus>>
4946 unsigned Opc =
MI->getOpcode();
4950 case AMDGPU::G_BITCAST:
4951 return std::optional<std::pair<Register, SrcStatus>>(
4952 {
MI->getOperand(1).getReg(), Curr.second});
4954 if (
MI->getOperand(1).getReg().isPhysical())
4955 return std::nullopt;
4956 return std::optional<std::pair<Register, SrcStatus>>(
4957 {
MI->getOperand(1).getReg(), Curr.second});
4958 case AMDGPU::G_FNEG: {
4961 return std::nullopt;
4962 return std::optional<std::pair<Register, SrcStatus>>(
4963 {
MI->getOperand(1).getReg(), Stat});
4970 switch (Curr.second) {
4973 return std::optional<std::pair<Register, SrcStatus>>(
4976 if (Curr.first ==
MI->getOperand(0).getReg())
4977 return std::optional<std::pair<Register, SrcStatus>>(
4979 return std::optional<std::pair<Register, SrcStatus>>(
4991 return std::optional<std::pair<Register, SrcStatus>>(
4995 if (Curr.first ==
MI->getOperand(0).getReg())
4996 return std::optional<std::pair<Register, SrcStatus>>(
4998 return std::optional<std::pair<Register, SrcStatus>>(
5004 return std::optional<std::pair<Register, SrcStatus>>(
5009 return std::optional<std::pair<Register, SrcStatus>>(
5014 return std::optional<std::pair<Register, SrcStatus>>(
5019 return std::optional<std::pair<Register, SrcStatus>>(
5025 return std::nullopt;
5035 bool HasNeg =
false;
5037 bool HasOpsel =
true;
5042 unsigned Opc =
MI->getOpcode();
5044 if (
Opc == TargetOpcode::G_INTRINSIC) {
5047 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5074 while (
Depth <= MaxDepth && Curr.has_value()) {
5077 Statlist.push_back(Curr.value());
5084static std::pair<Register, SrcStatus>
5091 while (
Depth <= MaxDepth && Curr.has_value()) {
5097 LastSameOrNeg = Curr.value();
5102 return LastSameOrNeg;
5109 return Width1 == Width2;
5144 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5145 IsHalfState(HiStat);
5148std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5154 return {RootReg, Mods};
5157 SearchOptions SO(RootReg, MRI);
5168 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5170 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5171 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5173 return {Stat.first, Mods};
5179 if (StatlistHi.
empty()) {
5181 return {Stat.first, Mods};
5187 if (StatlistLo.
empty()) {
5189 return {Stat.first, Mods};
5192 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5193 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5194 if (StatlistHi[
I].first == StatlistLo[J].first &&
5196 StatlistHi[
I].first, RootReg, TII, MRI))
5197 return {StatlistHi[
I].first,
5198 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5204 return {Stat.first, Mods};
5214 return RB->
getID() == RBNo;
5231 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5232 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5236 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5245 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5253AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5258 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5262 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5263 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5268AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5270 return selectVOP3PRetHelper(Root);
5274AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5276 return selectVOP3PRetHelper(Root,
true);
5280AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5284 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5288 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5292AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5295 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5298 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5299 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5304AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5307 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5311 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5315AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5318 "expected i1 value");
5324 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5332 switch (Elts.
size()) {
5334 DstRegClass = &AMDGPU::VReg_256RegClass;
5337 DstRegClass = &AMDGPU::VReg_128RegClass;
5340 DstRegClass = &AMDGPU::VReg_64RegClass;
5347 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5349 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5360 if (ModOpcode == TargetOpcode::G_FNEG) {
5364 for (
auto El : Elts) {
5370 if (Elts.size() != NegAbsElts.
size()) {
5379 assert(ModOpcode == TargetOpcode::G_FABS);
5387AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5393 assert(BV->getNumSources() > 0);
5395 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5396 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5399 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5400 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5407 if (BV->getNumSources() == EltsF32.
size()) {
5413 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5414 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5418AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5424 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5432 if (CV->getNumSources() == EltsV2F16.
size()) {
5439 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5440 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5444AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5450 assert(CV->getNumSources() > 0);
5451 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5453 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5457 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5458 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5465 if (CV->getNumSources() == EltsV2F16.
size()) {
5472 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5473 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5477AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5478 std::optional<FPValueAndVReg> FPValReg;
5480 if (TII.isInlineConstant(FPValReg->Value)) {
5481 return {{[=](MachineInstrBuilder &MIB) {
5482 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5492 if (TII.isInlineConstant(ICst)) {
5502AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5508 std::optional<ValueAndVReg> ShiftAmt;
5510 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5511 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5512 Key = ShiftAmt->Value.getZExtValue() / 8;
5517 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5518 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5523AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5530 std::optional<ValueAndVReg> ShiftAmt;
5532 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5533 ShiftAmt->Value.getZExtValue() == 16) {
5539 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5540 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5545AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5552 S32 = matchAnyExtendFromS32(Src);
5556 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5561 Src =
Def->getOperand(2).getReg();
5568 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5569 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5574AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5577 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5581 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5582 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5588AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5591 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5597 [=](MachineInstrBuilder &MIB) {
5599 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5601 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5606AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5609 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5615 [=](MachineInstrBuilder &MIB) {
5617 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5619 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5626bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5628 bool IsSigned)
const {
5629 if (!Subtarget->hasScaleOffset())
5633 MachineMemOperand *MMO = *
MI.memoperands_begin();
5645 OffsetReg =
Def->Reg;
5660 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5664 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5665 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5666 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5667 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5680bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5684 bool *ScaleOffset)
const {
5686 MachineBasicBlock *
MBB =
MI->getParent();
5691 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5693 if (AddrInfo.
empty())
5696 const GEPInfo &GEPI = AddrInfo[0];
5697 std::optional<int64_t> EncodedImm;
5700 *ScaleOffset =
false;
5705 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5706 AddrInfo.
size() > 1) {
5707 const GEPInfo &GEPI2 = AddrInfo[1];
5708 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5709 Register OffsetReg = GEPI2.SgprParts[1];
5712 selectScaleOffset(Root, OffsetReg,
false );
5713 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5715 Base = GEPI2.SgprParts[0];
5716 *SOffset = OffsetReg;
5725 auto SKnown =
VT->getKnownBits(*SOffset);
5726 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5738 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5739 Base = GEPI.SgprParts[0];
5745 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5751 Base = GEPI.SgprParts[0];
5752 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5753 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5758 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5759 Register OffsetReg = GEPI.SgprParts[1];
5761 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5762 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5764 Base = GEPI.SgprParts[0];
5765 *SOffset = OffsetReg;
5774AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5777 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5779 return std::nullopt;
5781 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5782 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5786AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5788 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5790 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5791 return std::nullopt;
5793 const GEPInfo &GEPInfo = AddrInfo[0];
5794 Register PtrReg = GEPInfo.SgprParts[0];
5795 std::optional<int64_t> EncodedImm =
5798 return std::nullopt;
5801 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5802 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5807AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5810 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5812 return std::nullopt;
5815 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5816 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5817 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5821AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5825 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5826 return std::nullopt;
5829 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5830 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5832 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5835std::pair<Register, int>
5836AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5837 uint64_t FlatVariant)
const {
5842 if (!STI.hasFlatInstOffsets())
5846 int64_t ConstOffset;
5848 std::tie(PtrBase, ConstOffset, IsInBounds) =
5849 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5855 if (ConstOffset == 0 ||
5857 !isFlatScratchBaseLegal(Root.
getReg())) ||
5861 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5862 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5865 return std::pair(PtrBase, ConstOffset);
5869AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5873 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5874 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5879AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5883 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5884 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5889AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5893 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5894 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5900AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5902 bool NeedIOffset)
const {
5905 int64_t ConstOffset;
5906 int64_t ImmOffset = 0;
5910 std::tie(PtrBase, ConstOffset, std::ignore) =
5911 getPtrBaseWithConstantOffset(Addr, *MRI);
5913 if (ConstOffset != 0) {
5918 ImmOffset = ConstOffset;
5921 if (isSGPR(PtrBaseDef->Reg)) {
5922 if (ConstOffset > 0) {
5928 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5930 std::tie(SplitImmOffset, RemainderOffset) =
5935 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5938 MachineBasicBlock *
MBB =
MI->getParent();
5940 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5942 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5944 .
addImm(RemainderOffset);
5948 [=](MachineInstrBuilder &MIB) {
5951 [=](MachineInstrBuilder &MIB) {
5954 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5955 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5958 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5959 [=](MachineInstrBuilder &MIB) {
5962 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5972 unsigned NumLiterals =
5973 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5974 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5975 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5976 return std::nullopt;
5983 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5988 if (isSGPR(SAddr)) {
5989 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5993 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5994 Subtarget->hasSignedGVSOffset());
5995 if (
Register VOffset = matchExtendFromS32OrS32(
5996 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5998 return {{[=](MachineInstrBuilder &MIB) {
6001 [=](MachineInstrBuilder &MIB) {
6004 [=](MachineInstrBuilder &MIB) {
6007 [=](MachineInstrBuilder &MIB) {
6011 return {{[=](MachineInstrBuilder &MIB) {
6014 [=](MachineInstrBuilder &MIB) {
6017 [=](MachineInstrBuilder &MIB) {
6027 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6028 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6029 return std::nullopt;
6034 MachineBasicBlock *
MBB =
MI->getParent();
6035 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6037 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6042 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6043 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6044 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6045 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6048 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6049 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6050 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6055AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6056 return selectGlobalSAddr(Root, 0);
6060AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6066 return selectGlobalSAddr(Root, PassedCPol);
6070AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6076 return selectGlobalSAddr(Root, PassedCPol);
6080AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6085AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6092 return selectGlobalSAddr(Root, PassedCPol,
false);
6096AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6103 return selectGlobalSAddr(Root, PassedCPol,
false);
6107AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6110 int64_t ConstOffset;
6111 int64_t ImmOffset = 0;
6115 std::tie(PtrBase, ConstOffset, std::ignore) =
6116 getPtrBaseWithConstantOffset(Addr, *MRI);
6118 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6122 ImmOffset = ConstOffset;
6126 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6127 int FI = AddrDef->MI->getOperand(1).
getIndex();
6130 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6136 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6137 Register LHS = AddrDef->MI->getOperand(1).getReg();
6138 Register RHS = AddrDef->MI->getOperand(2).getReg();
6142 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6143 isSGPR(RHSDef->Reg)) {
6144 int FI = LHSDef->MI->getOperand(1).getIndex();
6148 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6150 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6158 return std::nullopt;
6161 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6162 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6167bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6169 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6175 auto VKnown =
VT->getKnownBits(VAddr);
6178 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6179 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6180 return (VMax & 3) + (
SMax & 3) >= 4;
6184AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6187 int64_t ConstOffset;
6188 int64_t ImmOffset = 0;
6192 std::tie(PtrBase, ConstOffset, std::ignore) =
6193 getPtrBaseWithConstantOffset(Addr, *MRI);
6196 if (ConstOffset != 0 &&
6200 ImmOffset = ConstOffset;
6204 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6205 return std::nullopt;
6207 Register RHS = AddrDef->MI->getOperand(2).getReg();
6208 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6209 return std::nullopt;
6211 Register LHS = AddrDef->MI->getOperand(1).getReg();
6214 if (OrigAddr != Addr) {
6215 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6216 return std::nullopt;
6218 if (!isFlatScratchBaseLegalSV(OrigAddr))
6219 return std::nullopt;
6222 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6223 return std::nullopt;
6225 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6229 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6230 int FI = LHSDef->MI->getOperand(1).getIndex();
6232 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6234 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6235 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6244 return std::nullopt;
6247 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6248 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6249 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6250 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6255AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6257 MachineBasicBlock *
MBB =
MI->getParent();
6259 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6264 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6269 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6273 return {{[=](MachineInstrBuilder &MIB) {
6276 [=](MachineInstrBuilder &MIB) {
6279 [=](MachineInstrBuilder &MIB) {
6284 [=](MachineInstrBuilder &MIB) {
6293 std::optional<int> FI;
6296 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6298 int64_t ConstOffset;
6299 std::tie(PtrBase, ConstOffset, std::ignore) =
6300 getPtrBaseWithConstantOffset(VAddr, *MRI);
6301 if (ConstOffset != 0) {
6302 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6303 (!STI.privateMemoryResourceIsRangeChecked() ||
6304 VT->signBitIsZero(PtrBase))) {
6305 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6306 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6312 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6316 return {{[=](MachineInstrBuilder &MIB) {
6319 [=](MachineInstrBuilder &MIB) {
6325 [=](MachineInstrBuilder &MIB) {
6330 [=](MachineInstrBuilder &MIB) {
6335bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6340 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6345 return VT->signBitIsZero(
Base);
6348bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6350 unsigned Size)
const {
6351 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6356 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6361 return VT->signBitIsZero(
Base);
6366 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6367 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6374bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6382 if (STI.hasSignedScratchOffsets())
6388 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6389 std::optional<ValueAndVReg> RhsValReg =
6395 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6396 RhsValReg->Value.getSExtValue() > -0x40000000)
6400 return VT->signBitIsZero(
LHS);
6405bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6413 if (STI.hasSignedScratchOffsets())
6418 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6423bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6427 if (STI.hasSignedScratchOffsets())
6432 std::optional<DefinitionAndSourceRegister> BaseDef =
6434 std::optional<ValueAndVReg> RHSOffset =
6444 (RHSOffset->Value.getSExtValue() < 0 &&
6445 RHSOffset->Value.getSExtValue() > -0x40000000)))
6448 Register LHS = BaseDef->MI->getOperand(1).getReg();
6449 Register RHS = BaseDef->MI->getOperand(2).getReg();
6450 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6453bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6454 unsigned ShAmtBits)
const {
6455 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6457 std::optional<APInt>
RHS =
6462 if (
RHS->countr_one() >= ShAmtBits)
6465 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6466 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6470AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6473 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6475 std::optional<DefinitionAndSourceRegister>
Def =
6477 assert(Def &&
"this shouldn't be an optional result");
6482 [=](MachineInstrBuilder &MIB) {
6485 [=](MachineInstrBuilder &MIB) {
6488 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6499 if (!TII.isLegalMUBUFImmOffset(
Offset))
6507 [=](MachineInstrBuilder &MIB) {
6510 [=](MachineInstrBuilder &MIB) {
6518 !TII.isLegalMUBUFImmOffset(
Offset))
6522 [=](MachineInstrBuilder &MIB) {
6525 [=](MachineInstrBuilder &MIB) {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }}};
}
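// For example, with Size == 4 a pair of 32-bit slots at byte offset 8 renders
// as (base, offset0 = 2, offset1 = 3), matching the scaled offset fields of
// ds_read2_b32 / ds_write2_b32.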
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // ...
  }

  return std::pair(Root.getReg(), 0);
}
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0, false};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue(),
          true};
}
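// E.g. for %p = G_PTR_ADD %base, 16 this returns {%base, 16, true}; any root
// that is not a ptr-add with a constant right-hand side comes back unchanged
// as {Root, 0, false}.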
/// Return a resource descriptor for use with an arbitrary 64-bit pointer.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  // ...
  B.buildInstr(AMDGPU::S_MOV_B32) /* ... */;
  B.buildInstr(AMDGPU::S_MOV_B32) /* ... */;
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      // ...
      .addImm(AMDGPU::sub0)
      // ...
      .addImm(AMDGPU::sub1);
  // ...
  B.buildInstr(AMDGPU::S_MOV_B64) /* ... */;
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      // ...
      .addImm(AMDGPU::sub0_sub1)
      // ...
      .addImm(AMDGPU::sub2_sub3);
}

// buildAddr64RSrc and buildOffsetSrc both start from the default data format:
uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
// ...
uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
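// A buffer resource descriptor (V#) is four 32-bit words: the REG_SEQUENCEs
// above assemble the 64-bit base pointer into sub0_sub1 and the two
// format/configuration words into sub2_sub3, keeping the constant hi half in
// its own 64-bit register so it can be CSE'd across descriptors.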
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  // ...
  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Src, *MRI);
  // ...
  if (MachineInstr *InputAdd =
          getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
    // ...
  }
  return Data;
}
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // ...
  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}
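// MUBUF addr64 mode (SI/CI only) routes a 64-bit VGPR pointer through vaddr;
// when the base is uniform (SGPR bank) it can instead be folded into the
// resource descriptor, so addr64 is only chosen for VGPR bases.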
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Offset does not fit the immediate field: materialize it into soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32) /* ... */;
  ImmOffset = 0;
}
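// E.g. an offset too large for the MUBUF immediate field is moved into
// soffset with an S_MOV_B32 and the immediate operand is reset to zero; legal
// offsets pass through untouched.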
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // ...
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;
  // ...
  Offset = AddrData.Offset;
  // ...
  if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      // ...
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // ...
  }
  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  // ...
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;
  // ...
  Offset = AddrData.Offset;
  // ...
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};
  // ...
  return {{[=](MachineInstrBuilder &MIB) { /* rsrc */ },
           [=](MachineInstrBuilder &MIB) { /* vaddr */ },
           [=](MachineInstrBuilder &MIB) { // soffset
             if (SOffset)
               MIB.addReg(SOffset);
             else if (STI.hasRestrictedSOffset())
               MIB.addReg(AMDGPU::SGPR_NULL);
             else
               MIB.addImm(0);
           },
           [=](MachineInstrBuilder &MIB) { /* offset */ }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};
  // ...
  return {{[=](MachineInstrBuilder &MIB) { /* rsrc */ },
           [=](MachineInstrBuilder &MIB) { // soffset
             if (SOffset)
               MIB.addReg(SOffset);
             else if (STI.hasRestrictedSOffset())
               MIB.addReg(AMDGPU::SGPR_NULL);
             else
               MIB.addImm(0);
           },
           [=](MachineInstrBuilder &MIB) { /* offset */ }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
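// On subtargets with restricted soffset handling a known-zero soffset is
// canonicalized to SGPR_NULL rather than a zero immediate or register.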
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegSExtVal sign-extends, so check the value fits in 32 bits.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, /*IsBuffer=*/true,
                                   /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  // ...
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) =
      AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg(), VT);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(STI, Offset, /*IsBuffer=*/true,
                                   /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
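// SMRD/SMEM byte offsets are not encoded uniformly across generations (some
// take dword-scaled immediates, some signed, some a 32-bit literal), so the
// helpers above go through AMDGPU::getSMRDEncodedOffset /
// getSMRDEncodedLiteralOffset32 and reject the pattern when no encoding
// exists for the current subtarget.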
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  // ...
  const auto CheckAbsNeg = [&]() {
    // ...
    unsigned ModsTmp;
    std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
    // ...
  };
  // ...
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
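// The Ext variant above fails the match outright when no mad-mix source
// pattern was recognized (Matched == false), while the plain variant still
// returns the source with whatever default modifiers were computed.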
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());
  // ...
  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
bool AMDGPUInstructionSelector::selectSGetBarrierState(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(2);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(BarOp.getReg());
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  auto DstReg = I.getOperand(0).getReg();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;
  // ...
  I.eraseFromParent();
  return true;
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    }
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    }
  }
}
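// Named-barrier operations come in _IMM and _M0 flavors: when the barrier
// operand is a compile-time constant, the ID can be encoded directly in the
// instruction; otherwise it is staged through M0. The (BarVal >> 4) & 0x3F
// extractions below pull the 6-bit barrier ID out of the packed barrier
// value.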
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(1);
  const MachineOperand &CntOp = I.getOperand(2);

  if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
    std::optional<int64_t> CntImm =
        getIConstantVRegSExtVal(CntOp.getReg(), *MRI);
    if (CntImm && *CntImm == 0) {
      std::optional<int64_t> BarValImm =
          getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
      // ...
      auto BarID = ((*BarValImm) >> 4) & 0x3F;
      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
          .addImm(BarID);
      I.eraseFromParent();
      return true;
    }
  }

  // Pack the barrier ID and member count into M0 with a short SALU sequence.
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  // ...
  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    // Extract the barrier ID from the packed value and stage it through M0.
    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(TmpReg1);
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  // ...
  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
      return false;
    // ...
  }

  if (BarValImm) {
    auto BarId = ((*BarValImm) >> 4) & 0x3F;
    MIB.addImm(BarId);
  }

  I.eraseFromParent();
  return true;
}
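// The render* hooks below are custom operand renderers referenced from
// TableGen patterns: each receives the matched MachineInstr plus an operand
// index (OpIdx == -1 when the source is a whole G_CONSTANT/G_FCONSTANT) and
// appends a transformed immediate to the instruction being built.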
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}
void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}
void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
}
void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
}
  const MachineOperand &Op = MI.getOperand(OpIdx);
  // ...

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}
void AMDGPUInstructionSelector::renderFPPow2ToExponent(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
  // ...
  if (MI.getOperand(OpIdx).getImm())
    // ...
  MIB.addImm((int64_t)Mods);

  // ...
  if (MI.getOperand(OpIdx).getImm())
    // ...
  MIB.addImm((int64_t)Mods);

  // ...
  unsigned Val = MI.getOperand(OpIdx).getImm();
  // ...
  MIB.addImm((int64_t)Mods);
  // ...
  uint32_t V = MI.getOperand(2).getImm();
  // ...
  if (!Subtarget->hasSafeCUPrefetch())
    // ...
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  unsigned Val = MI.getOperand(OpIdx).getImm();
  // ...
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}
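// Inline immediates (small integers like 0/1/-1 and selected FP constants
// such as 0.5, 1.0, 2.0) are encoded directly in the instruction word and do
// not occupy a literal slot, so these predicates gate patterns that require
// an inline-encodable operand.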