#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
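// constrainCopyLikeIntrin (below) appears to rewrite a copy-like intrinsic
// (WQM, SOFT_WQM, STRICT_WWM, ...) to the given NewOpc in place and then
// constrain source and destination to one common register class; the elided
// branches presumably return false when the classes cannot be matched.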
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
      = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
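// selectCOPY lowers a generic COPY to a target COPY. Judging from the
// surviving fragment, when the destination is in the VCC bank the copy is
// instead expanded: constant inputs become an S_MOV of an all-ones/zero mask,
// and non-constant inputs are masked and compared against zero
// (V_AND + V_CMP_NE_U32/U16) so the low bit becomes a wave-wide condition.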
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
          = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
          = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
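// selectCOPY_SCC_VCC and selectCOPY_VCC_SCC (below) bridge the two scalar
// condition representations: a VCC lane mask is compared against zero
// (S_CMP_LG_* or an S_OR with a dead result) so that SCC holds the value,
// and conversely a value already in SCC is rematerialized into a full wave
// mask with S_MOV / S_CSELECT. Summary inferred from the surviving fragments.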
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
    unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
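// selectG_ADD_SUB: 32-bit adds/subs map directly onto S_ADD/S_SUB or
// V_ADD/V_SUB (with or without a carry def depending on hasAddNoCarry());
// 64-bit values are split into sub0/sub1 halves via getSubOperand64 and
// rebuilt with REG_SEQUENCE, chaining the carry through S_ADDC_U32 or
// V_ADDC_U32_e64.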
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
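// selectG_UADDO_USUBO_UADDE_USUBE: when the carry-out lives in VCC this is a
// simple opcode swap to V_ADD_CO/V_ADDC (or the SUB forms); otherwise the
// carry is routed through SCC, copying the incoming carry into SCC before the
// scalar add and copying SCC back out into Dst1 afterwards.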
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                    *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
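// selectG_BUILD_VECTOR handles the 2 x 16-bit case: constant halves are folded
// into a single 32-bit move, a build with an undef high half degrades to a
// plain COPY, VGPR results pack the halves with V_AND + V_LSHL_OR, and SGPR
// results pick an S_PACK_{LL,LH,HH,HL}_B32_B16 variant depending on which
// inputs are already shifted into the high half.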
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
             RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =
                                     STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
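// selectG_INTRINSIC dispatches non-side-effecting intrinsics: control-flow
// helpers such as amdgcn_if_break get dedicated pseudo instructions, the WQM
// family is lowered through constrainCopyLikeIntrin, and the remaining cases
// forward to per-intrinsic select routines (writelane, div_scale, ballot, the
// SMFMAC variants, and so on).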
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

        MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
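// The helper below appears to map an integer/FP predicate plus operand size
// onto the matching V_CMP opcode, preferring the true16/fake16 e64 encodings
// when 16-bit compares are available and otherwise falling back to the 32- or
// 64-bit forms.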
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
                                              unsigned Size) const {
    if (!STI.hasScalarCompareEq64())

      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

  if (!STI.hasSALUFloatInsts())

    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
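// selectG_ICMP_or_FCMP: compares whose result is not in VCC go through the
// scalar S_CMP_* table above and copy SCC into the destination register;
// VCC-producing compares are emitted directly as V_CMP_* instructions, adding
// the extra source-modifier operands when the chosen opcode takes them.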
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                                             *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
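// selectBallot (below) appears to copy the, possibly constant-folded, 1-bit
// source into a wave-mask-sized result: constants become S_MOV of 0 or -1,
// non-constant values are ANDed into the bool register class, and a wave32
// ballot feeding a 64-bit destination is widened with a zero high half via
// REG_SEQUENCE.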
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
      unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
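  // Per the computation above, the combined DS_ORDERED_COUNT offset packs the
  // ordered-count index (<< 2) into the low byte (Offset0), and the byte above
  // it (Offset1) carries, from low to high bits: wave_release, wave_done, the
  // shader type, the add/swap selector, and the dword count minus one.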
  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {
      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  const MCInstrDesc &InstrDesc = TII.get(Opc);

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))

      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))

      Register UndefReg = MRI->createVirtualRegister(SubRC);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;
  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
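// selectImageIntrinsic (below) appears to build a MIMG instruction from the
// G_AMDGPU_INTRIN_IMAGE_* form: it derives the dmask and VData dword counts
// (halved for packed D16), counts VAddr registers to decide between NSA and
// the contiguous encoding, picks the per-generation opcode, and copies or
// unpacks the atomic/texture result into the original destination register.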
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                       TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);
            << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

                STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {
    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
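// selectG_SELECT: a select on an SCC condition becomes S_CSELECT_B32/B64 with
// the condition first copied into SCC, while a VCC condition maps onto
// V_CNDMASK_B32_e64 with the true/false operands in src1/src0 order.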
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
    if (!SrcRC || !DstRC)

    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
        !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

    if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
      assert(STI.useRealTrue16Insts());
          .addReg(SrcReg, 0, AMDGPU::lo16);
      I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    return &RBI.getRegBankFromRegClass(*RC, LLT());
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      MachineInstr *ExtI =
      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

      STI.ldsRequiresM0Init()) {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3035bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3042 if (
Reg.isPhysical())
3046 const unsigned Opcode =
MI.getOpcode();
3048 if (Opcode == AMDGPU::COPY)
3051 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3052 Opcode == AMDGPU::G_XOR)
3057 return GI->is(Intrinsic::amdgcn_class);
3059 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
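// Select a conditional branch. An SCC condition maps directly to
// S_CBRANCH_SCC1; a VCC condition is first masked against EXEC and then
// branched on with S_CBRANCH_VCCNZ.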
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineOperand &CondOp = I.getOperand(0);

  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;

    const bool Is64 = STI.isWave64();
    const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

    Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
    BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)

      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
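// Select G_PTRMASK. A 64-bit scalar mask that cannot simply copy one half is
// handled with a single S_AND_B64; otherwise the pointer is split into 32-bit
// halves, each half is ANDed or copied as the known mask bits allow, and the
// result is rebuilt with REG_SEQUENCE.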
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)

    I.eraseFromParent();

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))

         "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)

    I.eraseFromParent();

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();

static std::pair<Register, unsigned>

  std::tie(IdxBaseReg, Offset) =

  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;

  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
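// Select G_EXTRACT_VECTOR_ELT with an SGPR index. SGPR vectors use S_MOVRELS
// through M0; VGPR vectors use V_MOVRELS or, when the subtarget supports it,
// VGPR index mode via the GPRIDX pseudo.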
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;

    MI.eraseFromParent();

  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)

    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);

  MI.eraseFromParent();
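// Select G_INSERT_VECTOR_ELT with an SGPR index: either write through M0 with
// the indirect MovRel pseudo, or use VGPR index mode via the GPRIDX pseudo
// when the vector is in VGPRs and the subtarget supports it.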
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(

  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);

  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

  std::tie(IdxReg, SubReg) =

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();

  MachineBasicBlock *BB = MI.getParent();

    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);

    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);

  MI.eraseFromParent();
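// Select the buffer-load-to-LDS intrinsics. The MUBUF opcode is chosen from
// the transfer size (1/2/4 bytes and, on subtargets with LDS b96/b128 loads,
// 12/16 bytes) and from whether a VGPR index and/or VGPR offset operand is
// present (BOTHEN/IDXEN/OFFEN/OFFSET variants).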
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();

  const bool HasVIndex = MI.getNumOperands() == 9;

    VIndex = MI.getOperand(4).getReg();

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;

  MachineBasicBlock *MBB = MI.getParent();

      .add(MI.getOperand(2));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)

  } else if (HasVIndex) {
  } else if (HasVOffset) {

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));

  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();

  MachinePointerInfo StorePtrI = LoadPtrI;

  MachineMemOperand *StoreMMO =

  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();

  if (VT->signBitIsZero(Reg))
    return matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
                        : matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
                        : matchSignExtendFromS32(Reg);

AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
                                                   bool IsSigned) const {
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();
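// Select the global-load-to-LDS intrinsic: pick the GLOBAL_LOAD_LDS_* opcode
// from the transfer size and try to fold an SGPR base plus a zero-extended
// 32-bit offset into the SADDR form of the instruction.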
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();

    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;

  MachineBasicBlock *MBB = MI.getParent();

      .add(MI.getOperand(2));

  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {

    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4));

  unsigned Aux = MI.getOperand(5).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;

  MachineMemOperand *StoreMMO =
                              sizeof(int32_t), Align(4));

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(

  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getMF());
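// Map each amdgcn_smfmac_* intrinsic to its V_SMFMAC_*_e64 machine opcode,
// rewrite the pseudo in place (dropping the intrinsic ID operand and moving
// the accumulator input), and mark the destination early-clobber.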
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getMF());
  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(

  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())

  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

  MachineOperand &FI = MI.getOperand(4);
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();

    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())

        .addImm(Subtarget->getWavefrontSizeLog2())

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))

  MI.eraseFromParent();
  unsigned NumOpcodes = 0;

  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

    for (unsigned I = 0; I < Src.size(); ++I) {

    if (Src.size() == 3) {

    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {

    Bits = SrcBits[Src.size()];

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  if (!getOperandBits(LHS, LHSBits) ||
      !getOperandBits(RHS, RHSBits)) {
    return std::make_pair(0, 0);

    NumOpcodes += Op.first;
    LHSBits = Op.second;

    NumOpcodes += Op.first;
    RHSBits = Op.second;

    return std::make_pair(0, 0);

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;

  return std::make_pair(NumOpcodes + 1, TTbl);
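// Fold a chain of G_AND/G_OR/G_XOR into a single V_BITOP3 instruction when
// the subtarget has BitOp3 and the chain is worth it (at least two logical
// ops, at most three distinct sources). The truth table computed by
// BitOp3_Op becomes the instruction's immediate operand.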
bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  if (!Subtarget->hasBitOp3Insts())

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  unsigned NumOpcodes;

  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

  if (NumOpcodes < 2 || Src.empty())

  const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
  if (NumOpcodes == 2 && IsB32) {
  } else if (NumOpcodes < 4) {

  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  if (!IsB32 && STI.hasTrue16BitInsts())
    Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
                                   : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
  unsigned CBL = STI.getConstantBusLimit(Opc);
  MachineBasicBlock *MBB = MI.getParent();

  for (unsigned I = 0; I < Src.size(); ++I) {
    const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
    if (RB->getID() != AMDGPU::SGPRRegBankID)

    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  while (Src.size() < 3)
    Src.push_back(Src[0]);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();

  MachineBasicBlock *MBB = MI.getParent();

    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

        .addImm(Subtarget->getWavefrontSizeLog2())

  MI.eraseFromParent();
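// Top-level entry point of the instruction selector: dispatch each generic
// opcode to its dedicated select* routine. Several cases try the manual
// selection first and fall through when it does not apply.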
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_USUB_COND:
  case TargetOpcode::G_ATOMICRMW_USUB_SAT:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
    return selectBVHIntersectRayIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
    return selectCOPY_SCC_VCC(I);
  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
    return selectCOPY_VCC_SCC(I);
  case AMDGPU::G_AMDGPU_READANYLANE:
    return selectReadAnyLane(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);

Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {

                   TII.get(AMDGPU::COPY), VGPRSrc)
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), false);

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), true,

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },

  if (MI->getOpcode() != AMDGPU::G_TRUNC)

  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  return DstSize * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_LSHR)

  std::optional<ValueAndVReg> ShiftAmt;

  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_SHL)

  std::optional<ValueAndVReg> ShiftAmt;

  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)

  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
         MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
static std::optional<std::pair<Register, SrcStatus>>

  unsigned Opc = MI->getOpcode();

  case AMDGPU::G_BITCAST:
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
    if (MI->getOperand(1).getReg().isPhysical())
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
  case AMDGPU::G_FNEG: {
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Stat});

  switch (Curr.second) {
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(

  return std::nullopt;

  bool HasNeg = false;
  bool HasOpsel = true;

  unsigned Opc = MI->getOpcode();

  if (Opc < TargetOpcode::GENERIC_OP_END) {
  } else if (Opc == TargetOpcode::G_INTRINSIC) {
    if (IntrinsicID == Intrinsic::amdgcn_fdot2)

  while (Depth <= MaxDepth && Curr.has_value()) {
    Statlist.push_back(Curr.value());

static std::pair<Register, SrcStatus>

  while (Depth <= MaxDepth && Curr.has_value()) {
      LastSameOrNeg = Curr.value();

  return LastSameOrNeg;

  unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
  unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
  return Width1 == Width2;

         IsHalfState(HiStat);
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(

    return {RootReg, Mods};

  SearchOptions SO(RootReg, MRI);

  MachineInstr *MI = MRI.getVRegDef(Stat.first);

  if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
      (IsDOT && Subtarget->hasDOTOpSelHazard())) {
    return {Stat.first, Mods};

  if (StatlistHi.empty()) {
    return {Stat.first, Mods};

  if (StatlistLo.empty()) {
    return {Stat.first, Mods};

  for (int I = StatlistHi.size() - 1; I >= 0; I--) {
    for (int J = StatlistLo.size() - 1; J >= 0; J--) {
      if (StatlistHi[I].first == StatlistLo[J].first &&
                         StatlistHi[I].first, RootReg, TII, MRI))
        return {StatlistHi[I].first,
                updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};

  return {Stat.first, Mods};

  return RB->getID() == RBNo;

  if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||

  if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {

  Register DstReg = MRI.cloneVirtualRegister(RootReg);

  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,

  std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root, true);

AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");

      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

  switch (Elts.size()) {
    DstRegClass = &AMDGPU::VReg_256RegClass;
    DstRegClass = &AMDGPU::VReg_128RegClass;
    DstRegClass = &AMDGPU::VReg_64RegClass;

  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {
    if (Elts.size() != NegAbsElts.size()) {
    assert(ModOpcode == TargetOpcode::G_FABS);
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  assert(BV->getNumSources() > 0);

  MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
  unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)

  for (unsigned i = 0; i < BV->getNumSources(); ++i) {
    ElF32 = MRI->getVRegDef(BV->getSourceReg(i));

  if (BV->getNumSources() == EltsF32.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {

  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  assert(CV->getNumSources() > 0);
  MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));

  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)

  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));

  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
    if (TII.isInlineConstant(FPValReg->Value)) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

    if (TII.isInlineConstant(ICst)) {
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex32(MachineOperand &Root) const {
    S32 = matchAnyExtendFromS32(Src);

  if (Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
      Src = Def->getOperand(2).getReg();

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },

AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },

bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
                                                  bool IsSigned) const {
  if (!Subtarget->hasScaleOffset())

  MachineMemOperand *MMO = *MI.memoperands_begin();

    OffsetReg = Def->Reg;

        m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&
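// Compute the base and offset operands for an SMRD load: walk the address via
// getAddrModeInfo, encode the constant offset as an immediate when possible,
// and otherwise materialize or reuse an SGPR soffset, optionally scaled.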
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 bool *ScaleOffset) const {
  MachineBasicBlock *MBB = MI->getParent();

  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

    *ScaleOffset = false;

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      Register OffsetReg = GEPI2.SgprParts[1];
          selectScaleOffset(Root, OffsetReg, false);
      OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
        Base = GEPI2.SgprParts[0];
        *SOffset = OffsetReg;

          auto SKnown = VT->getKnownBits(*SOffset);
          if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Register OffsetReg = GEPI.SgprParts[1];
    *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
    OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};

AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }

AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};

AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset, &ScaleOffset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  if (!STI.hasFlatInstOffsets())

  int64_t ConstOffset;

  std::tie(PtrBase, ConstOffset, IsInBounds) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 ||
       !isFlatScratchBaseLegal(Root.getReg())) ||

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))

  return std::pair(PtrBase, ConstOffset);

AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
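// Match the global SADDR addressing mode: split off a constant offset, keep
// an SGPR base with a zero- or sign-extended 32-bit VGPR offset where the
// pattern allows it, and fall back to materializing the voffset (or splitting
// an oversized immediate) when the subtarget's offset rules require it.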
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
                                             bool NeedIOffset) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
      ImmOffset = ConstOffset;

      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
            std::tie(SplitImmOffset, RemainderOffset) =

          if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
            MachineBasicBlock *MBB = MI->getParent();
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
            BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

                .addImm(RemainderOffset);

                 [=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },

                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
                [=](MachineInstrBuilder &MIB) {
                [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },

      unsigned NumLiterals =
          !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
          !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
      if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
      bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
                                           Subtarget->hasSignedGVSOffset());
      if (Register VOffset = matchExtendFromS32OrS32(
              PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
          return {{[=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
        return {{[=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

        [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
        [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }

      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, 0);

AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
  return selectGlobalSAddr(Root, PassedCPol, false);

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
  return selectGlobalSAddr(Root, PassedCPol, false);
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();

      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())

  auto VKnown = VT->getKnownBits(VAddr);
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
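// Match the scratch SVS addressing mode (VGPR address plus SGPR base): peel
// off a legal constant offset, require the VGPR/SGPR operands to sit on the
// expected register banks, and reject cases that would hit the SVS swizzle
// bug.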
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  unsigned CPol = selectScaleOffset(Root, RHS, true)

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineBasicBlock *MBB = MI->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

    return {{[=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {

  std::optional<int> FI;

  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
        (!STI.privateMemoryResourceIsRangeChecked() ||
         VT->signBitIsZero(PtrBase))) {
      const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
      if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {

  return {{[=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())

  return VT->signBitIsZero(Base);

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())

  return VT->signBitIsZero(Base);

  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (STI.hasSignedScratchOffsets())

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =

    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)

  return VT->signBitIsZero(LHS);

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (STI.hasSignedScratchOffsets())

  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
  if (STI.hasSignedScratchOffsets())

  std::optional<DefinitionAndSourceRegister> BaseDef =
  std::optional<ValueAndVReg> RHSOffset =

       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =

  if (RHS->countr_one() >= ShAmtBits)

  const APInt &LHSKnownZeros = VT->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;

AMDGPUInstructionSelector::selectMUBUFScratchOffset(
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  std::optional<DefinitionAndSourceRegister> Def =
  assert(Def && "this shouldn't be an optional result");

        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

  if (!TII.isLegalMUBUFImmOffset(Offset))

        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) {

      !TII.isLegalMUBUFImmOffset(Offset))

      [=](MachineInstrBuilder &MIB) {
      [=](MachineInstrBuilder &MIB) {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    if (isDSOffsetLegal(PtrBase, Offset)) {
      return std::pair(PtrBase, Offset);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);

AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },

AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);

AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);

AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    int64_t OffsetValue0 = Offset;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      return std::pair(PtrBase, OffsetValue0 / Size);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);
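// Split a G_PTR_ADD with a constant right-hand side into its base register
// and immediate offset; anything else is returned unchanged with offset 0.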
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};

  std::optional<ValueAndVReg> MaybeOffset =
    return {Root, 0, false};

  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
  B.buildInstr(AMDGPU::S_MOV_B32)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0)
      .addImm(AMDGPU::sub1);

    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0_sub1)
      .addImm(AMDGPU::sub2_sub3);

  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Src, *MRI);

  if (MachineInstr *InputAdd
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
  if (TII.isLegalMUBUFImmOffset(ImmOffset))

  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)

bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
  if (!STI.hasAddr64() || STI.useFlatForGlobal())

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

  splitIllegalMUBUFOffset(B, SOffset, Offset);

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
  if (STI.useFlatForGlobal())

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

  splitIllegalMUBUFOffset(B, SOffset, Offset);
6571AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6577 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6583 [=](MachineInstrBuilder &MIB) {
6586 [=](MachineInstrBuilder &MIB) {
6589 [=](MachineInstrBuilder &MIB) {
6592 else if (STI.hasRestrictedSOffset())
6593 MIB.
addReg(AMDGPU::SGPR_NULL);
6597 [=](MachineInstrBuilder &MIB) {
6607AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6612 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6616 [=](MachineInstrBuilder &MIB) {
6619 [=](MachineInstrBuilder &MIB) {
6622 else if (STI.hasRestrictedSOffset())
6623 MIB.
addReg(AMDGPU::SGPR_NULL);
6635AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6640 SOffset = AMDGPU::SGPR_NULL;
6642 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6646static std::optional<uint64_t>
6650 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6651 return std::nullopt;
6652 return Lo_32(*OffsetVal);
6656 AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
6657   std::optional<uint64_t> OffsetVal =
6662   std::optional<int64_t> EncodedImm =
6667   return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
6671 AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
6678   std::optional<int64_t> EncodedImm =
6683   return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
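// S_BUFFER_LOAD with both an SGPR soffset and an encoded immediate part; fails
// (std::nullopt) when either piece cannot be formed.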
6687 AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
6695     return std::nullopt;
6697   std::optional<int64_t> EncodedOffset =
6700     return std::nullopt;
6703   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
6704            [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
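// Compute source modifiers for mad_mix/fma_mix operands.  Starting from the
// ordinary VOP3 modifiers, CheckAbsNeg repeatedly looks through neg/abs so
// they can be folded into the packed modifier bits; Matched reports whether
// the mixed-precision (f16 extended to f32) pattern was recognized.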
6707 std::pair<Register, unsigned>
6708 AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
6709                                                      bool &Matched) const {
6714   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
6724   const auto CheckAbsNeg = [&]() {
6729     std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6760 AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6765   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6770     [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6771     [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6776 AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
6780   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6783     [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6784     [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
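// amdgcn.s.barrier.signal.isfirst: emit S_BARRIER_SIGNAL_ISFIRST_IMM with the
// intrinsic's immediate operand and constrain the boolean result register to
// the SReg_32_XM0_XEXEC class.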
6788 bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6792   Register CCReg = I.getOperand(0).getReg();
6797   BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6798       .addImm(I.getOperand(2).getImm());
6802   I.eraseFromParent();
6803   return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
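// amdgcn.s.get.barrier.state: the barrier operand is either encoded as an
// immediate (S_GET_BARRIER_STATE_IMM) or first copied into M0
// (S_GET_BARRIER_STATE_M0); the destination register is then constrained to
// its register class.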
6807 bool AMDGPUInstructionSelector::selectSGetBarrierState(
6811   const MachineOperand &BarOp = I.getOperand(2);
6812   std::optional<int64_t> BarValImm =
6816   auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6820   MachineInstrBuilder MIB;
6821   unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6822                            : AMDGPU::S_GET_BARRIER_STATE_M0;
6825   auto DstReg = I.getOperand(0).getReg();
6826   const TargetRegisterClass *DstRC =
6827       TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6828   if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6834   I.eraseFromParent();
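// Map a named-barrier intrinsic to the IMM or M0 flavour of its machine
// opcode, depending on whether the barrier id is an inline constant.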
6839 if (HasInlineConst) {
6843 case Intrinsic::amdgcn_s_barrier_join:
6844 return AMDGPU::S_BARRIER_JOIN_IMM;
6845 case Intrinsic::amdgcn_s_wakeup_barrier:
6846 return AMDGPU::S_WAKEUP_BARRIER_IMM;
6847 case Intrinsic::amdgcn_s_get_named_barrier_state:
6848 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6854 case Intrinsic::amdgcn_s_barrier_join:
6855 return AMDGPU::S_BARRIER_JOIN_M0;
6856 case Intrinsic::amdgcn_s_wakeup_barrier:
6857 return AMDGPU::S_WAKEUP_BARRIER_M0;
6858 case Intrinsic::amdgcn_s_get_named_barrier_state:
6859 return AMDGPU::S_GET_BARRIER_STATE_M0;
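// Barrier init/signal with a variable id: the barrier id and member count are
// combined through a chain of SGPR temporaries (the count field shifted by 16)
// before emitting the M0 form of S_BARRIER_INIT or S_BARRIER_SIGNAL.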
6864 bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6868   const MachineOperand &BarOp = I.getOperand(1);
6869   const MachineOperand &CntOp = I.getOperand(2);
6872 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6878 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6885 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6891 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6892 constexpr unsigned ShAmt = 16;
6898 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6908 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6909 ? AMDGPU::S_BARRIER_INIT_M0
6910 : AMDGPU::S_BARRIER_SIGNAL_M0;
6911 MachineInstrBuilder MIB;
6914 I.eraseFromParent();
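// Remaining named-barrier operations (join, wakeup, get-state): a non-constant
// barrier id is computed into M0 via scratch SGPRs; for
// s_get_named_barrier_state the result register is constrained, and a constant
// id is reduced to the 6-bit barrier field (bits 4..9).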
6918 bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6922   MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6925   std::optional<int64_t> BarValImm =
6930   Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6936   Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6942   auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6947   MachineInstrBuilder MIB;
6951   if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6952     auto DstReg = I.getOperand(0).getReg();
6953     const TargetRegisterClass *DstRC =
6954         TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6955 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6961 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6965 I.eraseFromParent();
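// The render* helpers below are custom operand renderers referenced by the
// imported TableGen patterns: each asserts the shape of what was matched
// (a G_CONSTANT/G_FCONSTANT or an immediate operand index) and adds the
// transformed immediate to the instruction being built.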
6972   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6973          "Expected G_CONSTANT");
6974   MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6980   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6981          "Expected G_CONSTANT");
6982   MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6988   const MachineOperand &Op = MI.getOperand(1);
6989   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6990   MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6996   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6997          "Expected G_CONSTANT");
6998   MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
7006   const MachineOperand &Op = MI.getOperand(OpIdx);
7023   assert(OpIdx >= 0 && "expected to match an immediate operand");
7027 void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7029   assert(OpIdx >= 0 && "expected to match an immediate operand");
7034 void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7036   assert(OpIdx >= 0 && "expected to match an immediate operand");
7042 void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7044   assert(OpIdx >= 0 && "expected to match an immediate operand");
7049 void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7051   assert(OpIdx >= 0 && "expected to match an immediate operand");
7057 void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7059   assert(OpIdx >= 0 && "expected to match an immediate operand");
7064 void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7066   assert(OpIdx >= 0 && "expected to match an immediate operand");
7071 void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7073   assert(OpIdx >= 0 && "expected to match an immediate operand");
7078 void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7080   assert(OpIdx >= 0 && "expected to match an immediate operand");
7089   assert(OpIdx >= 0 && "expected to match an immediate operand");
7098   assert(OpIdx >= 0 && "expected to match an immediate operand");
7105 void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7107   assert(OpIdx >= 0 && "expected to match an immediate operand");
7108   const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
7123   const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7125   assert(ExpVal != INT_MIN);
7143   if (MI.getOperand(OpIdx).getImm())
7145   MIB.addImm((int64_t)Mods);
7152   if (MI.getOperand(OpIdx).getImm())
7154   MIB.addImm((int64_t)Mods);
7160   unsigned Val = MI.getOperand(OpIdx).getImm();
7168   MIB.addImm((int64_t)Mods);
7174   uint32_t V = MI.getOperand(2).getImm();
7177   if (!Subtarget->hasSafeCUPrefetch())
7183 void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7185   unsigned Val = MI.getOperand(OpIdx).getImm();
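// The inline-immediate predicates simply defer to SIInstrInfo::isInlineConstant.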
7194 bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7195   return TII.isInlineConstant(Imm);
7198 bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7199   return TII.isInlineConstant(Imm);