#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  MRI = &MF.getRegInfo();
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
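// Change the opcode of a copy-like intrinsic (wqm, wwm, ...) in place and
// constrain both operands to a single common register class.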
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);
  const TargetRegisterClass *DstRC
      = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
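// Select a generic COPY. Copies into a VCC-bank (condition) destination need
// extra handling: an SCC source is constrained via its register class, while
// a non-VCC source is masked and compared against zero to form a lane mask.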
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
          = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
      const TargetRegisterClass *SrcRC
          = TRI.getConstrainedRegClassForOperand(Src, *MRI);
      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();
  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
  Register DstReg = I.getOperand(0).getReg();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  auto RFL =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  const TargetRegisterClass *DefRC =
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();
    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                                   unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
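// Select G_ADD/G_SUB. 32-bit values map directly onto a scalar or vector
// add/sub; 64-bit values without a native 64-bit add are split into a low
// add and a high add-with-carry, then recombined with a REG_SEQUENCE.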
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))
    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
        .add(I.getOperand(1))
        .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
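// Select G_UADDO/G_USUBO/G_UADDE/G_USUBE. The VALU path uses the carry-in /
// carry-out add and sub forms; the SALU path models the carry through SCC
// with S_ADD(C)/S_SUB(B) and copies SCC in or out as needed.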
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
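// Select G_EXTRACT as a subregister copy. Only 32-bit aligned offsets with
// results no wider than 128 bits are handled on this path.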
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                        *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;
  MachineOperand &Src = MI.getOperand(NumDst);
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
  MI.eraseFromParent();
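// Select G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC of 16-bit elements. Two
// constant operands fold into a single 32-bit immediate move; otherwise the
// halves are packed with V_LSHL_OR on the VALU path or S_PACK_* on the SALU
// path.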
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *BB = MI.getParent();
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);
      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));
          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
             RBI.constrainGenericRegister(Src0, RC, *MRI);
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  MachineBasicBlock *MBB = MI.getParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)
  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineBasicBlock *MBB = MI.getParent();
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
  MachineBasicBlock *MBB = MI.getParent();
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());
      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  MachineBasicBlock *MBB = MI.getParent();
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
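// Dispatch side-effect-free G_INTRINSIC instructions to the dedicated
// selectors above; simple copy-like intrinsics (wqm, wwm, ...) go through
// constrainCopyLikeIntrin.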
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
                                            unsigned Size) const {
    if (!STI.hasScalarCompareEq64())
      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
  if (!STI.hasSALUFloatInsts())
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
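// Select G_ICMP/G_FCMP. A result on the SGPR bank becomes an S_CMP_* writing
// SCC (then copied out), while a result on the VCC bank uses the matching
// V_CMP_* instruction.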
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
  MachineInstrBuilder ICmp;
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));
                                 *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
  if (MI->getParent() != MBB)
  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
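// Select llvm.amdgcn.ballot. A constant condition folds to a 0 / all-ones
// mask move; otherwise the lane-mask source is copied (masked with an S_AND
// where needed), and a 64-bit ballot on wave32 is widened with a zero high
// half through a REG_SEQUENCE.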
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();
  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
  std::optional<ValueAndVReg> Arg =
  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());
    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  unsigned Depth = I.getOperand(2).getImm();
  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    I.eraseFromParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));
        Fn, "ds_ordered_count: bad index operand", DL));
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstr *Readfirstlane = nullptr;
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
  std::tie(BaseOffset, ImmOffset) =
  if (Readfirstlane) {
    if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))
  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  const MCInstrDesc &InstrDesc = TII.get(Opc);
    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
      Register UndefReg = MRI->createVirtualRegister(SubRC);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  MachineBasicBlock *MBB = MI.getParent();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();
  TFE = TexFailCtrl & 0x1;
  LWE = TexFailCtrl & 0x2;
  return TexFailCtrl == 0;
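// Select an image (MIMG) intrinsic: compute the dmask and data dword count
// from the base opcode (atomic, store, or load), choose the MIMG encoding for
// the subtarget (NSA vs. contiguous VADDR, GFX10+/GFX11+, GFX90A), and build
// the final MIMG instruction with its address and control operands.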
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();
    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                    TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;
      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
    IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);
                 << "requested image instruction is not supported on this GPU\n");
                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
      if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
               STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
  if (!Subtarget->hasGFX90AInsts()) {
    MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();
  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();
  unsigned Offset = MI.getOperand(6).getImm();
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  const MachineOperand &CCOp = I.getOperand(1);
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
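// Select G_TRUNC. A plain truncation becomes a subregister copy; truncating
// to a pair of 16-bit elements packs the two halves with shift/and/or
// sequences (or an SDWA move when available).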
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)
  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());
        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)
  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
  return &RBI.getRegBankFromRegClass(*RC, LLT());
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      MachineInstr *ExtI =
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
  assert(Mask.size() == 2);
  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())
  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
  MachineBasicBlock *BB = MI.getParent();
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  MachineBasicBlock *BB = MI.getParent();
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
      assert(GEPInfo.Imm == 0);
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  const MachineMemOperand *MMO = *MI.memoperands_begin();
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
      STI.ldsRequiresM0Init()) {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
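// Select G_BRCOND. An SCC condition branches with S_CBRANCH_SCC1; a VCC-bank
// condition is ANDed with EXEC into VCC and branched on with
// S_CBRANCH_VCCNZ.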
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineOperand &CondOp = I.getOperand(0);
  const TargetRegisterClass *ConstrainRC;
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
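
// G_PTRMASK becomes an AND of the pointer with the mask. A 64-bit SGPR pointer
// can use a single S_AND_B64; otherwise the pointer is split into 32-bit
// halves, halves whose mask bits are known all-ones are copied through
// unmodified, and the result is rebuilt with a REG_SEQUENCE.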
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  // Try to avoid emitting a bit operation when we only need to touch half of
  // the 64-bit pointer.
  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
  const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
  const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
    return false;

  if (Ty.getSizeInBits() == 32) {
    assert(MaskTy.getSizeInBits() == 32 &&
           "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*NewOp, TII, TRI, RBI);
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  Register MaskedLo, MaskedHi;

  if (CanCopyLow32) {
    // If all the bits in the low half are 1, we only need a copy for it.
    MaskedLo = LoReg;
  } else {
    // Extract the mask subregister and apply the and.
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        .addReg(LoReg)
        .addReg(MaskLo);
  }

  if (CanCopyHi32) {
    // If all the bits in the high half are 1, we only need a copy for it.
    MaskedHi = HiReg;
  } else {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        .addReg(HiReg)
        .addReg(MaskHi);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(MaskedLo)
      .addImm(AMDGPU::sub0)
      .addReg(MaskedHi)
      .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}
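
// Split an indirect vector index into a dynamic base register plus a constant
// offset, so MOVREL / GPR indexing can fold the constant part of the index
// into the subregister operand instead of the index register.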
3236static std::pair<Register, unsigned>
3243 std::tie(IdxBaseReg,
Offset) =
3245 if (IdxBaseReg == AMDGPU::NoRegister) {
3249 IdxBaseReg = IdxReg;
3256 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3257 return std::pair(IdxReg, SubRegs[0]);
3258 return std::pair(IdxBaseReg, SubRegs[
Offset]);
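
// Dynamic extract: SGPR vectors use M0 + S_MOVRELS; VGPR vectors use
// M0 + V_MOVRELS or, on subtargets with VGPR index mode, a GPR-indexing
// pseudo that is expanded later.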
3261bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3267 LLT DstTy = MRI->getType(DstReg);
3268 LLT SrcTy = MRI->getType(SrcReg);
3270 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3271 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3272 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3276 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3279 const TargetRegisterClass *SrcRC =
3280 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3281 const TargetRegisterClass *DstRC =
3282 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3283 if (!SrcRC || !DstRC)
3285 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3286 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3287 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3290 MachineBasicBlock *BB =
MI.getParent();
3298 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3302 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3305 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3309 MI.eraseFromParent();
3316 if (!STI.useVGPRIndexMode()) {
3317 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3319 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3322 MI.eraseFromParent();
3326 const MCInstrDesc &GPRIDXDesc =
3327 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3333 MI.eraseFromParent();
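
// Dynamic insert mirrors the extract path: the index goes through M0 (or VGPR
// index mode) and the element is written with an indirect reg-write pseudo.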
3338bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3345 LLT VecTy = MRI->getType(DstReg);
3346 LLT ValTy = MRI->getType(ValReg);
3350 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3351 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3352 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3358 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3361 const TargetRegisterClass *VecRC =
3362 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3363 const TargetRegisterClass *ValRC =
3364 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3366 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3367 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3368 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3369 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3372 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3376 std::tie(IdxReg,
SubReg) =
3379 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3380 STI.useVGPRIndexMode();
3382 MachineBasicBlock *BB =
MI.getParent();
3386 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3389 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3390 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3395 MI.eraseFromParent();
3399 const MCInstrDesc &GPRIDXDesc =
3400 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3407 MI.eraseFromParent();
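
// Buffer-load-to-LDS: pick the BUFFER_LOAD_*_LDS_* opcode from the transfer
// size and from whether a VGPR index and/or VGPR offset operand is present
// (BOTHEN / IDXEN / OFFEN / OFFSET variants).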
3411bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3412 if (!Subtarget->hasVMemToLDSLoad())
3415 unsigned Size =
MI.getOperand(3).getImm();
3418 const bool HasVIndex =
MI.getNumOperands() == 9;
3422 VIndex =
MI.getOperand(4).getReg();
3426 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3427 std::optional<ValueAndVReg> MaybeVOffset =
3429 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3435 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3436 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3437 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3438 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3441 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3442 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3443 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3444 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3447 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3448 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3449 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3450 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3453 if (!Subtarget->hasLDSLoadB96_B128())
3456 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3457 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3458 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3459 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3462 if (!Subtarget->hasLDSLoadB96_B128())
3465 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3466 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3467 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3468 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3472 MachineBasicBlock *
MBB =
MI.getParent();
3475 .
add(
MI.getOperand(2));
3479 if (HasVIndex && HasVOffset) {
3480 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3481 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3488 }
else if (HasVIndex) {
3490 }
else if (HasVOffset) {
3494 MIB.
add(
MI.getOperand(1));
3495 MIB.
add(
MI.getOperand(5 + OpOffset));
3496 MIB.
add(
MI.getOperand(6 + OpOffset));
3498 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3506 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3511 MachinePointerInfo StorePtrI = LoadPtrI;
3522 MachineMemOperand *StoreMMO =
3528 MI.eraseFromParent();
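
// Helpers that look through a G_MERGE_VALUES (and known-zero sign bits) to
// find a 64-bit value that is really a zero- or sign-extended 32-bit value,
// returning the 32-bit source register for address folding.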
3540 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3546 return Def->getOperand(1).getReg();
3560 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3568 return Def->getOperand(1).getReg();
3570 if (
VT->signBitIsZero(
Reg))
3571 return matchZeroExtendFromS32(
Reg);
3579AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3581 : matchZeroExtendFromS32(
Reg);
3587AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3589 : matchSignExtendFromS32(
Reg);
3593AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3594 bool IsSigned)
const {
3596 return matchSignExtendFromS32OrS32(
Reg);
3598 return matchZeroExtendFromS32OrS32(
Reg);
3608 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3615 return Def->getOperand(1).getReg();
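
// Global-load-to-LDS: choose GLOBAL_LOAD_LDS_* from the transfer size, then
// try to split the address into an SGPR base plus a zero-extended 32-bit
// VGPR offset so the SADDR form can be used.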
3620bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3621 if (!Subtarget->hasVMemToLDSLoad())
3625 unsigned Size =
MI.getOperand(3).getImm();
3631 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3634 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3637 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3640 if (!Subtarget->hasLDSLoadB96_B128())
3642 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3645 if (!Subtarget->hasLDSLoadB96_B128())
3647 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3651 MachineBasicBlock *
MBB =
MI.getParent();
3654 .
add(
MI.getOperand(2));
3660 if (!isSGPR(Addr)) {
3662 if (isSGPR(AddrDef->Reg)) {
3663 Addr = AddrDef->Reg;
3664 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3667 if (isSGPR(SAddr)) {
3668 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3669 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3680 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3692 MIB.
add(
MI.getOperand(4));
3694 unsigned Aux =
MI.getOperand(5).getImm();
3697 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3699 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3700 MachinePointerInfo StorePtrI = LoadPtrI;
3709 MachineMemOperand *StoreMMO =
3711 sizeof(int32_t),
Align(4));
3715 MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
    MachineInstr &MI) const {
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getMF());
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
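
// Sparse-MFMA intrinsics map one-to-one onto V_SMFMAC_* pseudos; the switch
// below only translates the intrinsic ID into an opcode, then the accumulator
// input is moved into the tied vdst_in position.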
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
    break;
  default:
    return false;
  }

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getMF());
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1)
    MI.getOperand(0).setIsEarlyClobber(true);
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
    return false;
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())
    return false;

  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

  MachineOperand &FI = MI.getOperand(4);
  FI.setImm(FI.getImm() ? AMDGPU::DPP::DPP_FI_1 : AMDGPU::DPP::DPP_FI_0);

  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
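
// A wave address differs from a stack address by the wave-size scaling of the
// scratch offset: convert by shifting right by log2(wavefront size), using a
// VALU or SALU shift depending on the destination bank.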
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addReg(SrcReg);
  } else {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2());
  }

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
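
// BITOP3 selection: every operand of a (G_AND/G_OR/G_XOR) tree is assigned a
// canonical truth-table byte (0xf0, 0xcc, 0xaa for up to three distinct
// sources); folding the tree over those bytes yields the 8-bit table the
// V_BITOP3 instruction consumes, together with the number of logic ops folded.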
3892 unsigned NumOpcodes = 0;
3905 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3916 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3930 if (Src.size() == 3) {
3937 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3938 if (Src[
I] ==
LHS) {
3948 Bits = SrcBits[Src.size()];
3954 switch (
MI->getOpcode()) {
3955 case TargetOpcode::G_AND:
3956 case TargetOpcode::G_OR:
3957 case TargetOpcode::G_XOR: {
3962 if (!getOperandBits(
LHS, LHSBits) ||
3963 !getOperandBits(
RHS, RHSBits)) {
3965 return std::make_pair(0, 0);
3971 NumOpcodes +=
Op.first;
3972 LHSBits =
Op.second;
3977 NumOpcodes +=
Op.first;
3978 RHSBits =
Op.second;
3983 return std::make_pair(0, 0);
3987 switch (
MI->getOpcode()) {
3988 case TargetOpcode::G_AND:
3989 TTbl = LHSBits & RHSBits;
3991 case TargetOpcode::G_OR:
3992 TTbl = LHSBits | RHSBits;
3994 case TargetOpcode::G_XOR:
3995 TTbl = LHSBits ^ RHSBits;
4001 return std::make_pair(NumOpcodes + 1, TTbl);
4004bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4005 if (!Subtarget->hasBitOp3Insts())
4009 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4010 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4016 unsigned NumOpcodes;
4018 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4022 if (NumOpcodes < 2 || Src.empty())
4025 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4026 if (NumOpcodes == 2 && IsB32) {
4034 }
else if (NumOpcodes < 4) {
4041 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4042 if (!IsB32 && STI.hasTrue16BitInsts())
4043 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4044 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4045 unsigned CBL = STI.getConstantBusLimit(
Opc);
4046 MachineBasicBlock *
MBB =
MI.getParent();
4049 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4050 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4051 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4057 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4068 while (Src.size() < 3)
4069 Src.push_back(Src[0]);
4086 MI.eraseFromParent();
4091bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4093 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4096 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4098 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4100 MachineBasicBlock *
MBB =
MI.getParent();
4104 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4107 .
addImm(Subtarget->getWavefrontSizeLog2())
4114 MI.eraseFromParent();
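
// Main entry point: anything that is not a generic pre-isel opcode is treated
// as an already-selected copy-like instruction; generic opcodes are dispatched
// to the per-opcode helpers below, with the TableGen'erated selectImpl()
// patterns used where no manual handling is needed.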
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
4127 case TargetOpcode::G_AND:
4128 case TargetOpcode::G_OR:
4129 case TargetOpcode::G_XOR:
4130 if (selectBITOP3(
I))
4134 return selectG_AND_OR_XOR(
I);
4135 case TargetOpcode::G_ADD:
4136 case TargetOpcode::G_SUB:
4137 case TargetOpcode::G_PTR_ADD:
4140 return selectG_ADD_SUB(
I);
4141 case TargetOpcode::G_UADDO:
4142 case TargetOpcode::G_USUBO:
4143 case TargetOpcode::G_UADDE:
4144 case TargetOpcode::G_USUBE:
4145 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4146 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4147 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4148 return selectG_AMDGPU_MAD_64_32(
I);
4149 case TargetOpcode::G_INTTOPTR:
4150 case TargetOpcode::G_BITCAST:
4151 case TargetOpcode::G_PTRTOINT:
4152 case TargetOpcode::G_FREEZE:
4153 return selectCOPY(
I);
4154 case TargetOpcode::G_FNEG:
4157 return selectG_FNEG(
I);
4158 case TargetOpcode::G_FABS:
4161 return selectG_FABS(
I);
4162 case TargetOpcode::G_EXTRACT:
4163 return selectG_EXTRACT(
I);
4164 case TargetOpcode::G_MERGE_VALUES:
4165 case TargetOpcode::G_CONCAT_VECTORS:
4166 return selectG_MERGE_VALUES(
I);
4167 case TargetOpcode::G_UNMERGE_VALUES:
4168 return selectG_UNMERGE_VALUES(
I);
4169 case TargetOpcode::G_BUILD_VECTOR:
4170 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4171 return selectG_BUILD_VECTOR(
I);
4172 case TargetOpcode::G_IMPLICIT_DEF:
4173 return selectG_IMPLICIT_DEF(
I);
4174 case TargetOpcode::G_INSERT:
4175 return selectG_INSERT(
I);
4176 case TargetOpcode::G_INTRINSIC:
4177 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4178 return selectG_INTRINSIC(
I);
4179 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4180 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4181 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4182 case TargetOpcode::G_ICMP:
4183 case TargetOpcode::G_FCMP:
4184 if (selectG_ICMP_or_FCMP(
I))
4187 case TargetOpcode::G_LOAD:
4188 case TargetOpcode::G_ZEXTLOAD:
4189 case TargetOpcode::G_SEXTLOAD:
4190 case TargetOpcode::G_STORE:
4191 case TargetOpcode::G_ATOMIC_CMPXCHG:
4192 case TargetOpcode::G_ATOMICRMW_XCHG:
4193 case TargetOpcode::G_ATOMICRMW_ADD:
4194 case TargetOpcode::G_ATOMICRMW_SUB:
4195 case TargetOpcode::G_ATOMICRMW_AND:
4196 case TargetOpcode::G_ATOMICRMW_OR:
4197 case TargetOpcode::G_ATOMICRMW_XOR:
4198 case TargetOpcode::G_ATOMICRMW_MIN:
4199 case TargetOpcode::G_ATOMICRMW_MAX:
4200 case TargetOpcode::G_ATOMICRMW_UMIN:
4201 case TargetOpcode::G_ATOMICRMW_UMAX:
4202 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4203 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4204 case TargetOpcode::G_ATOMICRMW_FADD:
4205 case TargetOpcode::G_ATOMICRMW_FMIN:
4206 case TargetOpcode::G_ATOMICRMW_FMAX:
4207 return selectG_LOAD_STORE_ATOMICRMW(
I);
4208 case TargetOpcode::G_SELECT:
4209 return selectG_SELECT(
I);
4210 case TargetOpcode::G_TRUNC:
4211 return selectG_TRUNC(
I);
4212 case TargetOpcode::G_SEXT:
4213 case TargetOpcode::G_ZEXT:
4214 case TargetOpcode::G_ANYEXT:
4215 case TargetOpcode::G_SEXT_INREG:
4219 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4222 return selectG_SZA_EXT(
I);
4223 case TargetOpcode::G_FPEXT:
4224 if (selectG_FPEXT(
I))
4227 case TargetOpcode::G_BRCOND:
4228 return selectG_BRCOND(
I);
4229 case TargetOpcode::G_GLOBAL_VALUE:
4230 return selectG_GLOBAL_VALUE(
I);
4231 case TargetOpcode::G_PTRMASK:
4232 return selectG_PTRMASK(
I);
4233 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4234 return selectG_EXTRACT_VECTOR_ELT(
I);
4235 case TargetOpcode::G_INSERT_VECTOR_ELT:
4236 return selectG_INSERT_VECTOR_ELT(
I);
4237 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4238 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4239 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4240 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4241 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4244 assert(Intr &&
"not an image intrinsic with image pseudo");
4245 return selectImageIntrinsic(
I, Intr);
4247 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4248 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4249 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4250 return selectBVHIntersectRayIntrinsic(
I);
4251 case AMDGPU::G_SBFX:
4252 case AMDGPU::G_UBFX:
4253 return selectG_SBFX_UBFX(
I);
4254 case AMDGPU::G_SI_CALL:
4255 I.setDesc(TII.get(AMDGPU::SI_CALL));
4257 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4258 return selectWaveAddress(
I);
4259 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4260 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4263 case AMDGPU::G_STACKRESTORE:
4264 return selectStackRestore(
I);
4266 return selectPHI(
I);
4267 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4268 return selectCOPY_SCC_VCC(
I);
4269 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4270 return selectCOPY_VCC_SCC(
I);
4271 case AMDGPU::G_AMDGPU_READANYLANE:
4272 return selectReadAnyLane(
I);
4273 case TargetOpcode::G_CONSTANT:
4274 case TargetOpcode::G_FCONSTANT:
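
// InstructionSelector complex-pattern helpers: each select* routine below
// returns a ComplexRendererFns, i.e. a small list of lambdas that add the
// matched operands (registers, modifier masks, offsets) to the instruction
// being built.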
4282AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4289std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4290 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4294 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4295 Src =
MI->getOperand(1).getReg();
4298 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4303 if (
LHS &&
LHS->isZero()) {
4305 Src =
MI->getOperand(2).getReg();
4309 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4310 Src =
MI->getOperand(1).getReg();
4317 return std::pair(Src, Mods);
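
// When source modifiers were folded out of the operand (or a VGPR is
// explicitly required) and the source still lives on the SGPR bank, insert a
// COPY to a fresh VGPR immediately before the user so the selected VALU
// instruction does not end up with an illegal SGPR operand.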
4320Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4322 bool ForceVGPR)
const {
4323 if ((Mods != 0 || ForceVGPR) &&
4324 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4331 TII.
get(AMDGPU::COPY), VGPRSrc)
4343AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4345 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4350AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4353 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4356 [=](MachineInstrBuilder &MIB) {
4357 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4359 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4360 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4361 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4366AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4369 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4374 [=](MachineInstrBuilder &MIB) {
4375 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4377 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4378 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4379 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4384AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4386 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4387 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4388 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4393AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4396 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4399 [=](MachineInstrBuilder &MIB) {
4400 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4402 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4407AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4411 std::tie(Src, Mods) =
4412 selectVOP3ModsImpl(Root.
getReg(),
false);
4415 [=](MachineInstrBuilder &MIB) {
4416 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4418 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4423AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4426 std::tie(Src, Mods) =
4427 selectVOP3ModsImpl(Root.
getReg(),
true,
4431 [=](MachineInstrBuilder &MIB) {
4432 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4434 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4439AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4442 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4445 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4470 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4473 unsigned DstSize =
MRI.getType(
MI->getOperand(0).getReg()).getSizeInBits();
4474 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4475 return DstSize * 2 == SrcSize;
4481 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4485 std::optional<ValueAndVReg> ShiftAmt;
4488 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4489 unsigned Shift = ShiftAmt->Value.getZExtValue();
4490 return Shift * 2 == SrcSize;
4498 if (
MI->getOpcode() != AMDGPU::G_SHL)
4502 std::optional<ValueAndVReg> ShiftAmt;
4505 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4506 unsigned Shift = ShiftAmt->Value.getZExtValue();
4507 return Shift * 2 == SrcSize;
4515 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4517 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4518 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4688static std::optional<std::pair<Register, SrcStatus>>
4693 unsigned Opc =
MI->getOpcode();
4697 case AMDGPU::G_BITCAST:
4698 return std::optional<std::pair<Register, SrcStatus>>(
4699 {
MI->getOperand(1).getReg(), Curr.second});
4701 if (
MI->getOperand(1).getReg().isPhysical())
4702 return std::nullopt;
4703 return std::optional<std::pair<Register, SrcStatus>>(
4704 {
MI->getOperand(1).getReg(), Curr.second});
4705 case AMDGPU::G_FNEG: {
4708 return std::nullopt;
4709 return std::optional<std::pair<Register, SrcStatus>>(
4710 {
MI->getOperand(1).getReg(), Stat});
4717 switch (Curr.second) {
4720 return std::optional<std::pair<Register, SrcStatus>>(
4723 if (Curr.first ==
MI->getOperand(0).getReg())
4724 return std::optional<std::pair<Register, SrcStatus>>(
4726 return std::optional<std::pair<Register, SrcStatus>>(
4738 return std::optional<std::pair<Register, SrcStatus>>(
4742 if (Curr.first ==
MI->getOperand(0).getReg())
4743 return std::optional<std::pair<Register, SrcStatus>>(
4745 return std::optional<std::pair<Register, SrcStatus>>(
4751 return std::optional<std::pair<Register, SrcStatus>>(
4756 return std::optional<std::pair<Register, SrcStatus>>(
4761 return std::optional<std::pair<Register, SrcStatus>>(
4766 return std::optional<std::pair<Register, SrcStatus>>(
4772 return std::nullopt;
4782 bool HasNeg =
false;
4784 bool HasOpsel =
true;
4789 unsigned Opc =
MI->getOpcode();
4791 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4794 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4797 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4821 while (
Depth <= MaxDepth && Curr.has_value()) {
4824 Statlist.push_back(Curr.value());
4831static std::pair<Register, SrcStatus>
4838 while (
Depth <= MaxDepth && Curr.has_value()) {
4844 LastSameOrNeg = Curr.value();
4849 return LastSameOrNeg;
4854 unsigned Width1 =
MRI.getType(Reg1).getSizeInBits();
4855 unsigned Width2 =
MRI.getType(Reg2).getSizeInBits();
4856 return Width1 == Width2;
4892 IsHalfState(HiStat);
4895std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
4901 return {RootReg, Mods};
4904 SearchOptions SO(RootReg, MRI);
4915 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
4917 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
4918 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
4920 return {Stat.first, Mods};
4926 if (StatlistHi.
empty()) {
4928 return {Stat.first, Mods};
4934 if (StatlistLo.
empty()) {
4936 return {Stat.first, Mods};
4939 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
4940 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
4941 if (StatlistHi[
I].first == StatlistLo[J].first &&
4943 StatlistHi[
I].first, RootReg, TII, MRI))
4944 return {StatlistHi[
I].first,
4945 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
4951 return {Stat.first, Mods};
4961 return RB->
getID() == RBNo;
4978 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI,
MRI,
TRI) ||
4983 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
4989 Register DstReg =
MRI.cloneVirtualRegister(RootReg);
4992 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5000AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5005 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5009 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5010 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5015AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5017 return selectVOP3PRetHelper(Root);
5021AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5023 return selectVOP3PRetHelper(Root,
true);
5027AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5030 "expected i1 value");
5036 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5044 switch (Elts.
size()) {
5046 DstRegClass = &AMDGPU::VReg_256RegClass;
5049 DstRegClass = &AMDGPU::VReg_128RegClass;
5052 DstRegClass = &AMDGPU::VReg_64RegClass;
5059 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5060 .addDef(
MRI.createVirtualRegister(DstRegClass));
5061 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5072 if (ModOpcode == TargetOpcode::G_FNEG) {
5076 for (
auto El : Elts) {
5082 if (Elts.size() != NegAbsElts.
size()) {
5091 assert(ModOpcode == TargetOpcode::G_FABS);
5099AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5105 assert(BV->getNumSources() > 0);
5107 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5108 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5111 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5112 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5119 if (BV->getNumSources() == EltsF32.
size()) {
5125 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5126 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5130AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5136 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5144 if (CV->getNumSources() == EltsV2F16.
size()) {
5151 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5152 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5156AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5162 assert(CV->getNumSources() > 0);
5163 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5165 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5169 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5170 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5177 if (CV->getNumSources() == EltsV2F16.
size()) {
5184 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5185 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5189AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5190 std::optional<FPValueAndVReg> FPValReg;
5192 if (TII.isInlineConstant(FPValReg->Value)) {
5193 return {{[=](MachineInstrBuilder &MIB) {
5194 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5204 if (TII.isInlineConstant(ICst)) {
5214AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5220 std::optional<ValueAndVReg> ShiftAmt;
5222 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5223 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5224 Key = ShiftAmt->Value.getZExtValue() / 8;
5229 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5230 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5235AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5242 std::optional<ValueAndVReg> ShiftAmt;
5244 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5245 ShiftAmt->Value.getZExtValue() == 16) {
5251 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5252 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5257AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5264 S32 = matchAnyExtendFromS32(Src);
5268 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5273 Src =
Def->getOperand(2).getReg();
5280 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5281 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5286AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5289 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5293 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5294 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5300AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5303 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5309 [=](MachineInstrBuilder &MIB) {
5311 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5313 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5318AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5321 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5327 [=](MachineInstrBuilder &MIB) {
5329 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5331 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5338bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5340 bool IsSigned)
const {
5341 if (!Subtarget->hasScaleOffset())
5345 MachineMemOperand *MMO = *
MI.memoperands_begin();
5357 OffsetReg =
Def->Reg;
5372 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5376 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5377 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5378 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5379 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5392bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5396 bool *ScaleOffset)
const {
5398 MachineBasicBlock *
MBB =
MI->getParent();
5403 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5405 if (AddrInfo.
empty())
5408 const GEPInfo &GEPI = AddrInfo[0];
5409 std::optional<int64_t> EncodedImm;
5412 *ScaleOffset =
false;
5417 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5418 AddrInfo.
size() > 1) {
5419 const GEPInfo &GEPI2 = AddrInfo[1];
5420 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5421 Register OffsetReg = GEPI2.SgprParts[1];
5424 selectScaleOffset(Root, OffsetReg,
false );
5425 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5427 Base = GEPI2.SgprParts[0];
5428 *SOffset = OffsetReg;
5437 auto SKnown =
VT->getKnownBits(*SOffset);
5438 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5450 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5451 Base = GEPI.SgprParts[0];
5457 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5463 Base = GEPI.SgprParts[0];
5464 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5465 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5470 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5471 Register OffsetReg = GEPI.SgprParts[1];
5473 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5474 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5476 Base = GEPI.SgprParts[0];
5477 *SOffset = OffsetReg;
5486AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5489 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5491 return std::nullopt;
5493 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5494 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5498AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5500 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5502 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5503 return std::nullopt;
5505 const GEPInfo &GEPInfo = AddrInfo[0];
5506 Register PtrReg = GEPInfo.SgprParts[0];
5507 std::optional<int64_t> EncodedImm =
5510 return std::nullopt;
5513 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5514 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5519AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5522 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5524 return std::nullopt;
5527 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5528 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5529 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5533AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5537 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5538 return std::nullopt;
5541 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5542 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5544 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5547std::pair<Register, int>
5548AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5549 uint64_t FlatVariant)
const {
5554 if (!STI.hasFlatInstOffsets())
5558 int64_t ConstOffset;
5560 std::tie(PtrBase, ConstOffset, IsInBounds) =
5561 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5567 if (ConstOffset == 0 ||
5569 !isFlatScratchBaseLegal(Root.
getReg())) ||
5573 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5574 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5577 return std::pair(PtrBase, ConstOffset);
5581AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5585 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5586 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5591AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5595 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5596 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5601AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5605 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5606 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
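
// Global SADDR matching: split the address into an SGPR base, an optional
// 32-bit zero/sign-extended VGPR offset, and an immediate offset that fits the
// instruction encoding; oversized constant offsets are split and the remainder
// materialized in a VGPR.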
5612AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5614 bool NeedIOffset)
const {
5617 int64_t ConstOffset;
5618 int64_t ImmOffset = 0;
5622 std::tie(PtrBase, ConstOffset, std::ignore) =
5623 getPtrBaseWithConstantOffset(Addr, *MRI);
5625 if (ConstOffset != 0) {
5630 ImmOffset = ConstOffset;
5633 if (isSGPR(PtrBaseDef->Reg)) {
5634 if (ConstOffset > 0) {
5640 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5642 std::tie(SplitImmOffset, RemainderOffset) =
5647 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5650 MachineBasicBlock *
MBB =
MI->getParent();
5652 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5654 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5656 .
addImm(RemainderOffset);
5660 [=](MachineInstrBuilder &MIB) {
5663 [=](MachineInstrBuilder &MIB) {
5666 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5667 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5670 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5671 [=](MachineInstrBuilder &MIB) {
5674 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5684 unsigned NumLiterals =
5685 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5686 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5687 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5688 return std::nullopt;
5695 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5700 if (isSGPR(SAddr)) {
5701 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5705 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5706 Subtarget->hasSignedGVSOffset());
5707 if (
Register VOffset = matchExtendFromS32OrS32(
5708 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5710 return {{[=](MachineInstrBuilder &MIB) {
5713 [=](MachineInstrBuilder &MIB) {
5716 [=](MachineInstrBuilder &MIB) {
5719 [=](MachineInstrBuilder &MIB) {
5723 return {{[=](MachineInstrBuilder &MIB) {
5726 [=](MachineInstrBuilder &MIB) {
5729 [=](MachineInstrBuilder &MIB) {
5739 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5740 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5741 return std::nullopt;
5746 MachineBasicBlock *
MBB =
MI->getParent();
5747 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5749 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5754 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5755 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5756 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5757 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5760 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5761 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5762 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5767AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5768 return selectGlobalSAddr(Root, 0);
5772AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5778 return selectGlobalSAddr(Root, PassedCPol);
5782AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
5788 return selectGlobalSAddr(Root, PassedCPol);
5792AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5797AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5804 return selectGlobalSAddr(Root, PassedCPol,
false);
5808AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
5815 return selectGlobalSAddr(Root, PassedCPol,
false);
5819AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5822 int64_t ConstOffset;
5823 int64_t ImmOffset = 0;
5827 std::tie(PtrBase, ConstOffset, std::ignore) =
5828 getPtrBaseWithConstantOffset(Addr, *MRI);
5830 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
5834 ImmOffset = ConstOffset;
5838 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5839 int FI = AddrDef->MI->getOperand(1).
getIndex();
5842 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5848 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5849 Register LHS = AddrDef->MI->getOperand(1).getReg();
5850 Register RHS = AddrDef->MI->getOperand(2).getReg();
5854 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5855 isSGPR(RHSDef->Reg)) {
5856 int FI = LHSDef->MI->getOperand(1).getIndex();
5860 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5862 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
5870 return std::nullopt;
5873 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
5874 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5879bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5881 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
5887 auto VKnown =
VT->getKnownBits(VAddr);
5890 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5891 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
5892 return (VMax & 3) + (
SMax & 3) >= 4;
5896AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5899 int64_t ConstOffset;
5900 int64_t ImmOffset = 0;
5904 std::tie(PtrBase, ConstOffset, std::ignore) =
5905 getPtrBaseWithConstantOffset(Addr, *MRI);
5908 if (ConstOffset != 0 &&
5912 ImmOffset = ConstOffset;
5916 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5917 return std::nullopt;
5919 Register RHS = AddrDef->MI->getOperand(2).getReg();
5920 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
5921 return std::nullopt;
5923 Register LHS = AddrDef->MI->getOperand(1).getReg();
5926 if (OrigAddr != Addr) {
5927 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5928 return std::nullopt;
5930 if (!isFlatScratchBaseLegalSV(OrigAddr))
5931 return std::nullopt;
5934 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
5935 return std::nullopt;
5937 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
5941 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5942 int FI = LHSDef->MI->getOperand(1).getIndex();
5944 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5946 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5947 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5956 return std::nullopt;
5959 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5960 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
5961 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5962 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5967AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5969 MachineBasicBlock *
MBB =
MI->getParent();
5971 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
5976 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5981 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5985 return {{[=](MachineInstrBuilder &MIB) {
5988 [=](MachineInstrBuilder &MIB) {
5991 [=](MachineInstrBuilder &MIB) {
5996 [=](MachineInstrBuilder &MIB) {
6005 std::optional<int> FI;
6008 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6010 int64_t ConstOffset;
6011 std::tie(PtrBase, ConstOffset, std::ignore) =
6012 getPtrBaseWithConstantOffset(VAddr, *MRI);
6013 if (ConstOffset != 0) {
6014 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6015 (!STI.privateMemoryResourceIsRangeChecked() ||
6016 VT->signBitIsZero(PtrBase))) {
6017 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6018 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6024 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6028 return {{[=](MachineInstrBuilder &MIB) {
6031 [=](MachineInstrBuilder &MIB) {
6037 [=](MachineInstrBuilder &MIB) {
6042 [=](MachineInstrBuilder &MIB) {
6047bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6052 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6057 return VT->signBitIsZero(
Base);
6060bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6062 unsigned Size)
const {
6063 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6068 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6073 return VT->signBitIsZero(
Base);
6078 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6079 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6086bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6094 if (STI.hasSignedScratchOffsets())
6100 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6101 std::optional<ValueAndVReg> RhsValReg =
6107 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6108 RhsValReg->Value.getSExtValue() > -0x40000000)
6112 return VT->signBitIsZero(
LHS);
6117bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6125 if (STI.hasSignedScratchOffsets())
6130 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6135bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6139 if (STI.hasSignedScratchOffsets())
6144 std::optional<DefinitionAndSourceRegister> BaseDef =
6146 std::optional<ValueAndVReg> RHSOffset =
6156 (RHSOffset->Value.getSExtValue() < 0 &&
6157 RHSOffset->Value.getSExtValue() > -0x40000000)))
6160 Register LHS = BaseDef->MI->getOperand(1).getReg();
6161 Register RHS = BaseDef->MI->getOperand(2).getReg();
6162 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6165bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6166 unsigned ShAmtBits)
const {
6167 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6169 std::optional<APInt>
RHS =
6174 if (
RHS->countr_one() >= ShAmtBits)
6177 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6178 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6182AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6185 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6187 std::optional<DefinitionAndSourceRegister>
Def =
6189 assert(Def &&
"this shouldn't be an optional result");
6194 [=](MachineInstrBuilder &MIB) {
6197 [=](MachineInstrBuilder &MIB) {
6200 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6211 if (!TII.isLegalMUBUFImmOffset(
Offset))
6219 [=](MachineInstrBuilder &MIB) {
6222 [=](MachineInstrBuilder &MIB) {
6230 !TII.isLegalMUBUFImmOffset(
Offset))
6234 [=](MachineInstrBuilder &MIB) {
6237 [=](MachineInstrBuilder &MIB) {
6244std::pair<Register, unsigned>
6245AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6246 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6247 int64_t ConstAddr = 0;
6251 std::tie(PtrBase,
Offset, std::ignore) =
6252 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6255 if (isDSOffsetLegal(PtrBase,
Offset)) {
6257 return std::pair(PtrBase,
Offset);
6259 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6268 return std::pair(Root.
getReg(), 0);
6272AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6275 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6277 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6283AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6284 return selectDSReadWrite2(Root, 4);
6288AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6289 return selectDSReadWrite2(Root, 8);
6293AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6294 unsigned Size)
const {
6299 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6301 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6305std::pair<Register, unsigned>
6306AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6307 unsigned Size)
const {
6308 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6309 int64_t ConstAddr = 0;
6313 std::tie(PtrBase,
Offset, std::ignore) =
6314 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6317 int64_t OffsetValue0 =
Offset;
6319 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6321 return std::pair(PtrBase, OffsetValue0 /
Size);
6323 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6331 return std::pair(Root.
getReg(), 0);
6339std::tuple<Register, int64_t, bool>
6340AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6343 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6344 return {Root, 0,
false};
6347 std::optional<ValueAndVReg> MaybeOffset =
6350 return {Root, 0,
false};
6365 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6366 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6367 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6368 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6370 B.buildInstr(AMDGPU::S_MOV_B32)
6373 B.buildInstr(AMDGPU::S_MOV_B32)
6380 B.buildInstr(AMDGPU::REG_SEQUENCE)
6383 .addImm(AMDGPU::sub0)
6385 .addImm(AMDGPU::sub1);
6389 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6390 B.buildInstr(AMDGPU::S_MOV_B64)
6395 B.buildInstr(AMDGPU::REG_SEQUENCE)
6398 .addImm(AMDGPU::sub0_sub1)
6400 .addImm(AMDGPU::sub2_sub3);
6407 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6416 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6423AMDGPUInstructionSelector::MUBUFAddressData
6424AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6425 MUBUFAddressData
Data;
6431 std::tie(PtrBase,
Offset, std::ignore) =
6432 getPtrBaseWithConstantOffset(Src, *MRI);
6438 if (MachineInstr *InputAdd
6440 Data.N2 = InputAdd->getOperand(1).getReg();
6441 Data.N3 = InputAdd->getOperand(2).getReg();
6456bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6462 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6463 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6469void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6471 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6475 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6476 B.buildInstr(AMDGPU::S_MOV_B32)
6482bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6487 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6490 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6491 if (!shouldUseAddr64(AddrData))
6497 Offset = AddrData.Offset;
6503 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6505 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6518 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6529 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6533bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6538 if (STI.useFlatForGlobal())
6541 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6542 if (shouldUseAddr64(AddrData))
6548 Offset = AddrData.Offset;
6554 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6559AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6565 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6571 [=](MachineInstrBuilder &MIB) {
6574 [=](MachineInstrBuilder &MIB) {
6577 [=](MachineInstrBuilder &MIB) {
6580 else if (STI.hasRestrictedSOffset())
6581 MIB.
addReg(AMDGPU::SGPR_NULL);
6585 [=](MachineInstrBuilder &MIB) {
6595AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6600 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6604 [=](MachineInstrBuilder &MIB) {
6607 [=](MachineInstrBuilder &MIB) {
6610 else if (STI.hasRestrictedSOffset())
6611 MIB.
addReg(AMDGPU::SGPR_NULL);
6623AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6628 SOffset = AMDGPU::SGPR_NULL;
6630 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6634static std::optional<uint64_t>
6638 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6639 return std::nullopt;
6640 return Lo_32(*OffsetVal);
6644AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6645 std::optional<uint64_t> OffsetVal =
6650 std::optional<int64_t> EncodedImm =
6655 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6659AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6666 std::optional<int64_t> EncodedImm =
6671 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6675AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
6683 return std::nullopt;
6685 std::optional<int64_t> EncodedOffset =
6688 return std::nullopt;
6691 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
6692 [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
6695std::pair<Register, unsigned>
6696AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
6697 bool &Matched) const {
6702 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
6712 const auto CheckAbsNeg = [&]() {
6717 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6748AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6753 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6758 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6759 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6764AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
6768 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6771 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6772 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6776bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6780 Register CCReg = I.getOperand(0).getReg();
6785 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6786 .addImm(I.getOperand(2).getImm());
6790 I.eraseFromParent();
6791 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
6795bool AMDGPUInstructionSelector::selectSGetBarrierState(
6799 const MachineOperand &BarOp = I.getOperand(2);
6800 std::optional<int64_t> BarValImm =
6804 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6808 MachineInstrBuilder MIB;
6809 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6810 : AMDGPU::S_GET_BARRIER_STATE_M0;
6813 auto DstReg = I.getOperand(0).getReg();
6814 const TargetRegisterClass *DstRC =
6815 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6816 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6822 I.eraseFromParent();
6827 if (HasInlineConst) {
6831 case Intrinsic::amdgcn_s_barrier_join:
6832 return AMDGPU::S_BARRIER_JOIN_IMM;
6833 case Intrinsic::amdgcn_s_get_named_barrier_state:
6834 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6840 case Intrinsic::amdgcn_s_barrier_join:
6841 return AMDGPU::S_BARRIER_JOIN_M0;
6842 case Intrinsic::amdgcn_s_get_named_barrier_state:
6843 return AMDGPU::S_GET_BARRIER_STATE_M0;
6848bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6852 const MachineOperand &BarOp = I.getOperand(1);
6853 const MachineOperand &CntOp = I.getOperand(2);
6856 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6862 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6869 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6875 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6876 constexpr unsigned ShAmt = 16;
6882 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6892 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6893 ? AMDGPU::S_BARRIER_INIT_M0
6894 : AMDGPU::S_BARRIER_SIGNAL_M0;
6895 MachineInstrBuilder MIB;
6898 I.eraseFromParent();
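// Standalone sketch of the value selectNamedBarrierInit accumulates through
// TmpReg0..TmpReg4 before it is copied into M0: the member count is shifted
// into the upper half (the ShAmt of 16 visible above) and combined with the
// barrier id in the lower half. The exact field widths are an illustrative
// assumption; only the 16-bit shift comes from the code above.
#include <cstdint>

inline uint32_t packBarrierInit(uint32_t BarrierId, uint32_t MemberCount) {
  constexpr unsigned ShAmt = 16;        // matches the constant in the selector
  return (MemberCount << ShAmt) | (BarrierId & 0xFFFFu);
}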
6902bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6906 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6909 std::optional<int64_t> BarValImm =
6914 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6920 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6926 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6931 MachineInstrBuilder MIB;
6935 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6936 auto DstReg = I.getOperand(0).getReg();
6937 const TargetRegisterClass *DstRC =
6938 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6939 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6945 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6949 I.eraseFromParent();
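// The immediate path above recovers the hardware barrier id from the intrinsic
// operand with ((*BarValImm) >> 4) & 0x3F. Restated as a standalone helper:
// drop the low four bits and keep a 6-bit id.
#include <cstdint>

inline unsigned namedBarrierIdFromImm(int64_t BarValImm) {
  return static_cast<unsigned>((BarValImm >> 4) & 0x3F); // 6-bit barrier id field
}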
6956 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6957 "Expected G_CONSTANT");
6958 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6964 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6965 "Expected G_CONSTANT");
6966 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6972 const MachineOperand &Op = MI.getOperand(1);
6973 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6974 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6980 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6981 "Expected G_CONSTANT");
6982 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
6990 const MachineOperand &Op = MI.getOperand(OpIdx);
7007 assert(OpIdx >= 0 && "expected to match an immediate operand");
7011void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7013 assert(OpIdx >= 0 && "expected to match an immediate operand");
7018void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7020 assert(OpIdx >= 0 && "expected to match an immediate operand");
7026void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7028 assert(OpIdx >= 0 && "expected to match an immediate operand");
7033void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7035 assert(OpIdx >= 0 && "expected to match an immediate operand");
7041void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7043 assert(OpIdx >= 0 && "expected to match an immediate operand");
7048void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7050 assert(OpIdx >= 0 && "expected to match an immediate operand");
7055void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7057 assert(OpIdx >= 0 && "expected to match an immediate operand");
7062void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7064 assert(OpIdx >= 0 && "expected to match an immediate operand");
7073 assert(OpIdx >= 0 && "expected to match an immediate operand");
7082 assert(OpIdx >= 0 && "expected to match an immediate operand");
7089void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7091 assert(OpIdx >= 0 && "expected to match an immediate operand");
7092 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
7107 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7109 assert(ExpVal != INT_MIN);
7127 if (MI.getOperand(OpIdx).getImm())
7129 MIB.addImm((int64_t)Mods);
7136 if (MI.getOperand(OpIdx).getImm())
7138 MIB.addImm((int64_t)Mods);
7144 unsigned Val = MI.getOperand(OpIdx).getImm();
7152 MIB.addImm((int64_t)Mods);
7158 uint32_t V = MI.getOperand(2).getImm();
7161 if (!Subtarget->hasSafeCUPrefetch())
7167void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7169 unsigned Val = MI.getOperand(OpIdx).getImm();
7178bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7179 return TII.isInlineConstant(Imm);
7182bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7183 return TII.isInlineConstant(Imm);