#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  MRI = &MF.getRegInfo();

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

    const LLT Ty = MRI.getType(Reg);

    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
      = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
         RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
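// Select a generic COPY. The interesting case is a copy that produces a VCC
// (lane-mask) value: a non-VCC source is compared against zero with a
// V_CMP_NE so the boolean lands in the wave-wide condition register.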
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
          = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {

      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
          = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =

            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);

    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
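// Lower a VCC -> SCC copy by comparing the lane mask against zero with
// S_CMP_LG_{U32,U64}; the result is then read back from SCC.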
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {

      STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

      .addReg(I.getOperand(1).getReg())

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
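// Lower an SCC -> VCC copy. A constant condition folds to S_MOV_B{32,64} of
// 0 or -1; otherwise SCC is materialized and S_CSELECT produces the mask.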
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
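// Select G_PHI to PHI, constraining the result and every incoming value to a
// register class derived from its register bank.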
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);

    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                         unsigned SubIdx) const {

    Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
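// Select 32- and 64-bit G_ADD/G_SUB. The SALU path uses S_ADD/S_SUB (with
// S_ADDC for the high half); the VALU path prefers the no-carry add when
// available and otherwise uses the carry-out forms with an unused carry
// register. 64-bit values are split into sub0/sub1 and rebuilt with
// REG_SEQUENCE.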
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();

  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());

        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
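// Select the carry-producing and carry-consuming add/sub opcodes. The VCC
// destination case simply retargets the instruction to the VALU carry forms;
// the scalar case goes through SCC with explicit carry-in/carry-out copies.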
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
                       .add(I.getOperand(2))
                       .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));

  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                               *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
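// Select G_BUILD_VECTOR(_TRUNC) of 16-bit elements. Two constants fold into a
// single 32-bit immediate; otherwise the halves are packed with
// V_AND/V_LSHL_OR on the VALU side or the S_PACK_* family on the SALU side,
// reusing already-shifted high halves where possible.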
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));

          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
             RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);

    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {

      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
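// Select 32-bit vector G_SBFX/G_UBFX to V_BFE_{I32,U32}; scalar and 64-bit
// forms are expanded earlier, in RegBankSelect.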
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =

    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =

                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;

    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:

    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
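// Map an IR comparison predicate and operand size to the matching VALU V_CMP
// opcode, choosing between 16-bit (true16/fake16), 32-bit and 64-bit
// encodings based on the subtarget.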
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,

    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
                                              unsigned Size) const {

  if (!STI.hasScalarCompareEq64())

      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

    if (!STI.hasSALUFloatInsts())

      return AMDGPU::S_CMP_EQ_F16;
      return AMDGPU::S_CMP_GT_F16;
      return AMDGPU::S_CMP_GE_F16;
      return AMDGPU::S_CMP_LT_F16;
      return AMDGPU::S_CMP_LE_F16;
      return AMDGPU::S_CMP_LG_F16;
      return AMDGPU::S_CMP_O_F16;
      return AMDGPU::S_CMP_U_F16;
      return AMDGPU::S_CMP_NLG_F16;
      return AMDGPU::S_CMP_NLE_F16;
      return AMDGPU::S_CMP_NLT_F16;
      return AMDGPU::S_CMP_NGE_F16;
      return AMDGPU::S_CMP_NGT_F16;
      return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                                             *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);

    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
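// Select llvm.amdgcn.ballot. A constant argument folds to zero or exec;
// otherwise the source mask is ANDed with exec. A 64-bit ballot on wave32
// widens the result with a zero high half via REG_SEQUENCE.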
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {

    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);

                                             AMDGPU::SReg_64RegClass, DL);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
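// Select llvm.amdgcn.ds.ordered.{add,swap} to DS_ORDERED_COUNT, packing the
// ordered-count index, dword count, wave_release/wave_done bits and shader
// type into the offset field after validating the operands.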
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {

        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {

          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

  if (Readfirstlane) {

    if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;

  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
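// Select an image (MIMG) intrinsic: derive the dmask, data and address dword
// counts, pick an encoding (NSA vs. default, GFX10+/GFX11+/GFX90A), and emit
// the machine instruction with its unorm, a16, tfe/lwe and d16 controls.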
bool AMDGPUInstructionSelector::selectImageIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;

                        TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;

      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =

    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {

                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {

                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {

                                     NumVDataDwords, NumVAddrDwords);

                 << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);

                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

    Register TmpReg = MRI->createVirtualRegister(
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

    if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

              STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

  MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;

  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;

  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);

  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {

      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

          .add(I.getOperand(3))

          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
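// Select G_TRUNC. Most truncates become subregister copies; the v2s32 ->
// v2s16 case packs the two halves with SDWA when available or with a
// shift/and/or sequence otherwise.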
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;

    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());

        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);

        .addReg(SrcReg, 0, AMDGPU::sub0);

        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {

      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

    return &RBI.getRegBankFromRegClass(*RC, LLT());
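// Select G_SEXT/G_ZEXT/G_ANYEXT/G_SEXT_INREG. A 32 -> 64 anyext pastes an
// IMPLICIT_DEF high half; VALU extensions use V_BFE, while SALU extensions
// use S_SEXT_I32_I{8,16} or S_BFE_{I,U}{32,64} depending on the source and
// destination sizes.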
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {

      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

      MachineInstr *ExtI =

      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =

    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());

      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

      STI.ldsRequiresM0Init()) {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())

  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)

    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {

  MachineOperand &CondOp = I.getOperand(0);

  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {

    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;

      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)

      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
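// selectG_PTRMASK (below) lowers pointer masking to AND instructions. Known-one
// bits of the mask let whole 32-bit halves be copied through unchanged; the
// scalar 64-bit case can use a single S_AND_B64, while otherwise the value is
// split into low/high halves, ANDed with the matching mask halves, and
// reassembled with a REG_SEQUENCE.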
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))

         "ptrmask should have been narrowed during legalize");

  auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
  I.eraseFromParent();

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  Register MaskLo = MRI->createVirtualRegister(&RegRC);
  MaskedLo = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
      .addReg(MaskReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

  Register MaskHi = MRI->createVirtualRegister(&RegRC);
  MaskedHi = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
      .addReg(MaskReg, 0, AMDGPU::sub1);
  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;

  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
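// selectG_EXTRACT_VECTOR_ELT (below) requires an SGPR index. SGPR-bank vectors
// are read via M0 plus S_MOVRELS_B32/B64; VGPR-bank vectors use M0 plus
// V_MOVRELS_B32_e32 or, when the subtarget supports it, the VGPR index mode
// through the GPRIDX pseudo.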
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();

  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);

  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))

  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

  std::tie(IdxReg, SubReg) =

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();

  MachineBasicBlock *BB = MI.getParent();

    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
  MI.eraseFromParent();
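// selectBufferLoadLds (below) selects buffer-to-LDS loads. The opcode is chosen
// by the load size stored in operand 3 and by whether a VGPR index and/or VGPR
// offset is present (BOTHEN / IDXEN / OFFEN / OFFSET variants); the DWORDX3 and
// DWORDX4 forms additionally require hasLDSLoadB96_B128().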
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();

  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;

  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));

  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  MachinePointerInfo StorePtrI = LoadPtrI;
  MachineMemOperand *StoreMMO =

  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();

  if (VT->signBitIsZero(Reg))
    return matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
                           : matchZeroExtendFromS32(Reg);

AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
                           : matchSignExtendFromS32(Reg);

AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
                                                   bool IsSigned) const {
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())

  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;

  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));

  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {

    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4));

  unsigned Aux = MI.getOperand(5).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;
  MachineMemOperand *StoreMMO =
      sizeof(int32_t), Align(4));

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
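// selectSMFMACIntrin (below) maps each amdgcn_smfmac_* intrinsic to its
// V_SMFMAC_*_e64 machine opcode, then rewrites the instruction in place: the
// accumulator input (operand 4) is preserved, the intrinsic ID operand is
// dropped, and implicit defs/uses are re-added.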
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())

  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

  MachineOperand &FI = MI.getOperand(4);
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();

    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addImm(Subtarget->getWavefrontSizeLog2())

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))

  MI.eraseFromParent();
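// The helper below computes a (number-of-ops, truth-table) pair for a tree of
// G_AND/G_OR/G_XOR nodes, using the canonical per-source constants 0xf0, 0xcc
// and 0xaa; selectBITOP3 then folds such trees into a single V_BITOP3_B32/B16
// when at least two logic operations are covered.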
  unsigned NumOpcodes = 0;

  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

  for (unsigned I = 0; I < Src.size(); ++I) {

  if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {

    Bits = SrcBits[Src.size()];

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);

    NumOpcodes += Op.first;
    LHSBits = Op.second;

    NumOpcodes += Op.first;
    RHSBits = Op.second;

    return std::make_pair(0, 0);

  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;

  return std::make_pair(NumOpcodes + 1, TTbl);
bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  if (!Subtarget->hasBitOp3Insts())

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

  if (NumOpcodes < 2 || Src.empty())

  const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
  if (NumOpcodes == 2 && IsB32) {
  } else if (NumOpcodes < 4) {

  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  if (!IsB32 && STI.hasTrue16BitInsts())
    Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
                                   : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
  unsigned CBL = STI.getConstantBusLimit(Opc);
  MachineBasicBlock *MBB = MI.getParent();

  for (unsigned I = 0; I < Src.size(); ++I) {
    const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
    if (RB->getID() != AMDGPU::SGPRRegBankID)
    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  while (Src.size() < 3)
    Src.push_back(Src[0]);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();

  MachineBasicBlock *MBB = MI.getParent();
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
        .addImm(Subtarget->getWavefrontSizeLog2())

  MI.eraseFromParent();
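// Main opcode dispatch. Anything that is not a pre-ISel generic opcode falls
// back to selectCOPY; generic opcodes are routed to the dedicated selectG_*
// helpers above, with G_AND/G_OR/G_XOR first given a chance to fold into
// BITOP3.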
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
    return selectBVHIntersectRayIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
    return selectCOPY_SCC_VCC(I);
  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
    return selectCOPY_VCC_SCC(I);
  case AMDGPU::G_AMDGPU_READANYLANE:
    return selectReadAnyLane(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);
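// copyToVGPRIfSrcFolded (below): when source modifiers were folded out of the
// matched value (or a VGPR operand is explicitly required) and the source is
// not already in the VGPR bank, the value is first copied into a fresh VGPR,
// presumably so the folded modifiers remain legal on the operand. The
// selectVOP3* complex-pattern helpers that follow feed their matched source
// through it.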
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
            TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), false);

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) =
      selectVOP3ModsImpl(Root.getReg(), true,

      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
  if (MI->getOpcode() != AMDGPU::G_TRUNC)

  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  return DstSize * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_LSHR)

  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_SHL)

  std::optional<ValueAndVReg> ShiftAmt;
  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
  unsigned Shift = ShiftAmt->Value.getZExtValue();
  return Shift * 2 == SrcSize;

  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
         MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
static std::optional<std::pair<Register, SrcStatus>>
  unsigned Opc = MI->getOpcode();
  case AMDGPU::G_BITCAST:
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
    if (MI->getOperand(1).getReg().isPhysical())
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Curr.second});
  case AMDGPU::G_FNEG: {
      return std::nullopt;
    return std::optional<std::pair<Register, SrcStatus>>(
        {MI->getOperand(1).getReg(), Stat});

  switch (Curr.second) {
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    if (Curr.first == MI->getOperand(0).getReg())
      return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(
    return std::optional<std::pair<Register, SrcStatus>>(

  return std::nullopt;

  bool HasNeg = false;
  bool HasOpsel = true;

  unsigned Opc = MI->getOpcode();
  if (Opc < TargetOpcode::GENERIC_OP_END) {
  } else if (Opc == TargetOpcode::G_INTRINSIC) {
    if (IntrinsicID == Intrinsic::amdgcn_fdot2)

  while (Depth <= MaxDepth && Curr.has_value()) {
    Statlist.push_back(Curr.value());
static std::pair<Register, SrcStatus>
  while (Depth <= MaxDepth && Curr.has_value()) {
      LastSameOrNeg = Curr.value();

  return LastSameOrNeg;

  unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
  unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
  return Width1 == Width2;

         IsHalfState(HiStat);
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
    return {RootReg, Mods};

  SearchOptions SO(RootReg, MRI);

  MachineInstr *MI = MRI.getVRegDef(Stat.first);
  if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
      (IsDOT && Subtarget->hasDOTOpSelHazard())) {
    return {Stat.first, Mods};

  if (StatlistHi.empty()) {
    return {Stat.first, Mods};

  if (StatlistLo.empty()) {
    return {Stat.first, Mods};

  for (int I = StatlistHi.size() - 1; I >= 0; I--) {
    for (int J = StatlistLo.size() - 1; J >= 0; J--) {
      if (StatlistHi[I].first == StatlistLo[J].first &&
                          StatlistHi[I].first, RootReg, TII, MRI))
        return {StatlistHi[I].first,
                updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};

  return {Stat.first, Mods};
  return RB->getID() == RBNo;

  if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||

  if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {

  Register DstReg = MRI.cloneVirtualRegister(RootReg);
  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,
  std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  return selectVOP3PRetHelper(Root, true);
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

  switch (Elts.size()) {
    DstRegClass = &AMDGPU::VReg_256RegClass;
    DstRegClass = &AMDGPU::VReg_128RegClass;
    DstRegClass = &AMDGPU::VReg_64RegClass;

  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {
    if (Elts.size() != NegAbsElts.size()) {
    assert(ModOpcode == TargetOpcode::G_FABS);
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
  assert(BV->getNumSources() > 0);
  MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
  unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
  for (unsigned i = 0; i < BV->getNumSources(); ++i) {
    ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
  if (BV->getNumSources() == EltsF32.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};

AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  assert(CV->getNumSources() > 0);
  MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
  if (CV->getNumSources() == EltsV2F16.size()) {

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (TII.isInlineConstant(FPValReg->Value)) {
    return {{[=](MachineInstrBuilder &MIB) {
      MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());

  if (TII.isInlineConstant(ICst)) {

AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  std::optional<ValueAndVReg> ShiftAmt;
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectSWMMACIndex32(MachineOperand &Root) const {
    S32 = matchAnyExtendFromS32(Src);

  if (Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
      Src = Def->getOperand(2).getReg();

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }

AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },

AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),

      [=](MachineInstrBuilder &MIB) {
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
                                                  bool IsSigned) const {
  if (!Subtarget->hasScaleOffset())

  MachineMemOperand *MMO = *MI.memoperands_begin();
    OffsetReg = Def->Reg;

          m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&
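// selectSmrdOffset (below) turns the address info collected by getAddrModeInfo
// into SMRD operands: a single SGPR base with an encodable immediate, an SGPR
// base plus an SGPR soffset (optionally scaled and zero-extended from 32 bits),
// or a 32-bit immediate materialized into an SGPR with S_MOV_B32.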
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 bool *ScaleOffset) const {
  MachineBasicBlock *MBB = MI->getParent();
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

    *ScaleOffset = false;

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      Register OffsetReg = GEPI2.SgprParts[1];
          selectScaleOffset(Root, OffsetReg, false);
      OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
        Base = GEPI2.SgprParts[0];
        *SOffset = OffsetReg;

        auto SKnown = VT->getKnownBits(*SOffset);
        if (*Offset + SKnown.getMinValue().getSExtValue() < 0)

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    Register OffsetReg = GEPI.SgprParts[1];
    *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
    OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};

AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }

AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr,
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};

AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset, &ScaleOffset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  if (!STI.hasFlatInstOffsets())

  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset, IsInBounds) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 ||
       !isFlatScratchBaseLegal(Root.getReg())) ||

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))

  return std::pair(PtrBase, ConstOffset);

AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
                                             bool NeedIOffset) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
      ImmOffset = ConstOffset;

    if (isSGPR(PtrBaseDef->Reg)) {
      if (ConstOffset > 0) {
        int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
        std::tie(SplitImmOffset, RemainderOffset) =

        if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
          MachineBasicBlock *MBB = MI->getParent();
              MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
          BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
              .addImm(RemainderOffset);

              [=](MachineInstrBuilder &MIB) {
              [=](MachineInstrBuilder &MIB) {
              [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
              [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
              [=](MachineInstrBuilder &MIB) {
              [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },

      unsigned NumLiterals =
          !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
          !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
      if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return std::nullopt;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
      bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
                                           Subtarget->hasSignedGVSOffset());
      if (Register VOffset = matchExtendFromS32OrS32(
              PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
          return {{[=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
                   [=](MachineInstrBuilder &MIB) {
        return {{[=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {
                 [=](MachineInstrBuilder &MIB) {

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)

      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }

      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }

AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, 0);

AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
  return selectGlobalSAddr(Root, PassedCPol);

AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
  return selectGlobalSAddr(Root, PassedCPol, false);

AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
  return selectGlobalSAddr(Root, PassedCPol, false);
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())

  auto VKnown = VT->getKnownBits(VAddr);
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;

AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
    ImmOffset = ConstOffset;

  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  unsigned CPol = selectScaleOffset(Root, RHS, true)

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }

    return std::nullopt;

      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineBasicBlock *MBB = MI->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),

    return {{[=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {
             [=](MachineInstrBuilder &MIB) {

  std::optional<int> FI;
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset, std::ignore) =
      getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
        (!STI.privateMemoryResourceIsRangeChecked() ||
         VT->signBitIsZero(PtrBase))) {
      const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
      if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {

  return {{[=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {
           [=](MachineInstrBuilder &MIB) {

bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())

  return VT->signBitIsZero(Base);
bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())

  return VT->signBitIsZero(Base);

  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (STI.hasSignedScratchOffsets())

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg =

    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)

  return VT->signBitIsZero(LHS);

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (STI.hasSignedScratchOffsets())

  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);

bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
  if (STI.hasSignedScratchOffsets())

  std::optional<DefinitionAndSourceRegister> BaseDef =
  std::optional<ValueAndVReg> RHSOffset =
       (RHSOffset->Value.getSExtValue() < 0 &&
        RHSOffset->Value.getSExtValue() > -0x40000000)))

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
  if (RHS->countr_one() >= ShAmtBits)

  const APInt &LHSKnownZeros = VT->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;

AMDGPUInstructionSelector::selectMUBUFScratchOffset(
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  std::optional<DefinitionAndSourceRegister> Def =
  assert(Def && "this shouldn't be an optional result");

        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }

  if (!TII.isLegalMUBUFImmOffset(Offset))

        [=](MachineInstrBuilder &MIB) {
        [=](MachineInstrBuilder &MIB) {

      !TII.isLegalMUBUFImmOffset(Offset))

      [=](MachineInstrBuilder &MIB) {
      [=](MachineInstrBuilder &MIB) {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    if (isDSOffsetLegal(PtrBase, Offset)) {
      return std::pair(PtrBase, Offset);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);

AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },

AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);

AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);

AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

    int64_t OffsetValue0 = Offset;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      return std::pair(PtrBase, OffsetValue0 / Size);
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  return std::pair(Root.getReg(), 0);
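// getPtrBaseWithConstantOffset (below) peels a G_PTR_ADD with a constant RHS
// into a (base, offset) pair and otherwise returns the original register with
// offset 0; the DS, MUBUF, and FLAT selectors above use it to fold immediate
// offsets.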
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};

  std::optional<ValueAndVReg> MaybeOffset =
    return {Root, 0, false};

  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
  B.buildInstr(AMDGPU::S_MOV_B32)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0)
      .addImm(AMDGPU::sub1);

    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addImm(AMDGPU::sub0_sub1)
      .addImm(AMDGPU::sub2_sub3);

  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;

  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Src, *MRI);

  if (MachineInstr *InputAdd
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();

bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;

void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
  if (TII.isLegalMUBUFImmOffset(ImmOffset))

  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)

bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
  if (!STI.hasAddr64() || STI.useFlatForGlobal())

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

  if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {

  splitIllegalMUBUFOffset(B, SOffset, Offset);

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
  if (STI.useFlatForGlobal())

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))

  Offset = AddrData.Offset;

  splitIllegalMUBUFOffset(B, SOffset, Offset);

AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))

      [=](MachineInstrBuilder &MIB) {
      [=](MachineInstrBuilder &MIB) {
      [=](MachineInstrBuilder &MIB) {
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
      [=](MachineInstrBuilder &MIB) {

AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))

      [=](MachineInstrBuilder &MIB) {
      [=](MachineInstrBuilder &MIB) {
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);

AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
6572static std::optional<uint64_t>
6576 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6577 return std::nullopt;
6578 return Lo_32(*OffsetVal);
6582AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6583 std::optional<uint64_t> OffsetVal =
6588 std::optional<int64_t> EncodedImm =
6593 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6597AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6604 std::optional<int64_t> EncodedImm =
6609 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6613AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6621 return std::nullopt;
6623 std::optional<int64_t> EncodedOffset =
6626 return std::nullopt;
6629 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6630 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
6633std::pair<Register, unsigned>
6634AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
6635 bool &Matched)
const {
6640 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
6650 const auto CheckAbsNeg = [&]() {
6655 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6686AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6691 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6696 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
6697 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
6702AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
6706 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6709 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6710 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
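// selectSBarrierSignalIsfirst: emits S_BARRIER_SIGNAL_ISFIRST_IMM with the
// immediate barrier operand and constrains the condition-code result to
// SReg_32_XM0_XEXEC.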
6714bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6718 Register CCReg = I.getOperand(0).getReg();
6723 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6724     .addImm(I.getOperand(2).getImm());
6728 I.eraseFromParent();
6729 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
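// selectSGetBarrierState: copies the barrier operand into M0 and picks the
// _IMM or _M0 form of S_GET_BARRIER_STATE depending on whether the barrier id
// is a compile-time constant, then constrains the destination register.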
6733bool AMDGPUInstructionSelector::selectSGetBarrierState(
6737 MachineOperand BarOp = I.getOperand(2);
6738 std::optional<int64_t> BarValImm =
6742 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6746 MachineInstrBuilder MIB;
6747 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6748 : AMDGPU::S_GET_BARRIER_STATE_M0;
6751 auto DstReg = I.getOperand(0).getReg();
6752 const TargetRegisterClass *DstRC =
6753     TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6754 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6760 I.eraseFromParent();
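// getNamedBarrierOp: maps a named-barrier intrinsic to the inline-constant
// (_IMM) or M0-based (_M0) opcode variant.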
6765 if (HasInlineConst) {
6769 case Intrinsic::amdgcn_s_barrier_join:
6770 return AMDGPU::S_BARRIER_JOIN_IMM;
6771 case Intrinsic::amdgcn_s_get_named_barrier_state:
6772 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6778 case Intrinsic::amdgcn_s_barrier_join:
6779 return AMDGPU::S_BARRIER_JOIN_M0;
6780 case Intrinsic::amdgcn_s_get_named_barrier_state:
6781 return AMDGPU::S_GET_BARRIER_STATE_M0;
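// selectNamedBarrierInit: the chain of SGPR temporaries (largely elided here)
// appears to pack the barrier id and member count, with the count shifted by
// ShAmt = 16, into M0 before emitting S_BARRIER_INIT_M0 or S_BARRIER_SIGNAL_M0.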
6786bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6790 MachineOperand BarOp = I.getOperand(1);
6791 MachineOperand CntOp = I.getOperand(2);
6794 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6800 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6807 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6813 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6814 constexpr unsigned ShAmt = 16;
6820 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6830 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6831 ? AMDGPU::S_BARRIER_INIT_M0
6832 : AMDGPU::S_BARRIER_SIGNAL_M0;
6833 MachineInstrBuilder MIB;
6836 I.eraseFromParent();
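// selectNamedBarrierInst: with a constant operand the barrier id is taken from
// bits [9:4] of the immediate; otherwise the value is routed through M0. For
// amdgcn_s_get_named_barrier_state the destination register is also constrained.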
6840bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6844 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6847 std::optional<int64_t> BarValImm =
6852 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6858 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6864 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6869 MachineInstrBuilder MIB;
6873 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6874 auto DstReg = I.getOperand(0).getReg();
6875 const TargetRegisterClass *DstRC =
6876     TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6877 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6883 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6887 I.eraseFromParent();
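// Operand renderers used by imported patterns: they re-emit a matched
// G_CONSTANT / G_FCONSTANT as a plain immediate (sign-extended, negated,
// bitcast to raw FP bits, or its population count).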
6894 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6895        "Expected G_CONSTANT");
6896 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6902 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6903        "Expected G_CONSTANT");
6904 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6910 const MachineOperand &Op = MI.getOperand(1);
6911 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6912 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6918 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6919        "Expected G_CONSTANT");
6920 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
6928 const MachineOperand &Op = MI.getOperand(OpIdx);
6945 assert(OpIdx >= 0 && "expected to match an immediate operand");
6949void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6951 assert(OpIdx >= 0 && "expected to match an immediate operand");
6956void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6958 assert(OpIdx >= 0 && "expected to match an immediate operand");
6964void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6966 assert(OpIdx >= 0 && "expected to match an immediate operand");
6971void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
6973 assert(OpIdx >= 0 && "expected to match an immediate operand");
6979void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
6981 assert(OpIdx >= 0 && "expected to match an immediate operand");
6986void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
6988 assert(OpIdx >= 0 && "expected to match an immediate operand");
6993void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
6995 assert(OpIdx >= 0 && "expected to match an immediate operand");
7000void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7002 assert(OpIdx >= 0 && "expected to match an immediate operand");
7011 assert(OpIdx >= 0 && "expected to match an immediate operand");
7020 assert(OpIdx >= 0 && "expected to match an immediate operand");
7027void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7029 assert(OpIdx >= 0 && "expected to match an immediate operand");
7030 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
7045 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7047 assert(ExpVal != INT_MIN);
7065 if (MI.getOperand(OpIdx).getImm())
7067 MIB.addImm((int64_t)Mods);
7074 if (MI.getOperand(OpIdx).getImm())
7076 MIB.addImm((int64_t)Mods);
7082 unsigned Val = MI.getOperand(OpIdx).getImm();
7090 MIB.addImm((int64_t)Mods);
7096 uint32_t V = MI.getOperand(2).getImm();
7099 if (!Subtarget->hasSafeCUPrefetch())
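// The prefetch renderer above appears to adjust the matched value when the
// subtarget lacks safe CU prefetch; renderScaledMAIIntrinsicOperand below
// forwards the matched immediate for scaled MAI intrinsics.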
7105void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7107 unsigned Val = MI.getOperand(OpIdx).getImm();
7116bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7117 return TII.isInlineConstant(Imm);
7120bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7121 return TII.isInlineConstant(Imm);