#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
  MRI = &MF.getRegInfo();

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

    const LLT Ty = MRI.getType(Reg);

    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
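// Copy-like intrinsics (wqm, softwqm, strict wwm/wqm) are selected by
// rewriting the instruction in place to the new opcode and constraining
// source and destination to one common register class; the rewrite is
// rejected when the two constrained classes disagree.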
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
    return false;

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
    return false;

  const MCInstrDesc &MCID = MI.getDesc();

  MI.getOperand(0).setIsEarlyClobber(true);
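// Copies into a VCC (condition) destination need extra care: a 1-bit value
// living in a 32-bit SGPR/VGPR must be rematerialized as a wave mask, either
// by folding a known constant into an S_MOV of 0/-1, or by masking the source
// to its low bit and comparing it against zero with V_CMP_NE_*.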
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =

          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);

    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
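// SCC <- VCC: fold the wave mask into SCC by comparing it against zero (or
// ORing it with itself when no 64-bit scalar compare exists), then copy SCC
// into the 32-bit scalar destination.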
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =

      DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);

    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                                         unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;

    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
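// 32-bit adds/subs map directly onto SALU or VALU opcodes. 64-bit operations
// without native support are split into lo/hi halves chained through carry
// (S_ADD_U32/S_ADDC_U32 or V_ADD_CO_U32/V_ADDC_U32) and recombined with a
// REG_SEQUENCE.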
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();

  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarryInsts()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());

        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
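// Overflow-producing and carry-consuming add/sub: on the VALU the carry lives
// in a VCC-bank register, so the operation maps directly onto V_ADD_CO/V_ADDC
// (or the SUB forms); on the SALU the carry-in must first be copied into SCC
// and the carry-out copied back out of SCC afterwards.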
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
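// G_AMDGPU_MAD_U64_U32/I64_I32 select onto V_MAD_U64_U32/V_MAD_I64_I32,
// choosing the gfx11 forms when the MAD intra-wave forwarding bug is present
// and the no-carry (NC) variants when the carry output is unused and the
// subtarget supports them.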
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

    if (Subtarget->hasMADIntraFwdBug())
      Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                       : AMDGPU::V_MAD_I64_I32_gfx11_e64;

      Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                       : AMDGPU::V_MAD_NC_I64_I32_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));

  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
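// G_EXTRACT of a 32-bit-aligned slice is just a subregister copy: compute the
// subregister index from the bit offset and constrain the source to a class
// that supports that subregister.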
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                    *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, {}, SubReg);
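// G_MERGE_VALUES becomes a REG_SEQUENCE over the per-source subregister
// indices returned by getRegSplitParts.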
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
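// G_UNMERGE_VALUES becomes one subregister copy per result; an SGPR hi16
// half has no addressable subregister, so it is extracted with S_LSHR_B32
// instead.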
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);

    if (SrcBank->getID() == AMDGPU::SGPRRegBankID &&
        SubRegs[I] == AMDGPU::hi16) {
      BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())

      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
          .addReg(SrcReg, {}, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
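// v2s16 G_BUILD_VECTOR(_TRUNC): two constant sources fold into one 32-bit
// immediate move; otherwise the halves are combined with V_LSHL_OR on the
// VALU or the S_PACK_* family on the SALU, degrading to a plain COPY when the
// high element is undef.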
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));

          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
             RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);

    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

  if (ConstSrc1 && ConstSrc1->Value == 0) {

    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

    MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
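// Vector signed/unsigned bitfield extract selects onto V_BFE_I32/V_BFE_U32;
// per the asserts below, the scalar and 64-bit forms were already expanded in
// regbankselect.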
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)

      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =

    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =

                             STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;

    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
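// Dispatch for side-effect-free intrinsics that need manual selection;
// anything not handled in this switch falls through to the TableGen-generated
// selector.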
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  case Intrinsic::amdgcn_wave_shuffle:
    return selectWaveShuffleIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,

    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);

    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);

    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);

    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);

    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);

    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);

    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);

    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);

    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);

    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);

    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);

    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);

    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);

    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);

    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);

    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);

    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);

    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);

    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);

    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
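// Map a scalar (SCC-producing) compare to an S_CMP opcode for the given
// predicate and operand size. 64-bit scalar compares only exist for eq/ne,
// and the f32/f16 forms require SALU float support.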
                                            unsigned Size) const {
  if (!STI.hasScalarCompareEq64())

    return AMDGPU::S_CMP_LG_U64;

    return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;

    return AMDGPU::S_CMP_EQ_U32;

    return AMDGPU::S_CMP_GT_I32;

    return AMDGPU::S_CMP_GE_I32;

    return AMDGPU::S_CMP_LT_I32;

    return AMDGPU::S_CMP_LE_I32;

    return AMDGPU::S_CMP_GT_U32;

    return AMDGPU::S_CMP_GE_U32;

    return AMDGPU::S_CMP_LT_U32;

    return AMDGPU::S_CMP_LE_U32;

    return AMDGPU::S_CMP_EQ_F32;

    return AMDGPU::S_CMP_GT_F32;

    return AMDGPU::S_CMP_GE_F32;

    return AMDGPU::S_CMP_LT_F32;

    return AMDGPU::S_CMP_LE_F32;

    return AMDGPU::S_CMP_LG_F32;

    return AMDGPU::S_CMP_O_F32;

    return AMDGPU::S_CMP_U_F32;

    return AMDGPU::S_CMP_NLG_F32;

    return AMDGPU::S_CMP_NLE_F32;

    return AMDGPU::S_CMP_NLT_F32;

    return AMDGPU::S_CMP_NGE_F32;

    return AMDGPU::S_CMP_NGT_F32;

    return AMDGPU::S_CMP_NEQ_F32;

    if (!STI.hasSALUFloatInsts())

      return AMDGPU::S_CMP_EQ_F16;

      return AMDGPU::S_CMP_GT_F16;

      return AMDGPU::S_CMP_GE_F16;

      return AMDGPU::S_CMP_LT_F16;

      return AMDGPU::S_CMP_LE_F16;

      return AMDGPU::S_CMP_LG_F16;

      return AMDGPU::S_CMP_O_F16;

      return AMDGPU::S_CMP_U_F16;

      return AMDGPU::S_CMP_NLG_F16;

      return AMDGPU::S_CMP_NLE_F16;

      return AMDGPU::S_CMP_NLT_F16;

      return AMDGPU::S_CMP_NGE_F16;

      return AMDGPU::S_CMP_NGT_F16;

      return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                                 *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);

    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();

  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {

    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
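// amdgcn.ballot: constant arguments fold to an S_MOV of zero or a copy of
// EXEC; otherwise the source mask is copied or ANDed with EXEC, and a 64-bit
// ballot on wave32 zero-extends the 32-bit result through a REG_SEQUENCE with
// a zero high half.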
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

      unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {

    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);

                           AMDGPU::SReg_64RegClass, DL);

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
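// ds_ordered_count: validate the wave_done/wave_release flags and the dword
// count, then pack the ordered-count index, instruction kind, and shader type
// into the 16-bit offset field of DS_ORDERED_COUNT, which reads its address
// from M0.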
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

  std::tie(BaseOffset, ImmOffset) =

  if (Readfirstlane) {

    if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  const MCInstrDesc &InstrDesc = TII.get(Opc);

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))

      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))

      Register UndefReg = MRI->createVirtualRegister(SubRC);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

    std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

    if (!isDSOffsetLegal(PtrBase, Offset)) {
      PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;

  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
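// Image intrinsics: derive dmask and the vdata dword count from the atomic /
// store / load flavor, pick the MIMG encoding (NSA vs. contiguous vaddr,
// gfx10/11 variants) from the address register layout, then assemble the
// final MIMG instruction operand by operand.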
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                    TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {

    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;

      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =

      IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);

                 << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);

                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

    Register TmpReg = MRI->createVirtualRegister(
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

    if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

  MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);

bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_buffer_load_async_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_load_async_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
  case Intrinsic::amdgcn_global_load_async_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_tensor_load_to_lds:
  case Intrinsic::amdgcn_tensor_store_from_lds:
    return selectTensorLoadStore(I, IntrinsicID);
  case Intrinsic::amdgcn_asyncmark:
  case Intrinsic::amdgcn_wait_asyncmark:
    if (!Subtarget->hasAsyncMark())

  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_alloc_vgpr: {

    Register ResReg = I.getOperand(0).getReg();

    MachineInstr *AllocMI = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_ALLOC_VGPR))
                                .add(I.getOperand(2));

    I.eraseFromParent();

    return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);

  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

    return selectNamedBarrierInst(I, IntrinsicID);

  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

          .add(I.getOperand(3))

          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
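// G_TRUNC: truncating two 32-bit halves down to packed 16-bit elements uses
// V_MOV_B32_sdwa when available, or an explicit shift/and/or sequence
// otherwise; every other truncate reduces to a COPY from the subregister
// computed from the destination size.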
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;

    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());

        .addReg(SrcReg, {}, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);

        .addReg(SrcReg, {}, AMDGPU::sub0);

        .addReg(SrcReg, {}, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {

      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
    = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

  return &RBI.getRegBankFromRegClass(*RC, LLT());
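// Sign/zero/any-extend selection: any-extends degrade to a COPY or a
// REG_SEQUENCE with an undef high part; VALU extends use V_BFE, and SGPR
// extends use S_SEXT_I32_I8/I16 or S_BFE_*, with 32->64 extends built from a
// copied low half plus a shifted or zeroed high half.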
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {

      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

      MachineInstr *ExtI =

      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =

    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

          .addReg(SrcReg, {}, SubReg)

          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());

      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
3036bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
3037 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
3040bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3041 if (!
MI.hasOneMemOperand())
3044 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3057 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3058 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3059 AMDGPU::SGPRRegBankID;
3062 return I &&
I->getMetadata(
"amdgpu.uniform");
3066 for (
const GEPInfo &GEPInfo : AddrInfo) {
3067 if (!GEPInfo.VgprParts.empty())
3073void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3074 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3077 STI.ldsRequiresM0Init()) {
3081 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3086bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3093 if (
Reg.isPhysical())
3097 const unsigned Opcode =
MI.getOpcode();
3099 if (Opcode == AMDGPU::COPY)
3102 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3103 Opcode == AMDGPU::G_XOR)
3108 return GI->is(Intrinsic::amdgcn_class);
3110 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3113bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3115 MachineOperand &CondOp =
I.getOperand(0);
3121 const TargetRegisterClass *ConstrainRC;
3128 if (!isVCC(CondReg, *MRI)) {
3132 CondPhysReg = AMDGPU::SCC;
3133 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3134 ConstrainRC = &AMDGPU::SReg_32RegClass;
3141 const bool Is64 = STI.isWave64();
3142 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3143 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3145 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3146 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3153 CondPhysReg = TRI.getVCC();
3154 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3155 ConstrainRC = TRI.getBoolRC();
3158 if (!MRI->getRegClassOrNull(CondReg))
3159 MRI->setRegClass(CondReg, ConstrainRC);
3161 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3164 .
addMBB(
I.getOperand(1).getMBB());
3166 I.eraseFromParent();
3170bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3172 Register DstReg =
I.getOperand(0).getReg();
3173 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3174 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3175 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3179 return RBI.constrainGenericRegister(
3180 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3183bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3184 Register DstReg =
I.getOperand(0).getReg();
3185 Register SrcReg =
I.getOperand(1).getReg();
3186 Register MaskReg =
I.getOperand(2).getReg();
3187 LLT Ty = MRI->getType(DstReg);
3188 LLT MaskTy = MRI->getType(MaskReg);
3192 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3193 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3194 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3195 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3201 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3205 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3206 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3209 !CanCopyLow32 && !CanCopyHi32) {
3210 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3214 I.eraseFromParent();
3219 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3220 const TargetRegisterClass &RegRC
3221 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3223 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3224 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3225 const TargetRegisterClass *MaskRC =
3226 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3228 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3229 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3230 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3235 "ptrmask should have been narrowed during legalize");
3237 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3243 I.eraseFromParent();
3247 Register HiReg = MRI->createVirtualRegister(&RegRC);
3248 Register LoReg = MRI->createVirtualRegister(&RegRC);
3251 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3252 .
addReg(SrcReg, {}, AMDGPU::sub0);
3253 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3254 .
addReg(SrcReg, {}, AMDGPU::sub1);
3263 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3264 MaskedLo = MRI->createVirtualRegister(&RegRC);
3266 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3267 .
addReg(MaskReg, {}, AMDGPU::sub0);
3268 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3277 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3278 MaskedHi = MRI->createVirtualRegister(&RegRC);
3280 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3281 .
addReg(MaskReg, {}, AMDGPU::sub1);
3282 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3287 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3292 I.eraseFromParent();
3298static std::pair<Register, unsigned>
3305 std::tie(IdxBaseReg,
Offset) =
3307 if (IdxBaseReg == AMDGPU::NoRegister) {
3311 IdxBaseReg = IdxReg;
3318 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3319 return std::pair(IdxReg, SubRegs[0]);
3320 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3323bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3329 LLT DstTy = MRI->getType(DstReg);
3330 LLT SrcTy = MRI->getType(SrcReg);
3332 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3333 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3334 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3338 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3341 const TargetRegisterClass *SrcRC =
3342 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3343 const TargetRegisterClass *DstRC =
3344 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3345 if (!SrcRC || !DstRC)
3347 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3348 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3349 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3352 MachineBasicBlock *BB =
MI.getParent();
3360 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3364 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3367 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3369 .
addReg(SrcReg, {}, SubReg)
3371 MI.eraseFromParent();
3378 if (!STI.useVGPRIndexMode()) {
3379 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3381 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3382 .
addReg(SrcReg, {}, SubReg)
3384 MI.eraseFromParent();
3388 const MCInstrDesc &GPRIDXDesc =
3389 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3395 MI.eraseFromParent();
3400bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3407 LLT VecTy = MRI->getType(DstReg);
3408 LLT ValTy = MRI->getType(ValReg);
3412 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3413 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3414 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3420 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3423 const TargetRegisterClass *VecRC =
3424 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3425 const TargetRegisterClass *ValRC =
3426 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3428 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3429 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3430 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3431 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3434 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3438 std::tie(IdxReg, SubReg) =
3441 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3442 STI.useVGPRIndexMode();
3444 MachineBasicBlock *BB =
MI.getParent();
3448 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3451 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3452 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3457 MI.eraseFromParent();
3461 const MCInstrDesc &GPRIDXDesc =
3462 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3469 MI.eraseFromParent();
3475 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3476 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3477 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3478 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3479 case Intrinsic::amdgcn_load_async_to_lds:
3480 case Intrinsic::amdgcn_global_load_async_lds:
3486bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3487 if (!Subtarget->hasVMemToLDSLoad())
3490 unsigned Size =
MI.getOperand(3).getImm();
3494 const bool HasVIndex =
MI.getNumOperands() == 9;
3498 VIndex =
MI.getOperand(4).getReg();
3502 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3503 std::optional<ValueAndVReg> MaybeVOffset =
3505 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3511 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3512 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3513 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3514 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3517 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3518 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3519 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3520 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3523 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3524 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3525 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3526 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3529 if (!Subtarget->hasLDSLoadB96_B128())
3532 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3533 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3534 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3535 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3538 if (!Subtarget->hasLDSLoadB96_B128())
3541 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3542 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3543 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3544 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3548 MachineBasicBlock *
MBB =
MI.getParent();
3551 .
add(
MI.getOperand(2));
3555 if (HasVIndex && HasVOffset) {
3556 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3557 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3564 }
else if (HasVIndex) {
3566 }
else if (HasVOffset) {
3570 MIB.
add(
MI.getOperand(1));
3571 MIB.
add(
MI.getOperand(5 + OpOffset));
3572 MIB.
add(
MI.getOperand(6 + OpOffset));
3574 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3583 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3588 MachinePointerInfo StorePtrI = LoadPtrI;
3599 MachineMemOperand *StoreMMO =
3605 MI.eraseFromParent();
3618 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3624 return Def->getOperand(1).getReg();
3638 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3646 return Def->getOperand(1).getReg();
3648 if (
VT->signBitIsZero(
Reg))
3649 return matchZeroExtendFromS32(
Reg);
3657AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3659 : matchZeroExtendFromS32(
Reg);
3665AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3667 : matchSignExtendFromS32(
Reg);
3671AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3672 bool IsSigned)
const {
3674 return matchSignExtendFromS32OrS32(
Reg);
3676 return matchZeroExtendFromS32OrS32(
Reg);
3686 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3693 return Def->getOperand(1).getReg();
3698bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3699 if (!Subtarget->hasVMemToLDSLoad())
3703 unsigned Size =
MI.getOperand(3).getImm();
3710 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3713 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3716 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3719 if (!Subtarget->hasLDSLoadB96_B128())
3721 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3724 if (!Subtarget->hasLDSLoadB96_B128())
3726 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3730 MachineBasicBlock *
MBB =
MI.getParent();
3733 .
add(
MI.getOperand(2));
3739 if (!isSGPR(Addr)) {
3741 if (isSGPR(AddrDef->Reg)) {
3742 Addr = AddrDef->Reg;
3743 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3746 if (isSGPR(SAddr)) {
3747 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3748 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3759 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3771 MIB.
add(
MI.getOperand(4));
3773 unsigned Aux =
MI.getOperand(5).getImm();
3777 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3779 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3780 MachinePointerInfo StorePtrI = LoadPtrI;
3789 MachineMemOperand *StoreMMO =
3791 sizeof(int32_t),
Align(4));
3795 MI.eraseFromParent();
3800bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3802 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3804 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3808 const auto isAllZeros = [&](MachineOperand &Opnd) {
3809 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3818 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3819 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3824 MachineBasicBlock *
MBB =
MI.getParent();
3826 .
add(
MI.getOperand(1))
3827 .
add(
MI.getOperand(2));
3829 if (NumGroups >= 4) {
3830 MIB.
add(
MI.getOperand(3))
3831 .
add(
MI.getOperand(4));
3835 .
add(
MI.getOperand(6));
3837 MI.eraseFromParent();
3841bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3843 unsigned OpcodeOpIdx =
3844 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3845 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3846 MI.removeOperand(OpcodeOpIdx);
3847 MI.addImplicitDefUseOperands(*
MI.getMF());
3854bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3857 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3858 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3860 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3861 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3863 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3864 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3866 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3867 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3869 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3870 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3872 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3873 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3875 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3876 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3878 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3879 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3881 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3882 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3884 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3885 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3887 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3888 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3890 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3891 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3893 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3894 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3896 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3897 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3899 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3900 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3902 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3903 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3905 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3906 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3908 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3909 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3911 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3912 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3914 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3915 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3917 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3918 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3920 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3921 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3923 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3924 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3926 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3927 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3929 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3930 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3932 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3933 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3935 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3936 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3938 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3939 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3945 auto VDst_In =
MI.getOperand(4);
3947 MI.setDesc(TII.get(
Opc));
3948 MI.removeOperand(4);
3949 MI.removeOperand(1);
3950 MI.addOperand(VDst_In);
3951 MI.addImplicitDefUseOperands(*
MI.getMF());
3952 const MCInstrDesc &MCID =
MI.getDesc();
3954 MI.getOperand(0).setIsEarlyClobber(
true);
3959bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3961 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3962 !Subtarget->hasPermlane16Swap())
3964 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3965 !Subtarget->hasPermlane32Swap())
3968 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3969 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3970 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3972 MI.removeOperand(2);
3973 MI.setDesc(TII.get(Opcode));
3976 MachineOperand &FI =
MI.getOperand(4);
3983bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3986 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3987 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3988 MachineBasicBlock *
MBB =
MI.getParent();
3992 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3993 .
addImm(Subtarget->getWavefrontSizeLog2())
3998 .
addImm(Subtarget->getWavefrontSizeLog2())
4002 const TargetRegisterClass &RC =
4003 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4004 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4007 MI.eraseFromParent();
4011bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4014 MachineBasicBlock *
MBB =
MI.getParent();
4021 const LLT DstTy = MRI->getType(DstReg);
4023 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4024 const TargetRegisterClass *DstRC =
4025 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4030 if (!Subtarget->supportsBPermute())
4034 if (Subtarget->supportsWaveWideBPermute()) {
4035 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4036 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4046 assert(Subtarget->isWave64());
4050 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4051 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4053 Register UndefExecReg = MRI->createVirtualRegister(
4054 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4055 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4057 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4058 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4066 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4067 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4071 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4072 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4080 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4081 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4086 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4087 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4090 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4091 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4096 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4097 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4104 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4105 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4109 Register XORReg = MRI->createVirtualRegister(DstRC);
4114 Register ANDReg = MRI->createVirtualRegister(DstRC);
4119 Register CompareReg = MRI->createVirtualRegister(
4120 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4121 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4126 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4134 MI.eraseFromParent();
4143 unsigned NumOpcodes = 0;
4156 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4167 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4181 if (Src.size() == 3) {
4188 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4189 if (Src[
I] ==
LHS) {
4199 Bits = SrcBits[Src.size()];
4205 switch (
MI->getOpcode()) {
4206 case TargetOpcode::G_AND:
4207 case TargetOpcode::G_OR:
4208 case TargetOpcode::G_XOR: {
4213 if (!getOperandBits(
LHS, LHSBits) ||
4214 !getOperandBits(
RHS, RHSBits)) {
4215 Src = std::move(Backup);
4216 return std::make_pair(0, 0);
4222 NumOpcodes +=
Op.first;
4223 LHSBits =
Op.second;
4228 NumOpcodes +=
Op.first;
4229 RHSBits =
Op.second;
4234 return std::make_pair(0, 0);
4238 switch (
MI->getOpcode()) {
4239 case TargetOpcode::G_AND:
4240 TTbl = LHSBits & RHSBits;
4242 case TargetOpcode::G_OR:
4243 TTbl = LHSBits | RHSBits;
4245 case TargetOpcode::G_XOR:
4246 TTbl = LHSBits ^ RHSBits;
4252 return std::make_pair(NumOpcodes + 1, TTbl);
4255bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4256 if (!Subtarget->hasBitOp3Insts())
4260 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4261 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4267 unsigned NumOpcodes;
4269 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4273 if (NumOpcodes < 2 || Src.empty())
4276 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4277 if (NumOpcodes == 2 && IsB32) {
4285 }
else if (NumOpcodes < 4) {
4292 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4293 if (!IsB32 && STI.hasTrue16BitInsts())
4294 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4295 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4296 unsigned CBL = STI.getConstantBusLimit(
Opc);
4297 MachineBasicBlock *
MBB =
MI.getParent();
4300 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4301 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4302 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4308 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4319 while (Src.size() < 3)
4320 Src.push_back(Src[0]);
4337 MI.eraseFromParent();
4342bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4344 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4347 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4349 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4351 MachineBasicBlock *
MBB =
MI.getParent();
4355 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4358 .
addImm(Subtarget->getWavefrontSizeLog2())
4365 MI.eraseFromParent();
4371 if (!
I.isPreISelOpcode()) {
4373 return selectCOPY(
I);
4377 switch (
I.getOpcode()) {
4378 case TargetOpcode::G_AND:
4379 case TargetOpcode::G_OR:
4380 case TargetOpcode::G_XOR:
4381 if (selectBITOP3(
I))
4385 return selectG_AND_OR_XOR(
I);
4386 case TargetOpcode::G_ADD:
4387 case TargetOpcode::G_SUB:
4388 case TargetOpcode::G_PTR_ADD:
4391 return selectG_ADD_SUB(
I);
4392 case TargetOpcode::G_UADDO:
4393 case TargetOpcode::G_USUBO:
4394 case TargetOpcode::G_UADDE:
4395 case TargetOpcode::G_USUBE:
4396 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4397 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4398 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4399 return selectG_AMDGPU_MAD_64_32(
I);
4400 case TargetOpcode::G_INTTOPTR:
4401 case TargetOpcode::G_BITCAST:
4402 case TargetOpcode::G_PTRTOINT:
4403 case TargetOpcode::G_FREEZE:
4404 return selectCOPY(
I);
4405 case TargetOpcode::G_FNEG:
4408 return selectG_FNEG(
I);
4409 case TargetOpcode::G_FABS:
4412 return selectG_FABS(
I);
4413 case TargetOpcode::G_EXTRACT:
4414 return selectG_EXTRACT(
I);
4415 case TargetOpcode::G_MERGE_VALUES:
4416 case TargetOpcode::G_CONCAT_VECTORS:
4417 return selectG_MERGE_VALUES(
I);
4418 case TargetOpcode::G_UNMERGE_VALUES:
4419 return selectG_UNMERGE_VALUES(
I);
4420 case TargetOpcode::G_BUILD_VECTOR:
4421 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4422 return selectG_BUILD_VECTOR(
I);
4423 case TargetOpcode::G_IMPLICIT_DEF:
4424 return selectG_IMPLICIT_DEF(
I);
4425 case TargetOpcode::G_INSERT:
4426 return selectG_INSERT(
I);
4427 case TargetOpcode::G_INTRINSIC:
4428 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4429 return selectG_INTRINSIC(
I);
4430 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4431 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4432 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4433 case TargetOpcode::G_ICMP:
4434 case TargetOpcode::G_FCMP:
4435 if (selectG_ICMP_or_FCMP(
I))
4438 case TargetOpcode::G_LOAD:
4439 case TargetOpcode::G_ZEXTLOAD:
4440 case TargetOpcode::G_SEXTLOAD:
4441 case TargetOpcode::G_STORE:
4442 case TargetOpcode::G_ATOMIC_CMPXCHG:
4443 case TargetOpcode::G_ATOMICRMW_XCHG:
4444 case TargetOpcode::G_ATOMICRMW_ADD:
4445 case TargetOpcode::G_ATOMICRMW_SUB:
4446 case TargetOpcode::G_ATOMICRMW_AND:
4447 case TargetOpcode::G_ATOMICRMW_OR:
4448 case TargetOpcode::G_ATOMICRMW_XOR:
4449 case TargetOpcode::G_ATOMICRMW_MIN:
4450 case TargetOpcode::G_ATOMICRMW_MAX:
4451 case TargetOpcode::G_ATOMICRMW_UMIN:
4452 case TargetOpcode::G_ATOMICRMW_UMAX:
4453 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4454 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4455 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4456 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4457 case TargetOpcode::G_ATOMICRMW_FADD:
4458 case TargetOpcode::G_ATOMICRMW_FMIN:
4459 case TargetOpcode::G_ATOMICRMW_FMAX:
4460 return selectG_LOAD_STORE_ATOMICRMW(
I);
4461 case TargetOpcode::G_SELECT:
4462 return selectG_SELECT(
I);
4463 case TargetOpcode::G_TRUNC:
4464 return selectG_TRUNC(
I);
4465 case TargetOpcode::G_SEXT:
4466 case TargetOpcode::G_ZEXT:
4467 case TargetOpcode::G_ANYEXT:
4468 case TargetOpcode::G_SEXT_INREG:
4472 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4475 return selectG_SZA_EXT(
I);
4476 case TargetOpcode::G_FPEXT:
4477 if (selectG_FPEXT(
I))
4480 case TargetOpcode::G_BRCOND:
4481 return selectG_BRCOND(
I);
4482 case TargetOpcode::G_GLOBAL_VALUE:
4483 return selectG_GLOBAL_VALUE(
I);
4484 case TargetOpcode::G_PTRMASK:
4485 return selectG_PTRMASK(
I);
4486 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4487 return selectG_EXTRACT_VECTOR_ELT(
I);
4488 case TargetOpcode::G_INSERT_VECTOR_ELT:
4489 return selectG_INSERT_VECTOR_ELT(
I);
4490 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4491 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4492 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4493 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4494 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4497 assert(Intr &&
"not an image intrinsic with image pseudo");
4498 return selectImageIntrinsic(
I, Intr);
4500 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4501 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4502 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4503 return selectBVHIntersectRayIntrinsic(
I);
4504 case AMDGPU::G_SBFX:
4505 case AMDGPU::G_UBFX:
4506 return selectG_SBFX_UBFX(
I);
4507 case AMDGPU::G_SI_CALL:
4508 I.setDesc(TII.get(AMDGPU::SI_CALL));
4510 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4511 return selectWaveAddress(
I);
4512 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4513 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4516 case AMDGPU::G_STACKRESTORE:
4517 return selectStackRestore(
I);
4519 return selectPHI(
I);
4520 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4521 return selectCOPY_SCC_VCC(
I);
4522 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4523 return selectCOPY_VCC_SCC(
I);
4524 case AMDGPU::G_AMDGPU_READANYLANE:
4525 return selectReadAnyLane(
I);
4526 case TargetOpcode::G_CONSTANT:
4527 case TargetOpcode::G_FCONSTANT:
4535AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4542std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4543 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4547 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4548 Src =
MI->getOperand(1).getReg();
4551 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4556 if (
LHS &&
LHS->isZero()) {
4558 Src =
MI->getOperand(2).getReg();
4562 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4563 Src =
MI->getOperand(1).getReg();
4570 return std::pair(Src, Mods);
4573std::pair<Register, unsigned>
4574AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4576 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4578 return std::pair(Src, Mods);
4581Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4583 bool ForceVGPR)
const {
4584 if ((Mods != 0 || ForceVGPR) &&
4585 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4592 TII.
get(AMDGPU::COPY), VGPRSrc)
4604AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4606 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4611AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4614 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4617 [=](MachineInstrBuilder &MIB) {
4618 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4620 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4621 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4622 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4627AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4630 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4635 [=](MachineInstrBuilder &MIB) {
4636 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4638 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4639 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4640 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4645AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4647 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4648 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4649 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4654AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4657 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4660 [=](MachineInstrBuilder &MIB) {
4661 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4663 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4668AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4672 std::tie(Src, Mods) =
4673 selectVOP3ModsImpl(Root.
getReg(),
false);
4676 [=](MachineInstrBuilder &MIB) {
4677 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4679 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4684AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4687 std::tie(Src, Mods) =
4688 selectVOP3ModsImpl(Root.
getReg(),
true,
4692 [=](MachineInstrBuilder &MIB) {
4693 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4695 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4700AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4703 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4706 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4731 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4736 return DstSize * 2 == SrcSize;
4742 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4746 std::optional<ValueAndVReg> ShiftAmt;
4747 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4750 unsigned Shift = ShiftAmt->Value.getZExtValue();
4751 return Shift * 2 == SrcSize;
4759 if (
MI->getOpcode() != AMDGPU::G_SHL)
4763 std::optional<ValueAndVReg> ShiftAmt;
4764 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4767 unsigned Shift = ShiftAmt->Value.getZExtValue();
4768 return Shift * 2 == SrcSize;
4776 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4778 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4779 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4949static std::optional<std::pair<Register, SrcStatus>>
4954 unsigned Opc =
MI->getOpcode();
4958 case AMDGPU::G_BITCAST:
4959 return std::optional<std::pair<Register, SrcStatus>>(
4960 {
MI->getOperand(1).getReg(), Curr.second});
4962 if (
MI->getOperand(1).getReg().isPhysical())
4963 return std::nullopt;
4964 return std::optional<std::pair<Register, SrcStatus>>(
4965 {
MI->getOperand(1).getReg(), Curr.second});
4966 case AMDGPU::G_FNEG: {
4969 return std::nullopt;
4970 return std::optional<std::pair<Register, SrcStatus>>(
4971 {
MI->getOperand(1).getReg(), Stat});
4978 switch (Curr.second) {
4981 return std::optional<std::pair<Register, SrcStatus>>(
4984 if (Curr.first ==
MI->getOperand(0).getReg())
4985 return std::optional<std::pair<Register, SrcStatus>>(
4987 return std::optional<std::pair<Register, SrcStatus>>(
4999 return std::optional<std::pair<Register, SrcStatus>>(
5003 if (Curr.first ==
MI->getOperand(0).getReg())
5004 return std::optional<std::pair<Register, SrcStatus>>(
5006 return std::optional<std::pair<Register, SrcStatus>>(
5012 return std::optional<std::pair<Register, SrcStatus>>(
5017 return std::optional<std::pair<Register, SrcStatus>>(
5022 return std::optional<std::pair<Register, SrcStatus>>(
5027 return std::optional<std::pair<Register, SrcStatus>>(
5033 return std::nullopt;
5043 bool HasNeg =
false;
5045 bool HasOpsel =
true;
5050 unsigned Opc =
MI->getOpcode();
5052 if (
Opc == TargetOpcode::G_INTRINSIC) {
5055 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5082 while (
Depth <= MaxDepth && Curr.has_value()) {
5085 Statlist.push_back(Curr.value());
5092static std::pair<Register, SrcStatus>
5099 while (
Depth <= MaxDepth && Curr.has_value()) {
5105 LastSameOrNeg = Curr.value();
5110 return LastSameOrNeg;
5117 return Width1 == Width2;
5152 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5153 IsHalfState(HiStat);
5156std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5162 return {RootReg, Mods};
5165 SearchOptions SO(RootReg, MRI);
5176 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5178 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5179 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5181 return {Stat.first, Mods};
5187 if (StatlistHi.
empty()) {
5189 return {Stat.first, Mods};
5195 if (StatlistLo.
empty()) {
5197 return {Stat.first, Mods};
5200 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5201 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5202 if (StatlistHi[
I].first == StatlistLo[J].first &&
5204 StatlistHi[
I].first, RootReg, TII, MRI))
5205 return {StatlistHi[
I].first,
5206 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5212 return {Stat.first, Mods};
5222 return RB->
getID() == RBNo;
5239 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5240 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5244 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5253 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5261AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5266 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5270 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5271 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5276AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5278 return selectVOP3PRetHelper(Root);
5282AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5284 return selectVOP3PRetHelper(Root,
true);
5288AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5292 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5296 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5300AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5303 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5306 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5307 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5312AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5315 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5319 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5323AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5326 "expected i1 value");
5332 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5340 switch (Elts.
size()) {
5342 DstRegClass = &AMDGPU::VReg_256RegClass;
5345 DstRegClass = &AMDGPU::VReg_128RegClass;
5348 DstRegClass = &AMDGPU::VReg_64RegClass;
5355 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5357 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5368 if (ModOpcode == TargetOpcode::G_FNEG) {
5372 for (
auto El : Elts) {
5378 if (Elts.size() != NegAbsElts.
size()) {
5387 assert(ModOpcode == TargetOpcode::G_FABS);
5395AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5401 assert(BV->getNumSources() > 0);
5403 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5404 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5407 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5408 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5415 if (BV->getNumSources() == EltsF32.
size()) {
5421 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5422 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5426AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5432 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5440 if (CV->getNumSources() == EltsV2F16.
size()) {
5447 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5448 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5452AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5458 assert(CV->getNumSources() > 0);
5459 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5461 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5465 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5466 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5473 if (CV->getNumSources() == EltsV2F16.
size()) {
5480 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5481 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5485AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5486 std::optional<FPValueAndVReg> FPValReg;
5488 if (TII.isInlineConstant(FPValReg->Value)) {
5489 return {{[=](MachineInstrBuilder &MIB) {
5490 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5500 if (TII.isInlineConstant(ICst)) {
5510AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5516 std::optional<ValueAndVReg> ShiftAmt;
5518 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5519 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5520 Key = ShiftAmt->Value.getZExtValue() / 8;
5525 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5526 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5531AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5538 std::optional<ValueAndVReg> ShiftAmt;
5540 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5541 ShiftAmt->Value.getZExtValue() == 16) {
5547 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5548 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5553AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5560 S32 = matchAnyExtendFromS32(Src);
5564 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5569 Src =
Def->getOperand(2).getReg();
5576 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5577 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5582AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5585 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5589 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5590 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5596AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5599 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5605 [=](MachineInstrBuilder &MIB) {
5607 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5609 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5614AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5617 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5623 [=](MachineInstrBuilder &MIB) {
5625 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5627 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5634bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5636 bool IsSigned)
const {
5637 if (!Subtarget->hasScaleOffset())
5641 MachineMemOperand *MMO = *
MI.memoperands_begin();
5653 OffsetReg =
Def->Reg;
5668 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5672 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5673 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5674 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5675 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5688bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5692 bool *ScaleOffset)
const {
5694 MachineBasicBlock *
MBB =
MI->getParent();
5699 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5701 if (AddrInfo.
empty())
5704 const GEPInfo &GEPI = AddrInfo[0];
5705 std::optional<int64_t> EncodedImm;
5708 *ScaleOffset =
false;
5713 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5714 AddrInfo.
size() > 1) {
5715 const GEPInfo &GEPI2 = AddrInfo[1];
5716 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5717 Register OffsetReg = GEPI2.SgprParts[1];
5720 selectScaleOffset(Root, OffsetReg,
false );
5721 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5723 Base = GEPI2.SgprParts[0];
5724 *SOffset = OffsetReg;
5733 auto SKnown =
VT->getKnownBits(*SOffset);
5734 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5746 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5747 Base = GEPI.SgprParts[0];
5753 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5759 Base = GEPI.SgprParts[0];
5760 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5761 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5766 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5767 Register OffsetReg = GEPI.SgprParts[1];
5769 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5770 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5772 Base = GEPI.SgprParts[0];
5773 *SOffset = OffsetReg;
5782AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5785 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5787 return std::nullopt;
5789 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5790 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5794AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5796 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5798 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5799 return std::nullopt;
5801 const GEPInfo &GEPInfo = AddrInfo[0];
5802 Register PtrReg = GEPInfo.SgprParts[0];
5803 std::optional<int64_t> EncodedImm =
5806 return std::nullopt;
5809 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5810 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5815AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5818 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5820 return std::nullopt;
5823 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5824 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5825 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5829AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5833 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5834 return std::nullopt;
5837 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5838 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5840 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5843std::pair<Register, int>
5844AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5845 uint64_t FlatVariant)
const {
5850 if (!STI.hasFlatInstOffsets())
5854 int64_t ConstOffset;
5856 std::tie(PtrBase, ConstOffset, IsInBounds) =
5857 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5863 if (ConstOffset == 0 ||
5865 !isFlatScratchBaseLegal(Root.
getReg())) ||
5869 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5870 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5873 return std::pair(PtrBase, ConstOffset);
5877AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5881 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5882 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5887AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5891 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5892 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5897AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5901 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5902 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5908AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5910 bool NeedIOffset)
const {
5913 int64_t ConstOffset;
5914 int64_t ImmOffset = 0;
5918 std::tie(PtrBase, ConstOffset, std::ignore) =
5919 getPtrBaseWithConstantOffset(Addr, *MRI);
5921 if (ConstOffset != 0) {
5926 ImmOffset = ConstOffset;
5929 if (isSGPR(PtrBaseDef->Reg)) {
5930 if (ConstOffset > 0) {
5936 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5938 std::tie(SplitImmOffset, RemainderOffset) =
5943 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5946 MachineBasicBlock *
MBB =
MI->getParent();
5948 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5950 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5952 .
addImm(RemainderOffset);
5956 [=](MachineInstrBuilder &MIB) {
5959 [=](MachineInstrBuilder &MIB) {
5962 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5963 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5966 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5967 [=](MachineInstrBuilder &MIB) {
5970 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5980 unsigned NumLiterals =
5981 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5982 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5983 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5984 return std::nullopt;
5991 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5996 if (isSGPR(SAddr)) {
5997 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6001 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6002 Subtarget->hasSignedGVSOffset());
6003 if (
Register VOffset = matchExtendFromS32OrS32(
6004 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6006 return {{[=](MachineInstrBuilder &MIB) {
6009 [=](MachineInstrBuilder &MIB) {
6012 [=](MachineInstrBuilder &MIB) {
6015 [=](MachineInstrBuilder &MIB) {
6019 return {{[=](MachineInstrBuilder &MIB) {
6022 [=](MachineInstrBuilder &MIB) {
6025 [=](MachineInstrBuilder &MIB) {
6035 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6036 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6037 return std::nullopt;
6042 MachineBasicBlock *
MBB =
MI->getParent();
6043 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6045 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6050 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6051 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6052 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6053 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6056 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6057 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6058 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6063AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6064 return selectGlobalSAddr(Root, 0);
6068AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6074 return selectGlobalSAddr(Root, PassedCPol);
6078AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6084 return selectGlobalSAddr(Root, PassedCPol);
6088AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6093AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6100 return selectGlobalSAddr(Root, PassedCPol,
false);
6104AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6111 return selectGlobalSAddr(Root, PassedCPol,
false);
6115AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6118 int64_t ConstOffset;
6119 int64_t ImmOffset = 0;
6123 std::tie(PtrBase, ConstOffset, std::ignore) =
6124 getPtrBaseWithConstantOffset(Addr, *MRI);
6126 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6130 ImmOffset = ConstOffset;
6134 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6135 int FI = AddrDef->MI->getOperand(1).
getIndex();
6138 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6144 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6145 Register LHS = AddrDef->MI->getOperand(1).getReg();
6146 Register RHS = AddrDef->MI->getOperand(2).getReg();
6150 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6151 isSGPR(RHSDef->Reg)) {
6152 int FI = LHSDef->MI->getOperand(1).getIndex();
6156 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6158 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6166 return std::nullopt;
6169 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6170 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6175bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6177 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6183 auto VKnown =
VT->getKnownBits(VAddr);
6186 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6187 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6188 return (VMax & 3) + (
SMax & 3) >= 4;
6192AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6195 int64_t ConstOffset;
6196 int64_t ImmOffset = 0;
6200 std::tie(PtrBase, ConstOffset, std::ignore) =
6201 getPtrBaseWithConstantOffset(Addr, *MRI);
6204 if (ConstOffset != 0 &&
6208 ImmOffset = ConstOffset;
6212 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6213 return std::nullopt;
6215 Register RHS = AddrDef->MI->getOperand(2).getReg();
6216 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6217 return std::nullopt;
6219 Register LHS = AddrDef->MI->getOperand(1).getReg();
6222 if (OrigAddr != Addr) {
6223 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6224 return std::nullopt;
6226 if (!isFlatScratchBaseLegalSV(OrigAddr))
6227 return std::nullopt;
6230 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6231 return std::nullopt;
6233 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6237 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6238 int FI = LHSDef->MI->getOperand(1).getIndex();
6240 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6242 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6243 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6252 return std::nullopt;
6255 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6256 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6257 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6258 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6263AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6265 MachineBasicBlock *
MBB =
MI->getParent();
6267 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6272 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6277 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6281 return {{[=](MachineInstrBuilder &MIB) {
6284 [=](MachineInstrBuilder &MIB) {
6287 [=](MachineInstrBuilder &MIB) {
6292 [=](MachineInstrBuilder &MIB) {
6301 std::optional<int> FI;
6304 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6306 int64_t ConstOffset;
6307 std::tie(PtrBase, ConstOffset, std::ignore) =
6308 getPtrBaseWithConstantOffset(VAddr, *MRI);
6309 if (ConstOffset != 0) {
6310 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6311 (!STI.privateMemoryResourceIsRangeChecked() ||
6312 VT->signBitIsZero(PtrBase))) {
6313 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6314 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6320 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6324 return {{[=](MachineInstrBuilder &MIB) {
6327 [=](MachineInstrBuilder &MIB) {
6333 [=](MachineInstrBuilder &MIB) {
6338 [=](MachineInstrBuilder &MIB) {
6343bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6348 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6353 return VT->signBitIsZero(
Base);
6356bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6358 unsigned Size)
const {
6359 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6364 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6369 return VT->signBitIsZero(
Base);
6374 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6375 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6382bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6390 if (STI.hasSignedScratchOffsets())
6396 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6397 std::optional<ValueAndVReg> RhsValReg =
6403 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6404 RhsValReg->Value.getSExtValue() > -0x40000000)
6408 return VT->signBitIsZero(
LHS);
6413bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6421 if (STI.hasSignedScratchOffsets())
6426 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6431bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6435 if (STI.hasSignedScratchOffsets())
6440 std::optional<DefinitionAndSourceRegister> BaseDef =
6442 std::optional<ValueAndVReg> RHSOffset =
6452 (RHSOffset->Value.getSExtValue() < 0 &&
6453 RHSOffset->Value.getSExtValue() > -0x40000000)))
6456 Register LHS = BaseDef->MI->getOperand(1).getReg();
6457 Register RHS = BaseDef->MI->getOperand(2).getReg();
6458 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6461bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6462 unsigned ShAmtBits)
const {
6463 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6465 std::optional<APInt>
RHS =
6470 if (
RHS->countr_one() >= ShAmtBits)
6473 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6474 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6478AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6481 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6483 std::optional<DefinitionAndSourceRegister>
Def =
6485 assert(Def &&
"this shouldn't be an optional result");
6490 [=](MachineInstrBuilder &MIB) {
6493 [=](MachineInstrBuilder &MIB) {
6496 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6507 if (!TII.isLegalMUBUFImmOffset(
Offset))
6515 [=](MachineInstrBuilder &MIB) {
6518 [=](MachineInstrBuilder &MIB) {
6526 !TII.isLegalMUBUFImmOffset(
Offset))
6530 [=](MachineInstrBuilder &MIB) {
6533 [=](MachineInstrBuilder &MIB) {
6540std::pair<Register, unsigned>
6541AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6542 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6543 int64_t ConstAddr = 0;
6547 std::tie(PtrBase, Offset, std::ignore) =
6548 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
6551 if (isDSOffsetLegal(PtrBase, Offset)) {
6553 return std::pair(PtrBase, Offset);
6555 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
6564 return std::pair(Root.getReg(), 0);
6568AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
6571 std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
6573 [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
6579AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
6580 return selectDSReadWrite2(Root, 4);
6584AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
6585 return selectDSReadWrite2(Root, 8);
6589AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
6590 unsigned Size) const {
6595 [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
6597 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }
6601std::pair<Register, unsigned>
6602AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
6603 unsigned Size) const {
6604 const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
6605 int64_t ConstAddr = 0;
6609 std::tie(PtrBase, Offset, std::ignore) =
6610 getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
6613 int64_t OffsetValue0 = Offset;
int64_t OffsetValue1 = Offset + Size;
6615 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
6617 return std::pair(PtrBase, OffsetValue0 / Size);
6619 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
6627 return std::pair(Root.getReg(), 0);
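// getPtrBaseWithConstantOffset peels a known-constant RHS off a G_PTR_ADD
// and returns {base, offset, flag}; when the root is not such an add it
// returns the original register with a zero offset. Callers here discard
// the third element via std::ignore.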
6635std::tuple<Register, int64_t, bool>
6636AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(Register Root, const MachineRegisterInfo &MRI) const {
6639 if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
6640 return {Root, 0, false};
6643 std::optional<ValueAndVReg> MaybeOffset =
6646 return {Root, 0, false};
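// The fragments below build a 128-bit buffer resource descriptor for an
// arbitrary 64-bit pointer: S_MOV_B32/S_MOV_B64 materialize the constant
// words (including TII.getDefaultRsrcDataFormat()) and REG_SEQUENCE glues
// the pieces together via sub0..sub3 subregister indices.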
6666 B.buildInstr(AMDGPU::S_MOV_B32)
6669 B.buildInstr(AMDGPU::S_MOV_B32)
6676 B.buildInstr(AMDGPU::REG_SEQUENCE)
6679 .addImm(AMDGPU::sub0)
6681 .addImm(AMDGPU::sub1);
6686 B.buildInstr(AMDGPU::S_MOV_B64)
6691 B.buildInstr(AMDGPU::REG_SEQUENCE)
6694 .addImm(AMDGPU::sub0_sub1)
6696 .addImm(AMDGPU::sub2_sub3);
6703 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
6712 uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
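// parseMUBUFAddress decomposes a pointer value into MUBUFAddressData: the
// residual base N0, the operands N2/N3 of an inner pointer add if one is
// present, and the folded constant Offset, for the addr64/offset selection
// routines below.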
6719AMDGPUInstructionSelector::MUBUFAddressData
6720AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
6721 MUBUFAddressData Data;
6727 std::tie(PtrBase, Offset, std::ignore) =
6728 getPtrBaseWithConstantOffset(Src, *MRI);
6734 if (MachineInstr *InputAdd = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
6736 Data.N2 = InputAdd->getOperand(1).getReg();
6737 Data.N3 = InputAdd->getOperand(2).getReg();
6752bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
6758 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6759 return N0Bank->getID() == AMDGPU::VGPRRegBankID;
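// When the folded constant does not fit the MUBUF immediate field,
// splitIllegalMUBUFOffset moves the illegal part into a freshly
// materialized SGPR soffset instead of abandoning the match.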
6765void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
6767 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6771 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6772 B.buildInstr(AMDGPU::S_MOV_B32)
6778bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(MachineOperand &Root, Register &VAddr, Register &RSrcReg, Register &SOffset, int64_t &Offset) const {
6783 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6786 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
6787 if (!shouldUseAddr64(AddrData))
6793 Offset = AddrData.Offset;
6799 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6801 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6814 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6825 splitIllegalMUBUFOffset(B, SOffset, Offset);
6829bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg, Register &SOffset, int64_t &Offset) const {
6834 if (STI.useFlatForGlobal())
6837 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
6838 if (shouldUseAddr64(AddrData))
6844 Offset = AddrData.Offset;
6850 splitIllegalMUBUFOffset(B, SOffset, Offset);
6855AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
6861 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
6867 [=](MachineInstrBuilder &MIB) {
6870 [=](MachineInstrBuilder &MIB) {
6873 [=](MachineInstrBuilder &MIB) {
6876 else if (STI.hasRestrictedSOffset())
6877 MIB.addReg(AMDGPU::SGPR_NULL);
6881 [=](MachineInstrBuilder &MIB) {
6891AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
6896 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
6900 [=](MachineInstrBuilder &MIB) {
6903 [=](MachineInstrBuilder &MIB) {
6906 else if (STI.hasRestrictedSOffset())
6907 MIB.addReg(AMDGPU::SGPR_NULL);
6919AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
6924 SOffset = AMDGPU::SGPR_NULL;
6926 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
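// SMRD buffer offsets: getConstantZext32Val fetches a constant usable as a
// zero-extended 32-bit value; the selectSMRDBuffer* selectors then render
// it in whatever encoded form the subtarget's SMRD immediate field accepts.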
6930static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
6934 if (!OffsetVal || !isInt<32>(*OffsetVal))
6935 return std::nullopt;
6936 return Lo_32(*OffsetVal);
6940AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
6941 std::optional<uint64_t> OffsetVal =
6946 std::optional<int64_t> EncodedImm =
6951 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
6955AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
6962 std::optional<int64_t> EncodedImm =
6967 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
6971AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
6979 return std::nullopt;
6981 std::optional<int64_t> EncodedOffset =
6984 return std::nullopt;
6987 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
6988 [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
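// selectVOP3PMadMixModsImpl computes the source modifiers for the
// mixed-precision mad/fma instructions: CheckAbsNeg walks through
// FNEG/FABS wrappers, and Matched records whether the f16-extended operand
// pattern was actually found.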
6991std::pair<Register, unsigned>
6992AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
6993 bool &Matched) const {
6998 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
7008 const auto CheckAbsNeg = [&]() {
7013 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
7044AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(MachineOperand &Root) const {
7049 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7054 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
7055 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
7060AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
7064 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7067 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
7068 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
7072bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IntrID) const {
7076 Register CCReg = I.getOperand(0).getReg();
7081 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
7082 .addImm(I.getOperand(2).getImm());
7086 I.eraseFromParent();
7087 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass, *MRI);
7091bool AMDGPUInstructionSelector::selectSGetBarrierState(MachineInstr &I, Intrinsic::ID IntrID) const {
7095 const MachineOperand &BarOp = I.getOperand(2);
7096 std::optional<int64_t> BarValImm =
7100 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7104 MachineInstrBuilder MIB;
7105 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
7106 : AMDGPU::S_GET_BARRIER_STATE_M0;
7109 auto DstReg = I.getOperand(0).getReg();
7110 const TargetRegisterClass *DstRC =
7111 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
7112 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7118 I.eraseFromParent();
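// getNamedBarrierOp picks between the _IMM and _M0 flavors of the named
// barrier instructions, depending on whether the barrier operand is an
// inline constant or must be routed through M0.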
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
7123 if (HasInlineConst) {
switch (IntrID) {
7127 case Intrinsic::amdgcn_s_barrier_join:
7128 return AMDGPU::S_BARRIER_JOIN_IMM;
7129 case Intrinsic::amdgcn_s_wakeup_barrier:
7130 return AMDGPU::S_WAKEUP_BARRIER_IMM;
7131 case Intrinsic::amdgcn_s_get_named_barrier_state:
7132 return AMDGPU::S_GET_BARRIER_STATE_IMM;
} else {
switch (IntrID) {
7138 case Intrinsic::amdgcn_s_barrier_join:
7139 return AMDGPU::S_BARRIER_JOIN_M0;
7140 case Intrinsic::amdgcn_s_wakeup_barrier:
7141 return AMDGPU::S_WAKEUP_BARRIER_M0;
7142 case Intrinsic::amdgcn_s_get_named_barrier_state:
7143 return AMDGPU::S_GET_BARRIER_STATE_M0;
7148bool AMDGPUInstructionSelector::selectNamedBarrierInit(MachineInstr &I, Intrinsic::ID IntrID) const {
7152 const MachineOperand &BarOp = I.getOperand(1);
7153 const MachineOperand &CntOp = I.getOperand(2);
7157 if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
7158 std::optional<int64_t> CntImm =
7160 if (CntImm && *CntImm == 0) {
7161 std::optional<int64_t> BarValImm =
7164 auto BarID = ((*BarValImm) >> 4) & 0x3F;
7165 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
7167 I.eraseFromParent();
7174 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7180 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7187 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7193 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7194 constexpr unsigned ShAmt = 16;
7200 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7210 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7211 ? AMDGPU::S_BARRIER_INIT_M0
7212 : AMDGPU::S_BARRIER_SIGNAL_M0;
7213 MachineInstrBuilder MIB;
7216 I.eraseFromParent();
7220bool AMDGPUInstructionSelector::selectNamedBarrierInst(MachineInstr &I, Intrinsic::ID IntrID) const {
7224 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7227 std::optional<int64_t> BarValImm =
7232 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7238 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7244 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7249 MachineInstrBuilder MIB;
7253 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7254 auto DstReg = I.getOperand(0).getReg();
7255 const TargetRegisterClass *DstRC =
7256 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
7257 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7263 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7267 I.eraseFromParent();
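// The render* callbacks below are invoked from TableGen-generated patterns
// to turn a matched operand into immediate operands on the new
// instruction; most begin by asserting the shape of the matched node.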
7274 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7275 "Expected G_CONSTANT");
7276 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
7282 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7283 "Expected G_CONSTANT");
7284 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
7290 const MachineOperand &Op = MI.getOperand(1);
7291 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
7292 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7295void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
7297 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7298 "Expected G_CONSTANT");
7299 MIB.addImm(MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
7307 const MachineOperand &Op = MI.getOperand(OpIdx);
7324 assert(OpIdx >= 0 && "expected to match an immediate operand");
7328void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7330 assert(OpIdx >= 0 && "expected to match an immediate operand");
7335void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7337 assert(OpIdx >= 0 && "expected to match an immediate operand");
7343void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7345 assert(OpIdx >= 0 && "expected to match an immediate operand");
7350void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7352 assert(OpIdx >= 0 && "expected to match an immediate operand");
7358void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7360 assert(OpIdx >= 0 && "expected to match an immediate operand");
7365void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7367 assert(OpIdx >= 0 && "expected to match an immediate operand");
7372void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7374 assert(OpIdx >= 0 && "expected to match an immediate operand");
7379void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7381 assert(OpIdx >= 0 && "expected to match an immediate operand");
7390 assert(OpIdx >= 0 && "expected to match an immediate operand");
7399 assert(OpIdx >= 0 && "expected to match an immediate operand");
7406void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7408 assert(OpIdx >= 0 && "expected to match an immediate operand");
7409 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
7424 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7426 assert(ExpVal != INT_MIN);
7444 if (MI.getOperand(OpIdx).getImm())
7446 MIB.addImm((int64_t)Mods);
7453 if (MI.getOperand(OpIdx).getImm())
7455 MIB.addImm((int64_t)Mods);
7461 unsigned Val = MI.getOperand(OpIdx).getImm();
7469 MIB.addImm((int64_t)Mods);
7475 uint32_t V = MI.getOperand(2).getImm();
7478 if (!Subtarget->hasSafeCUPrefetch())
7484void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7486 unsigned Val = MI.getOperand(OpIdx).getImm();
7495bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7496 return TII.isInlineConstant(Imm);
7499bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7500 return TII.isInlineConstant(Imm);