#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();
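// Note on the repeated .inc includes above: this is the standard GlobalISel
// arrangement. The same generated file, AMDGPUGenGlobalISel.inc, is included
// several times, each inclusion bracketed by a different GET_GLOBALISEL_*
// macro (implementation, predicate initializers, temporaries), so each
// inclusion expands a different slice of the TableGen-generated selector.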
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    // ...
    // A G_TRUNC to s1 produces an ordinary scalar bit, never a lane mask.
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
           RC->hasSuperClassEq(TRI.getBoolRC());
  }

  const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
  return RB->getID() == AMDGPU::VCCRegBankID;
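// Editor's note (sketch): on AMDGPU an s1 value may be a per-lane condition
// in the VCC register bank -- one bit per lane of the wave, physically an
// SGPR pair on wave64 or a single SGPR on wave32 -- rather than a plain
// scalar bit. isVCC() is how the copy/select/branch paths below distinguish
// the two cases, e.g. roughly:
//   %c:vcc(s1)  = G_ICMP intpred(eq), %a(s32), %b   ; lane mask
//   %t:sgpr(s1) = G_TRUNC %x(s32)                   ; ordinary bit, not vcc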
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  // ...
  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);
  // ...
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC =
      TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
    return false;

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
    return false;

  const MCInstrDesc &MCID = MI.getDesc();
  // ...
  MI.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  // ...
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC =
          TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      // ...
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
        return false;

      const TargetRegisterClass *SrcRC =
          TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, true, true);
      if (ConstVal) {
        // Constant source: materialize the full lane mask directly.
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        // Non-constant source: mask off bit 0 and compare it against zero.
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        // ...
        if (Size == 16) {
          assert(Subtarget->useRealTrue16Insts());
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
          // ... (modifier and operand list elided)
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
          // ... (modifier and operand list elided)
        } else {
          bool IsSGPR = TRI.isSGPRClass(SrcRC);
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
          // ...
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
              .addImm(0)
              .addReg(MaskedReg);
        }
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      // ...
    }

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      return false;
    // ...
  }

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    // ...
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }
  // ...
}
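// Editor's note (sketch): the interesting case above is copying an ordinary
// bit into a vcc lane mask. Only bit 0 of the source is meaningful, so the
// selector emits roughly:
//   %masked = S_AND_B32 1, %src          ; or V_AND_B32 for VGPR sources
//   $vcc    = V_CMP_NE_U32_e64 0, %masked
// which broadcasts the scalar condition into one bit per active lane. A
// constant source skips this and is materialized as S_MOV of 0 or all-ones.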
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  // ...
  Register VCCReg = I.getOperand(1).getReg();
  MachineInstr *Cmp;
  if (STI.hasScalarCompareEq64()) {
    unsigned CmpOpc =
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
    // ...
  } else {
    // No 64-bit scalar compare: OR the mask with itself just to set SCC.
    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
              .addReg(VCCReg)
              .addReg(VCCReg);
  }
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(SrcReg, *MRI);
  if (Arg) {
    const int64_t Value = Arg->Value.getZExtValue();
    // Known constant: materialize an all-ones or all-zeros lane mask.
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    // ...
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
  }
  // ...
  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
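// Editor's note (sketch): for a non-constant SCC source the selector copies
// the bit into $scc and then uses S_CSELECT to turn it into a lane mask:
//   $scc = COPY %src
//   %dst = S_CSELECT_B64 -1, 0        ; all-ones mask if scc, else zero
// i.e. the uniform condition is expanded to the wave-wide vcc form.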
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  // ...
  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
                 .addReg(SrcReg);
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
  // ...
  const RegClassOrRegBank &RegClassOrBank =
      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
  // ...
  DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
  // ...
  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
    if (RB) {
      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
        return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}
                                                unsigned SubIdx) const {
  // ...
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  // ...
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
  // ...
  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  // ...
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  LLT Ty = MRI->getType(DstReg);
  // ...
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
                              .add(I.getOperand(1))
                              .add(I.getOperand(2));
      // ...
    }

    if (STI.hasAddNoCarryInsts()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      // ...
    }

    const unsigned Opc =
        Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    Register UnusedCarry =
        MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
                            .addDef(UnusedCarry, RegState::Dead)
                            .add(I.getOperand(1))
                            .add(I.getOperand(2))
                            .addImm(0);
    // ...
  }

  assert(!Sub && "illegal sub should not reach here");

  // 64-bit add: split into a low add defining carry and a high add consuming
  // it, then recombine the halves with REG_SEQUENCE.
  const TargetRegisterClass &RC =
      IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC =
      IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
        .add(Lo1)
        .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
        .add(Hi1)
        .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
        .addDef(CarryReg)
        .add(Lo1)
        .add(Lo2)
        .addImm(0);
    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
            .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
            .add(Hi1)
            .add(Hi2)
            .addReg(CarryReg, RegState::Kill)
            .addImm(0);
    // ...
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;
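// Editor's note (sketch): the 64-bit SALU path above therefore expands
//   %dst(s64) = G_ADD %a, %b
// into roughly
//   %lo  = S_ADD_U32  %a.sub0, %b.sub0   ; defines SCC = carry-out
//   %hi  = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes SCC as carry-in
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1
// with V_ADD_CO_U32 / V_ADDC_U32 and an explicit carry vreg on the VALU side.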
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  // ...
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    // ...
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  }

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3); // Dead scc
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
        .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  if (HasCarryIn &&
      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
    return false;
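// Editor's note (sketch): on the scalar side the carry flag only exists as
// SCC, so G_UADDE becomes "COPY $scc <- carry-in; S_ADDC_U32; COPY dst <-
// $scc", and the SCC def is marked dead when the carry-out is unused. The
// VALU side instead maps directly onto V_ADD_CO/V_ADDC with a vcc carry.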
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  // ...
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else if (UseNoCarry)
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  // ...
  I.setDesc(TII.get(Opc));
  // ...
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;
  // ...
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  // ...
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  // ...
  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                    *SrcRC, I.getOperand(1));
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, {}, SubReg);
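// Editor's note (sketch): because the extract offset is required to be
// 32-bit aligned, G_EXTRACT reduces to a plain subregister copy, e.g.
//   %dst(s32) = G_EXTRACT %src(s128), 64   ==>   %dst = COPY %src.sub2
// getSubRegFromChannel(Offset / 32, ...) picks the subregister index.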
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
  // ...
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  // ...
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  // ...
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    // ...
    const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);
  // ...
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;
  // ...
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    // ...
    if (SrcBank->getID() == AMDGPU::SGPRRegBankID &&
        SubRegs[I] == AMDGPU::hi16) {
      // No addressable SGPR hi16 subregister: shift the high half down.
      BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())
      // ...
    } else {
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
          .addReg(SrcReg, {}, SubRegs[I]);
    }
    // ...
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
      return false;

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
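// Editor's note (sketch): G_MERGE_VALUES becomes a single REG_SEQUENCE and
// G_UNMERGE_VALUES becomes one subregister COPY per result, e.g.
//   %lo(s32), %hi(s32) = G_UNMERGE_VALUES %v(s64)
//     ==>  %lo = COPY %v.sub0 ; %hi = COPY %v.sub1
// The only special case is an SGPR hi16 piece, which has no addressable
// subregister and is extracted with S_LSHR_B32 by 16 instead.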
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  // ...
  LLT SrcTy = MRI->getType(Src0);
  // ...
  // Merges of 32-bit or wider elements are handled as G_MERGE_VALUES.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }
  // ...
  // assert: ... (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && ...)
  // ...
  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  MachineBasicBlock *BB = MI.getParent();
  // ...
  if (ConstSrc0 && ConstSrc1) {
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

    // VALU destination: a single V_MOV of the packed immediate.
    // ...
    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    // SALU destination: a single S_MOV of the packed immediate.
    // ...
    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
  }
  // ...
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    // ...
    const TargetRegisterClass &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);
  }
  // ...
  // VALU pack: mask the low half, then shift/or in the high half.
  Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
  // ...
  MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
  // ...
  MI.eraseFromParent();
  // ...
  // SALU pack: pick an S_PACK variant by where each half comes from.
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift1) {
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift0) {
    // ...
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
                     .addReg(ShiftSrc0)
                     .addImm(16);
      // ...
      MI.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    }
    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);
    }
  }

  MI.setDesc(TII.get(Opc));
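// Editor's note (sketch): for v2s16 the SALU path boils down to choosing an
// S_PACK_* variant from where the two halves come from:
//   S_PACK_LL_B32_B16 %a, %b    ; lo(a)  | lo(b)<<16   (default)
//   S_PACK_LH_B32_B16 %a, %s1   ; lo(a)  | hi(s1)<<16  (src1 was a lshr-16)
//   S_PACK_HH_B32_B16 %s0, %s1  ; hi(s0) | hi(s1)<<16  (both were lshr-16)
// plus S_PACK_HL on targets that have it; packing a shifted source with a
// constant zero high half degenerates to a single S_LSHR_B32.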
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);
  // ...
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  // ...
  int64_t Offset = I.getOperand(3).getImm();
  // ...
  if (Offset % 32 != 0 || InsSize % 32 != 0)
    return false;
  // ...
  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)
    return false;

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  // ...
  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
  // ...
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
    return false;

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg)
      .addImm(SubReg);
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  // ...
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  // ...
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  // This path is only used on targets with 16-bank LDS.
  if (STI.getLDSBankCount() != 16)
    return false;
  // ...
  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
    return false;
  // ...
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      // ... (leading operands elided)
      .addImm(MI.getOperand(3).getImm());
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  // With a constant bus limit > 1 the generated matcher can handle this.
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
    return selectImpl(MI, *CoverageInfo);

  MachineBasicBlock *MBB = MI.getParent();
  // ...
  Register LaneSelect = MI.getOperand(3).getReg();
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
  // ...
    MIB.addImm(ConstSelect->Value.getSExtValue() &
               maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
  // ...
    std::optional<ValueAndVReg> ConstVal =
    // ...
        STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());
  // ...
      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass,
                                   *MRI);
      // ...
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
          .addReg(LaneSelect);
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  // ...
  LLT Ty = MRI->getType(Dst0);
  unsigned Opc;
  if (Ty == LLT::scalar(32))
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
  else if (Ty == LLT::scalar(64))
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  else
    return false;
  // ...
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  // ...
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    // ...
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : {DstReg, Src0Reg, Src1Reg}) {
      if (!MRI->getRegClassOrNull(Reg))
        MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
    }
    return true;
  }
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    // ...
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  case Intrinsic::amdgcn_wave_shuffle:
    return selectWaveShuffleIntrin(I);
  // ...
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
                          const GCNSubtarget &ST) {
  // ...
  if (Size == 16 && !ST.has16BitInsts())
    return -1;

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
                          unsigned S64Opc) {
    if (Size == 16)
      return ST.hasTrue16BitInsts()
                 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
                 : S16Opc;
    if (Size == 32)
      return S32Opc;
    return S64Opc;
  };

  switch (P) {
  case CmpInst::ICMP_NE:
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
  case CmpInst::ICMP_SGT:
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
  case CmpInst::ICMP_UGT:
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);
  case CmpInst::FCMP_OEQ:
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
  case CmpInst::FCMP_OGT:
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
  case CmpInst::FCMP_OGE:
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
  case CmpInst::FCMP_OLT:
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
  case CmpInst::FCMP_OLE:
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
  case CmpInst::FCMP_ONE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_ORD:
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
  case CmpInst::FCMP_UNO:
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
  case CmpInst::FCMP_UEQ:
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
  case CmpInst::FCMP_UGT:
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
  case CmpInst::FCMP_UGE:
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
  case CmpInst::FCMP_ULT:
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
  case CmpInst::FCMP_ULE:
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
  case CmpInst::FCMP_UNE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_TRUE:
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
  case CmpInst::FCMP_FALSE:
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
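// Editor's note: the five-opcode Select() scheme above encodes the 16-bit
// story on newer targets. For each predicate there is a legacy 16-bit VOPC
// opcode, a "true16" variant (operating on real 16-bit register halves), a
// "fake16" variant (16-bit semantics in a full 32-bit VGPR), and the 32- and
// 64-bit forms; hasTrue16BitInsts()/useRealTrue16Insts() pick between them.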
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:  return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:  return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }
  // ...
  switch (P) {
  case CmpInst::ICMP_NE:   return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:   return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:  return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:  return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:  return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:  return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:  return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:  return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:  return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:  return AMDGPU::S_CMP_LE_U32;
  case CmpInst::FCMP_OEQ:  return AMDGPU::S_CMP_EQ_F32;
  case CmpInst::FCMP_OGT:  return AMDGPU::S_CMP_GT_F32;
  case CmpInst::FCMP_OGE:  return AMDGPU::S_CMP_GE_F32;
  case CmpInst::FCMP_OLT:  return AMDGPU::S_CMP_LT_F32;
  case CmpInst::FCMP_OLE:  return AMDGPU::S_CMP_LE_F32;
  case CmpInst::FCMP_ONE:  return AMDGPU::S_CMP_LG_F32;
  case CmpInst::FCMP_ORD:  return AMDGPU::S_CMP_O_F32;
  case CmpInst::FCMP_UNO:  return AMDGPU::S_CMP_U_F32;
  case CmpInst::FCMP_UEQ:  return AMDGPU::S_CMP_NLG_F32;
  case CmpInst::FCMP_UGT:  return AMDGPU::S_CMP_NLE_F32;
  case CmpInst::FCMP_UGE:  return AMDGPU::S_CMP_NLT_F32;
  case CmpInst::FCMP_ULT:  return AMDGPU::S_CMP_NGE_F32;
  case CmpInst::FCMP_ULE:  return AMDGPU::S_CMP_NGT_F32;
  case CmpInst::FCMP_UNE:  return AMDGPU::S_CMP_NEQ_F32;
  // ...
  }
  // ...
  if (!STI.hasSALUFloatInsts())
    return -1;

  switch (P) {
  case CmpInst::FCMP_OEQ:  return AMDGPU::S_CMP_EQ_F16;
  case CmpInst::FCMP_OGT:  return AMDGPU::S_CMP_GT_F16;
  case CmpInst::FCMP_OGE:  return AMDGPU::S_CMP_GE_F16;
  case CmpInst::FCMP_OLT:  return AMDGPU::S_CMP_LT_F16;
  case CmpInst::FCMP_OLE:  return AMDGPU::S_CMP_LE_F16;
  case CmpInst::FCMP_ONE:  return AMDGPU::S_CMP_LG_F16;
  case CmpInst::FCMP_ORD:  return AMDGPU::S_CMP_O_F16;
  case CmpInst::FCMP_UNO:  return AMDGPU::S_CMP_U_F16;
  case CmpInst::FCMP_UEQ:  return AMDGPU::S_CMP_NLG_F16;
  case CmpInst::FCMP_UGT:  return AMDGPU::S_CMP_NLE_F16;
  case CmpInst::FCMP_UGE:  return AMDGPU::S_CMP_NLT_F16;
  case CmpInst::FCMP_ULT:  return AMDGPU::S_CMP_NGE_F16;
  case CmpInst::FCMP_ULE:  return AMDGPU::S_CMP_NGT_F16;
  case CmpInst::FCMP_UNE:  return AMDGPU::S_CMP_NEQ_F16;
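// Editor's note: scalar (SCC-defining) compares are only usable when the
// result is uniform. Note the asymmetric gating above: 64-bit integer
// compares need hasScalarCompareEq64(), and the F16 forms need
// hasSALUFloatInsts(); when this helper returns -1 the caller gives up on
// the scalar form instead of emitting a wave-wide V_CMP here.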
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
  // ...
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    // ...
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
        .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();
    return Ret;
  }

  if (I.getOpcode() == AMDGPU::G_FCMP)
    return false;
  // ...
  MachineInstrBuilder ICmp;
  if (Size == 16) {
    // 16-bit compares carry source-modifier operands.
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .addImm(0) // src0_modifiers
               .add(I.getOperand(2))
               .addImm(0) // src1_modifiers
               .add(I.getOperand(3))
               .addImm(0); // op_sel
  } else {
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));
  }
  // ...
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), *MRI);
  // ...
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
    return false;

  LLT DstTy = MRI->getType(Dst);
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
  // ...
  // Invalid predicate: fold the whole compare to an IMPLICIT_DEF.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
  I.eraseFromParent();
  return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  // ...
  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
  Register Src0Reg =
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
  Register Src1Reg =
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
  // ...
  SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
  // ...
  SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  // ...
  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  // ...
  I.eraseFromParent();
  if (MI->getParent() != MBB)
    return false;
  // ...
  // A COPY from an SGPR boolean into the vcc bank produces a lane mask.
  if (MI->getOpcode() == AMDGPU::COPY) {
    // ...
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
      return true;
  }
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  // The result normally matches the wave size, but an i64 ballot is also
  // supported in wave32 mode.
  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
    return false;

  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(SrcReg, *MRI);
  // ...
  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());
  }

  if (Arg) {
    const int64_t Value = Arg->Value.getZExtValue();
    if (Value == 0) {
      // ballot(0) folds to a zero mask.
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      // ...
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
        return false;
    } else {
      // ballot(non-zero constant) folds to a copy of exec.
      // ...
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
        return false;
    }
  } else {
    // General case: AND the condition mask with exec.
    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    // ...
  }

  // Widen a 64-bit ballot in wave32 with a zeroed high half.
  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    // ...
  }

  I.eraseFromParent();
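// Editor's note (sketch): amdgcn.ballot reduces to mask algebra on exec:
//   ballot(0)          -> S_MOV  dst, 0
//   ballot(const != 0) -> COPY   dst, exec
//   ballot(cond)       -> S_AND  dst, cond_mask, exec
// For a 64-bit ballot on wave32 the 32-bit result is widened with a
// REG_SEQUENCE whose high half is a zeroed SGPR.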
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();
  // ...
  BuildMI(*BB, &I, I.getDebugLoc(),
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  // ...
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID
                     ? AMDGPU::S_MOV_B32
                     : AMDGPU::V_MOV_B32_e32;
  // ...
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  // ...
  Module *M = MF->getFunction().getParent();
  const GlobalValue *GV =
  // ... (symbol used to emit the LDS size as an absolute relocation)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  // ...
  MachineOperand &Dst = I.getOperand(0);
  // ...
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (!RC ||
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
    return false;

  // Non-zero depth, or an entry function (which has no return address):
  // return a null address.
  if (Depth != 0 ||
      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // ...
    I.eraseFromParent();
    return true;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  // ...
  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
  Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
                                             AMDGPU::SReg_64RegClass, DL);
  // ...
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // ...
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  Register Reg = MI.getOperand(1).getReg();
  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  return true;
}
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
    // ...
    Fn.getContext().diagnose(DiagnosticInfoUnsupported(
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));
  }

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
  // ...
  CountDw = (IndexOperand >> 24) & 0xf;
  IndexOperand &= ~(0xf << 24);

  if (CountDw < 1 || CountDw > 4) {
    // ...
    Fn.getContext().diagnose(DiagnosticInfoUnsupported(
        Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));
  }
  // ...
  Fn.getContext().diagnose(DiagnosticInfoUnsupported(
      Fn, "ds_ordered_count: bad index operand", DL));
  // ...
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  // ...
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
  // ...
  Offset1 |= (CountDw - 1) << 6;
  // ...
  Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);
  // ...
  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  // ...
  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
    return false;
  // ...
  MI.eraseFromParent();
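// Editor's note (worked example of the offset encoding above): the immediate
// packs Offset0 | (Offset1 << 8), where Offset0 = ordered-count index << 2
// and Offset1 = wave_release | wave_done<<1 | shader_type<<2 | instr<<4
// (| (dwords-1)<<6 where supported). E.g. index 3, wave_release=1,
// wave_done=0, ds_ordered_add (0), shader type 0 gives Offset0 = 0xc,
// Offset1 = 0x1, i.e. a final immediate of 0x10c.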
  // (mapping from the ds_gws_* intrinsic ID to the corresponding DS opcode)
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
    return false;

  // Operands are (intrinsic ID, [vsrc,] offset).
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;
  // ...
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  MachineInstr *Readfirstlane = nullptr;
  // ...
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    // ...
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // ...
  }
  // ...
  std::tie(BaseOffset, ImmOffset) =
      AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);

  if (Readfirstlane) {
    // ...
    if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
      return false;
    // ...
  } else {
    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))
      return false;
  }

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  const MCInstrDesc &InstrDesc = TII.get(Opc);
  // ...
  if (HasVSrc) {
    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
    // ...
    if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
      return false;
    // ...
    Register DataReg = MRI->createVirtualRegister(DataRC);
    if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
      return false;
    // ...
    Register UndefReg = MRI->createVirtualRegister(SubRC);
    // ...
  }
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  // ...
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  // ...
  // If the offset cannot be encoded, give up on folding it.
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
    Offset = 0;
  }

  MachineBasicBlock *MBB = MI.getParent();
  // ...
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  // ...
  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
    return false;
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  MFInfo->setInitWholeWave();
  return selectImpl(MI, *CoverageInfo);
}

// (from the helper that decodes an image intrinsic's texfailctrl immediate)
  TFE = TexFailCtrl & 0x1;
  TexFailCtrl &= ~(uint64_t)0x1;
  LWE = TexFailCtrl & 0x2;
  TexFailCtrl &= ~(uint64_t)0x2;

  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  Register ResultDef = MI.getOperand(0).getReg();
  if (MRI->use_nodbg_empty(ResultDef))
  // ...
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
  // ...
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  // ...
  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;
  LLT VDataTy;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  // ...
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  // ...
  bool IsTexFail = false;
  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
                    TFE, LWE, IsTexFail))
    return false;

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  // A16 implies 16-bit gradients if subtarget has G16.
  if (IsA16 && !STI.hasG16() && !IsG16)
    return false;

  unsigned DMask = 0;
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    // Be careful to allow atomic swap on 16-bit element vectors.
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      Ty.getSizeInBits() == 128 :
      Ty.getSizeInBits() == 64;

    if (BaseOpcode->AtomicX2) {
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    // ...
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      // ...
    } else {
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
    }
  }
  // ...
  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
        AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
    // ...
    IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
  }
  // ...
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  // ...
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())
      continue;
    // ...
    ++NumVAddrRegs;
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
  }
  // ...
  const bool UseNSA =
      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
    LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target!\n");
    return false;
  }
  // ...
  if (IsGFX12Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else {
    if (Subtarget->hasGFX90AInsts()) {
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                     NumVDataDwords, NumVAddrDwords);
      if (Opcode == -1) {
        LLVM_DEBUG(dbgs() << "requested image instruction is not supported on "
                             "this GPU\n");
        return false;
      }
    }
    if (Opcode == -1 &&
        STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
                                     NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
                                     NumVDataDwords, NumVAddrDwords);
  }
  // ...
  if (VDataOut) {
    if (BaseOpcode->AtomicX2) {
      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
      // ...
      if (!MRI->use_empty(VDataOut)) {
        // ...
      }
    }
  }
  // ...
  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
      // ...
    }
  }
  // ...
  MIB.addImm(STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
  // ...
  MIB.addImm(IsA16 ? -1 : 0);
  // ...
  if (!Subtarget->hasGFX90AInsts()) {
    // ...
  }
  // ...
  MIB.addImm(IsD16 ? -1 : 0);
  // ...
  MI.eraseFromParent();
  // ...
  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  // ...
  MachineBasicBlock *MBB = MI.getParent();
  // ...
  unsigned Offset = MI.getOperand(6).getImm();

  unsigned Opc;
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
    break;
  }
  // ...
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_buffer_load_async_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
    return selectBufferLoadLds(I);
  // ...
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_load_async_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
  case Intrinsic::amdgcn_global_load_async_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_tensor_load_to_lds:
  case Intrinsic::amdgcn_tensor_store_from_lds:
    return selectTensorLoadStore(I, IntrinsicID);
  case Intrinsic::amdgcn_asyncmark:
  case Intrinsic::amdgcn_wait_asyncmark:
    if (!Subtarget->hasAsyncMark())
      return false;
    break;
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      // ...
      F.getContext().diagnose(DiagnosticInfoUnsupported(
          F, "intrinsic not supported on subtarget", I.getDebugLoc()));
      // ...
    }
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_alloc_vgpr: {
    // ...
    Register ResReg = I.getOperand(0).getReg();
    // ...
    MachineInstr *AllocMI =
        BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_ALLOC_VGPR))
            .add(I.getOperand(2));
    // ...
    I.eraseFromParent();
    // ...
    return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
  }
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {
      // ...
      F.getContext().diagnose(DiagnosticInfoUnsupported(
          F, "intrinsic not supported on subtarget", I.getDebugLoc()));
      // ...
    }
    return selectNamedBarrierInst(I, IntrinsicID);
  }
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  // ...
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  // ...
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64
                                       : AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);
    // ...
    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));
    // ...
    I.eraseFromParent();
    // ...
  }

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)            // src0_modifiers
          .add(I.getOperand(3)) // false value
          .addImm(0)            // src1_modifiers
          .add(I.getOperand(2)) // true value
          .add(I.getOperand(1)); // condition
  // ...
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  // ...
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
  if (DstTy == LLT::scalar(1)) {
    // s1 legalization artifacts are not treated as vcc booleans here.
    DstRB = SrcRB;
  } else {
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
    if (SrcRB != DstRB)
      return false;
  }

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
    // ...
  }

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg, {}, AMDGPU::lo16);
    I.eraseFromParent();
    return true;
  }
  // ...
  Register LoReg = MRI->createVirtualRegister(DstRC);
  Register HiReg = MRI->createVirtualRegister(DstRC);
  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, {}, AMDGPU::sub0);
  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, {}, AMDGPU::sub1);

  if (IsVALU && STI.hasSDWA()) {
    // Write the low 16 bits of the high element into the high 16 bits of
    // the low element.
    MachineInstr *MovSDWA =
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
    // ... (SDWA selector operands elided)
  } else {
    Register TmpReg0 = MRI->createVirtualRegister(DstRC);
    Register TmpReg1 = MRI->createVirtualRegister(DstRC);
    Register ImmReg = MRI->createVirtualRegister(DstRC);
    // ...
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
        .addImm(16)
        .addReg(HiReg);
    // ...
    unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
    unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
    // ...
    if (!IsVALU) {
      And.setOperandDead(3); // Dead scc
      Or.setOperandDead(3);  // Dead scc
    }
  }
  // ...
  I.eraseFromParent();
  // ...
  // Plain scalar truncates become subregister copies.
  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)
    return false;
  // ...
  const TargetRegisterClass *SrcWithSubRC =
      TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  // ...
  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
      return false;
  }

  I.getOperand(1).setSubReg(SubRegIdx);
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  // Prefer AND only when the mask fits in an inline immediate.
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
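// Editor's note: -16..64 is exactly the range of AMDGPU inline integer
// constants, so a zero-extension of 1..6 bits (mask 1, 3, ..., 63) can be
// done with a single AND against an inline immediate; wider masks fall back
// to a BFE instruction instead.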
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    Register Reg, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI) const {
  // ...
  return &RBI.getRegBankFromRegClass(*RC, LLT());
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
    I.getOperand(2).getImm() : SrcTy.getSizeInBits();
  // ...
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  // ...
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
    if (DstSize <= 32)
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    // ...
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    // ... (REG_SEQUENCE of SrcReg with the undef high half)
    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
  }

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // Try to use an AND with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
          BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
              .addImm(Mask)
              .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
            .addReg(SrcReg)
            .addImm(0)        // Offset
            .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
    }

    if (DstSize > 32 && SrcSize == 32) {
      // Materialize the high half separately, then assemble the result with
      // REG_SEQUENCE.
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      // ... (high-half computation)
          .addReg(SrcReg, {}, SubReg)
      // ...
          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    // ...
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)
          .addReg(UndefReg)
          .addImm(AMDGPU::sub1);
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
    }
    // ...
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
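// Editor's note (sketch): the extension strategies above, in order: VALU
// destinations use V_BFE_I32/V_BFE_U32 (or a plain AND when the mask is an
// inline constant); SGPR destinations prefer S_SEXT_I32_I8/I16 where those
// exist and otherwise fall back to S_BFE_I32/S_BFE_I64, e.g.
//   %d(s32) = G_SEXT %s(s8)  ==>  %d = S_SEXT_I32_I8 %s
//   %d(s32) = G_ZEXT %s(s8)  ==>  %d = S_BFE_U32 %s, (8 << 16)
// where the scalar BFE src1 packs offset in bits [5:0] and width in [22:16].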
  if (Unmerge->getNumDefs() == 2 && Unmerge->getOperand(1).getReg() == In &&
  // ...
    Out = Unmerge->getSourceReg();
  // ...
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
    return false;
  // ...
  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())
    return false;

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register Src = I.getOperand(1).getReg();
  // ...
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
  // ...
  I.eraseFromParent();
  return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  // ...
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  // ...
  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
  // ...
  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
    return false;

  MachineBasicBlock *BB = MI.getParent();
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x80000000);

  // Flip (or, when folding an inner fabs, set) the sign bit of the high half.
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  // ...
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  // ...
  MachineBasicBlock *BB = MI.getParent();
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
    return false;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x7fffffff);

  // Clear the sign bit of the high half.
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);

  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  // ...
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return;
  // ...
  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    // ...
    if (isConstant(*OpDef)) {
      assert(GEPInfo.Imm == 0);
      // ...
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }
  // ...
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
3048 bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
3049   return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
3052 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
3053   if (!MI.hasOneMemOperand())
3056   const MachineMemOperand *MMO = *MI.memoperands_begin();
3069   if (MI.getOpcode() == AMDGPU::G_PREFETCH)
3070     return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3071            AMDGPU::SGPRRegBankID;
3074   return I && I->getMetadata("amdgpu.uniform");
3078   for (const GEPInfo &GEPInfo : AddrInfo) {
3079     if (!GEPInfo.VgprParts.empty())
3085 void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
3086   const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
3089       STI.ldsRequiresM0Init()) {
3093     BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3098 bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3105   if (Reg.isPhysical())
3109   const unsigned Opcode = MI.getOpcode();
3111   if (Opcode == AMDGPU::COPY)
3114   if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3115       Opcode == AMDGPU::G_XOR)
3120     return GI->is(Intrinsic::amdgcn_class);
3122   return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3125 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
3127   MachineOperand &CondOp = I.getOperand(0);
3133   const TargetRegisterClass *ConstrainRC;
3140   if (!isVCC(CondReg, *MRI)) {
3144     CondPhysReg = AMDGPU::SCC;
3145     BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3146     ConstrainRC = &AMDGPU::SReg_32RegClass;
3153     const bool Is64 = STI.isWave64();
3154     const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3155     const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3157     Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3158     BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
3165     CondPhysReg = TRI.getVCC();
3166     BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3167     ConstrainRC = TRI.getBoolRC();
3170   if (!MRI->getRegClassOrNull(CondReg))
3171     MRI->setRegClass(CondReg, ConstrainRC);
3173   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
3176       .addMBB(I.getOperand(1).getMBB());
3178   I.eraseFromParent();
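// Two branch flavors: a uniform (scalar) condition is copied into SCC and
// branched on with S_CBRANCH_SCC1, while a VCC-banked condition is first
// ANDed with EXEC so inactive lanes cannot influence the outcome, then
// tested with S_CBRANCH_VCCNZ.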
3182 bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3184   Register DstReg = I.getOperand(0).getReg();
3185   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3186   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
3187   I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3191   return RBI.constrainGenericRegister(
3192       DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3195 bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
3196   Register DstReg = I.getOperand(0).getReg();
3197   Register SrcReg = I.getOperand(1).getReg();
3198   Register MaskReg = I.getOperand(2).getReg();
3199   LLT Ty = MRI->getType(DstReg);
3200   LLT MaskTy = MRI->getType(MaskReg);
3204   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3205   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3206   const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3207   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
3213   APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
3217   const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3218   const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3221       !CanCopyLow32 && !CanCopyHi32) {
3222     auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3226     I.eraseFromParent();
3231   unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3232   const TargetRegisterClass &RegRC
3233       = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3235   const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3236   const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3237   const TargetRegisterClass *MaskRC =
3238       TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3240   if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3241       !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3242       !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3247          "ptrmask should have been narrowed during legalize");
3249   auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
3255   I.eraseFromParent();
3259   Register HiReg = MRI->createVirtualRegister(&RegRC);
3260   Register LoReg = MRI->createVirtualRegister(&RegRC);
3263   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
3264       .addReg(SrcReg, {}, AMDGPU::sub0);
3265   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
3266       .addReg(SrcReg, {}, AMDGPU::sub1);
3275   Register MaskLo = MRI->createVirtualRegister(&RegRC);
3276   MaskedLo = MRI->createVirtualRegister(&RegRC);
3278   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
3279       .addReg(MaskReg, {}, AMDGPU::sub0);
3280   BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
3289   Register MaskHi = MRI->createVirtualRegister(&RegRC);
3290   MaskedHi = MRI->createVirtualRegister(&RegRC);
3292   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
3293       .addReg(MaskReg, {}, AMDGPU::sub1);
3294   BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
3299   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3304   I.eraseFromParent();
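// Lowering strategy for 64-bit ptrmask: if known-bits analysis proves an
// entire 32-bit half of the mask is all ones (CanCopyLow32 / CanCopyHi32),
// that half of the pointer is copied through unchanged; otherwise each half
// is ANDed separately and the result rebuilt with REG_SEQUENCE. The fully
// unknown SGPR case above uses a single S_AND_B64 instead.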
3310 static std::pair<Register, unsigned>
3317   std::tie(IdxBaseReg, Offset) =
3319   if (IdxBaseReg == AMDGPU::NoRegister) {
3323     IdxBaseReg = IdxReg;
3330   if (static_cast<unsigned>(Offset) >= SubRegs.size())
3331     return std::pair(IdxReg, SubRegs[0]);
3332   return std::pair(IdxBaseReg, SubRegs[Offset]);
3335 bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3341   LLT DstTy = MRI->getType(DstReg);
3342   LLT SrcTy = MRI->getType(SrcReg);
3344   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3345   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3346   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3350   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3353   const TargetRegisterClass *SrcRC =
3354       TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3355   const TargetRegisterClass *DstRC =
3356       TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3357   if (!SrcRC || !DstRC)
3359   if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3360       !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3361       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3364   MachineBasicBlock *BB = MI.getParent();
3372   if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
3376     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3379     unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3381         .addReg(SrcReg, {}, SubReg)
3383     MI.eraseFromParent();
3390   if (!STI.useVGPRIndexMode()) {
3391     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3393     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3394         .addReg(SrcReg, {}, SubReg)
3396     MI.eraseFromParent();
3400   const MCInstrDesc &GPRIDXDesc =
3401       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
3407   MI.eraseFromParent();
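// Three indexing strategies, in order: an SGPR vector uses S_MOVRELS_B32/B64
// with the index in M0; a VGPR vector on targets without VGPR index mode
// uses V_MOVRELS_B32_e32, again driven through M0; otherwise a GPRIDX pseudo
// wraps the access in set/clear of the hardware's VGPR indexing mode.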
3412 bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3419   LLT VecTy = MRI->getType(DstReg);
3420   LLT ValTy = MRI->getType(ValReg);
3424   const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3425   const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3426   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3432   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3435   const TargetRegisterClass *VecRC =
3436       TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3437   const TargetRegisterClass *ValRC =
3438       TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3440   if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3441       !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3442       !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3443       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3446   if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3450   std::tie(IdxReg, SubReg) =
3453   const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
3454                          STI.useVGPRIndexMode();
3456   MachineBasicBlock *BB = MI.getParent();
3460     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3463     const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3464         VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
3469     MI.eraseFromParent();
3473   const MCInstrDesc &GPRIDXDesc =
3474       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
3481   MI.eraseFromParent();
3487 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3488 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3489 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3490 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3491 case Intrinsic::amdgcn_load_async_to_lds:
3492 case Intrinsic::amdgcn_global_load_async_lds:
3498 bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3499   if (!Subtarget->hasVMemToLDSLoad())
3502   unsigned Size = MI.getOperand(3).getImm();
3506   const bool HasVIndex = MI.getNumOperands() == 9;
3510     VIndex = MI.getOperand(4).getReg();
3514   Register VOffset = MI.getOperand(4 + OpOffset).getReg();
3515   std::optional<ValueAndVReg> MaybeVOffset =
3517   const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3523     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3524                                  : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3525                     : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3526                                  : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3529     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3530                                  : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3531                     : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3532                                  : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3535     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3536                                  : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3537                     : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3538                                  : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3541     if (!Subtarget->hasLDSLoadB96_B128())
3544     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3545                                  : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3546                     : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3547                                  : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3550     if (!Subtarget->hasLDSLoadB96_B128())
3553     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3554                                  : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3555                     : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3556                                  : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3560   MachineBasicBlock *MBB = MI.getParent();
3563       .add(MI.getOperand(2));
3567   if (HasVIndex && HasVOffset) {
3568     Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3569     BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3576   } else if (HasVIndex) {
3578   } else if (HasVOffset) {
3582   MIB.add(MI.getOperand(1));
3583   MIB.add(MI.getOperand(5 + OpOffset));
3584   MIB.add(MI.getOperand(6 + OpOffset));
3586   unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
3595   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
3600   MachinePointerInfo StorePtrI = LoadPtrI;
3611   MachineMemOperand *StoreMMO =
3617   MI.eraseFromParent();
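// The MUBUF opcode suffix encodes which VGPR address operands are present:
// _OFFSET (neither vindex nor voffset), _OFFEN (voffset only), _IDXEN
// (vindex only) and _BOTHEN (both, packed into a 64-bit VGPR pair by the
// REG_SEQUENCE above). A separate store memory operand is attached because
// the instruction also writes LDS.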
3630   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3636   return Def->getOperand(1).getReg();
3650   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3658   return Def->getOperand(1).getReg();
3660   if (VT->signBitIsZero(Reg))
3661     return matchZeroExtendFromS32(Reg);
3669 AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
3671                            : matchZeroExtendFromS32(Reg);
3677 AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
3679                            : matchSignExtendFromS32(Reg);
3683 AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
3684                                                    bool IsSigned) const {
3686     return matchSignExtendFromS32OrS32(Reg);
3688   return matchZeroExtendFromS32OrS32(Reg);
3698   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3705   return Def->getOperand(1).getReg();
3710 bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
3711   if (!Subtarget->hasVMemToLDSLoad())
3715   unsigned Size = MI.getOperand(3).getImm();
3722     Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3725     Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3728     Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3731     if (!Subtarget->hasLDSLoadB96_B128())
3733     Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3736     if (!Subtarget->hasLDSLoadB96_B128())
3738     Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3742   MachineBasicBlock *MBB = MI.getParent();
3745       .add(MI.getOperand(2));
3751   if (!isSGPR(Addr)) {
3753     if (isSGPR(AddrDef->Reg)) {
3754       Addr = AddrDef->Reg;
3755     } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3758       if (isSGPR(SAddr)) {
3759         Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3760         if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3771     VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3783   MIB.add(MI.getOperand(4));
3785   unsigned Aux = MI.getOperand(5).getImm();
3789   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
3791   LoadPtrI.Offset = MI.getOperand(4).getImm();
3792   MachinePointerInfo StorePtrI = LoadPtrI;
3801   MachineMemOperand *StoreMMO =
3803       sizeof(int32_t), Align(4));
3807   MI.eraseFromParent();
3812 bool AMDGPUInstructionSelector::selectTensorLoadStore(MachineInstr &MI,
3814   bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3816       IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3820   const auto isAllZeros = [&](MachineOperand &Opnd) {
3821     const MachineInstr *DefMI = MRI->getVRegDef(Opnd.getReg());
3830     Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3831                  : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3836   MachineBasicBlock *MBB = MI.getParent();
3838       .add(MI.getOperand(1))
3839       .add(MI.getOperand(2));
3841   if (NumGroups >= 4) {
3842     MIB.add(MI.getOperand(3))
3843        .add(MI.getOperand(4));
3847       .add(MI.getOperand(6));
3849   MI.eraseFromParent();
3853 bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3855   unsigned OpcodeOpIdx =
3856       MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3857   MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
3858   MI.removeOperand(OpcodeOpIdx);
3859   MI.addImplicitDefUseOperands(*MI.getMF());
3866 bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
3869 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3870 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3872 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3873 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3875 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3876 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3878 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3879 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3881 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3882 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3884 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3885 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3887 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3888 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3890 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3891 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3893 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3894 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3896 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3897 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3899 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3900 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3902 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3903 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3905 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3906 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3908 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3909 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3911 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3912 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3914 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3915 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3917 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3918 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3920 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3921 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3923 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3924 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3926 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3927 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3929 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3930 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3932 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3933 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3935 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3936 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3938 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3939 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3941 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3942 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3944 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3945 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3947 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3948 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3950 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3951 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3957   auto VDst_In = MI.getOperand(4);
3959   MI.setDesc(TII.get(Opc));
3960   MI.removeOperand(4);
3961   MI.removeOperand(1);
3962   MI.addOperand(VDst_In);
3963   MI.addImplicitDefUseOperands(*MI.getMF());
3964   const MCInstrDesc &MCID = MI.getDesc();
3966   MI.getOperand(0).setIsEarlyClobber(true);
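// The smfmac intrinsics carry the accumulator as operand 4 (VDst_In); the
// rewrite drops the intrinsic-ID operand and re-appends VDst_In so it lands
// in the position the _e64 pseudo expects as the tied destination input. The
// result register is then flagged early-clobber (under a condition on the
// MCInstrDesc elided here).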
3971 bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3973   if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3974       !Subtarget->hasPermlane16Swap())
3976   if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3977       !Subtarget->hasPermlane32Swap())
3980   unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3981                         ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3982                         : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3984   MI.removeOperand(2);
3985   MI.setDesc(TII.get(Opcode));
3988   MachineOperand &FI = MI.getOperand(4);
3995 bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
3998   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3999   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
4000   MachineBasicBlock *MBB = MI.getParent();
4004     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
4005         .addImm(Subtarget->getWavefrontSizeLog2())
4010         .addImm(Subtarget->getWavefrontSizeLog2())
4014   const TargetRegisterClass &RC =
4015       IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4016   if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4019   MI.eraseFromParent();
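// The wave address is the swizzled scratch offset shifted right by
// log2(wavefront size): V_LSHRREV_B32 when the result lives in a VGPR, or
// the corresponding scalar shift (elided above) when it stays in an SGPR.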
4023 bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4026   MachineBasicBlock *MBB = MI.getParent();
4033   const LLT DstTy = MRI->getType(DstReg);
4035   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4036   const TargetRegisterClass *DstRC =
4037       TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4042   if (!Subtarget->supportsBPermute())
4046   if (Subtarget->supportsWaveWideBPermute()) {
4047     Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4048     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4058   assert(Subtarget->isWave64());
4062       MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4063   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4065   Register UndefExecReg = MRI->createVirtualRegister(
4066       TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4067   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4069   Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4070   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4078   Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4079   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4083   Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4084   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4092   Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4093   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4098   Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4099   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4102   Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4103   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4108   Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4109   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4116   Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4117   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4121   Register XORReg = MRI->createVirtualRegister(DstRC);
4126   Register ANDReg = MRI->createVirtualRegister(DstRC);
4131   Register CompareReg = MRI->createVirtualRegister(
4132       TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4133   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4138   BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4146   MI.eraseFromParent();
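// Wave64 bpermute emulation: DS_BPERMUTE_B32 only routes data within a
// 32-lane half. The code therefore permutes within the local half, swaps
// halves with V_PERMLANE64_B32 (wrapped in STRICT_WWM so inactive lanes
// participate), permutes the swapped values, and finally V_CNDMASK selects
// between the two results by comparing, roughly, the half-selecting bit of
// the source lane index against the current lane's ID (the V_MBCNT plus
// XOR/AND sequence above).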
4155   unsigned NumOpcodes = 0;
4168   const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4179   for (unsigned I = 0; I < Src.size(); ++I) {
4193   if (Src.size() == 3) {
4200     for (unsigned I = 0; I < Src.size(); ++I) {
4201       if (Src[I] == LHS) {
4211     Bits = SrcBits[Src.size()];
4217   switch (MI->getOpcode()) {
4218   case TargetOpcode::G_AND:
4219   case TargetOpcode::G_OR:
4220   case TargetOpcode::G_XOR: {
4225     if (!getOperandBits(LHS, LHSBits) ||
4226         !getOperandBits(RHS, RHSBits)) {
4227       Src = std::move(Backup);
4228       return std::make_pair(0, 0);
4234     NumOpcodes += Op.first;
4235     LHSBits = Op.second;
4240     NumOpcodes += Op.first;
4241     RHSBits = Op.second;
4246     return std::make_pair(0, 0);
4250   switch (MI->getOpcode()) {
4251   case TargetOpcode::G_AND:
4252     TTbl = LHSBits & RHSBits;
4254   case TargetOpcode::G_OR:
4255     TTbl = LHSBits | RHSBits;
4257   case TargetOpcode::G_XOR:
4258     TTbl = LHSBits ^ RHSBits;
4264   return std::make_pair(NumOpcodes + 1, TTbl);
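// BitOp3_Op folds a tree of G_AND/G_OR/G_XOR into the 8-bit truth table used
// by V_BITOP3: the three leaf sources get the canonical tables 0xf0, 0xcc
// and 0xaa (the value of source 0/1/2 across the 8 input combinations), and
// each interior node combines its children's tables with the matching
// bitwise operator. For example, (a & b) ^ c yields
// (0xf0 & 0xcc) ^ 0xaa == 0xc0 ^ 0xaa == 0x6a as the TTbl immediate, with
// NumOpcodes counting the folded gates.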
4267 bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
4268   if (!Subtarget->hasBitOp3Insts())
4272   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4273   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
4279   unsigned NumOpcodes;
4281   std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
4285   if (NumOpcodes < 2 || Src.empty())
4288   const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
4289   if (NumOpcodes == 2 && IsB32) {
4297   } else if (NumOpcodes < 4) {
4304   unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4305   if (!IsB32 && STI.hasTrue16BitInsts())
4306     Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4307                                    : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4308   unsigned CBL = STI.getConstantBusLimit(Opc);
4309   MachineBasicBlock *MBB = MI.getParent();
4312   for (unsigned I = 0; I < Src.size(); ++I) {
4313     const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
4314     if (RB->getID() != AMDGPU::SGPRRegBankID)
4320     Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4331   while (Src.size() < 3)
4332     Src.push_back(Src[0]);
4349   MI.eraseFromParent();
4354 bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
4356   if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4359   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
4361       Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4363   MachineBasicBlock *MBB = MI.getParent();
4367     WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4370         .addImm(Subtarget->getWavefrontSizeLog2())
4377   MI.eraseFromParent();
4383   if (!I.isPreISelOpcode()) {
4385       return selectCOPY(I);
4389   switch (I.getOpcode()) {
4390   case TargetOpcode::G_AND:
4391   case TargetOpcode::G_OR:
4392   case TargetOpcode::G_XOR:
4393     if (selectBITOP3(I))
4397     return selectG_AND_OR_XOR(I);
4398   case TargetOpcode::G_ADD:
4399   case TargetOpcode::G_SUB:
4400   case TargetOpcode::G_PTR_ADD:
4403     return selectG_ADD_SUB(I);
4404   case TargetOpcode::G_UADDO:
4405   case TargetOpcode::G_USUBO:
4406   case TargetOpcode::G_UADDE:
4407   case TargetOpcode::G_USUBE:
4408     return selectG_UADDO_USUBO_UADDE_USUBE(I);
4409   case AMDGPU::G_AMDGPU_MAD_U64_U32:
4410   case AMDGPU::G_AMDGPU_MAD_I64_I32:
4411     return selectG_AMDGPU_MAD_64_32(I);
4412   case TargetOpcode::G_INTTOPTR:
4413   case TargetOpcode::G_BITCAST:
4414   case TargetOpcode::G_PTRTOINT:
4415   case TargetOpcode::G_FREEZE:
4416     return selectCOPY(I);
4417   case TargetOpcode::G_FNEG:
4420     return selectG_FNEG(I);
4421   case TargetOpcode::G_FABS:
4424     return selectG_FABS(I);
4425   case TargetOpcode::G_EXTRACT:
4426     return selectG_EXTRACT(I);
4427   case TargetOpcode::G_MERGE_VALUES:
4428   case TargetOpcode::G_CONCAT_VECTORS:
4429     return selectG_MERGE_VALUES(I);
4430   case TargetOpcode::G_UNMERGE_VALUES:
4431     return selectG_UNMERGE_VALUES(I);
4432   case TargetOpcode::G_BUILD_VECTOR:
4433   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4434     return selectG_BUILD_VECTOR(I);
4435   case TargetOpcode::G_IMPLICIT_DEF:
4436     return selectG_IMPLICIT_DEF(I);
4437   case TargetOpcode::G_INSERT:
4438     return selectG_INSERT(I);
4439   case TargetOpcode::G_INTRINSIC:
4440   case TargetOpcode::G_INTRINSIC_CONVERGENT:
4441     return selectG_INTRINSIC(I);
4442   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4443   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4444     return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
4445   case TargetOpcode::G_ICMP:
4446   case TargetOpcode::G_FCMP:
4447     if (selectG_ICMP_or_FCMP(I))
4450   case TargetOpcode::G_LOAD:
4451   case TargetOpcode::G_ZEXTLOAD:
4452   case TargetOpcode::G_SEXTLOAD:
4453   case TargetOpcode::G_STORE:
4454   case TargetOpcode::G_ATOMIC_CMPXCHG:
4455   case TargetOpcode::G_ATOMICRMW_XCHG:
4456   case TargetOpcode::G_ATOMICRMW_ADD:
4457   case TargetOpcode::G_ATOMICRMW_SUB:
4458   case TargetOpcode::G_ATOMICRMW_AND:
4459   case TargetOpcode::G_ATOMICRMW_OR:
4460   case TargetOpcode::G_ATOMICRMW_XOR:
4461   case TargetOpcode::G_ATOMICRMW_MIN:
4462   case TargetOpcode::G_ATOMICRMW_MAX:
4463   case TargetOpcode::G_ATOMICRMW_UMIN:
4464   case TargetOpcode::G_ATOMICRMW_UMAX:
4465   case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4466   case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4467   case TargetOpcode::G_ATOMICRMW_USUB_COND:
4468   case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4469   case TargetOpcode::G_ATOMICRMW_FADD:
4470   case TargetOpcode::G_ATOMICRMW_FMIN:
4471   case TargetOpcode::G_ATOMICRMW_FMAX:
4472     return selectG_LOAD_STORE_ATOMICRMW(I);
4473   case TargetOpcode::G_SELECT:
4474     return selectG_SELECT(I);
4475   case TargetOpcode::G_TRUNC:
4476     return selectG_TRUNC(I);
4477   case TargetOpcode::G_SEXT:
4478   case TargetOpcode::G_ZEXT:
4479   case TargetOpcode::G_ANYEXT:
4480   case TargetOpcode::G_SEXT_INREG:
4484     if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
4487       return selectG_SZA_EXT(I);
4488   case TargetOpcode::G_FPEXT:
4489     if (selectG_FPEXT(I))
4492   case TargetOpcode::G_BRCOND:
4493     return selectG_BRCOND(I);
4494   case TargetOpcode::G_GLOBAL_VALUE:
4495     return selectG_GLOBAL_VALUE(I);
4496   case TargetOpcode::G_PTRMASK:
4497     return selectG_PTRMASK(I);
4498   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4499     return selectG_EXTRACT_VECTOR_ELT(I);
4500   case TargetOpcode::G_INSERT_VECTOR_ELT:
4501     return selectG_INSERT_VECTOR_ELT(I);
4502   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4503   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4504   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4505   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4506   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4509     assert(Intr && "not an image intrinsic with image pseudo");
4510     return selectImageIntrinsic(I, Intr);
4512   case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4513   case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4514   case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4515     return selectBVHIntersectRayIntrinsic(I);
4516   case AMDGPU::G_SBFX:
4517   case AMDGPU::G_UBFX:
4518     return selectG_SBFX_UBFX(I);
4519   case AMDGPU::G_SI_CALL:
4520     I.setDesc(TII.get(AMDGPU::SI_CALL));
4522   case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4523     return selectWaveAddress(I);
4524   case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4525     I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4528   case AMDGPU::G_STACKRESTORE:
4529     return selectStackRestore(I);
4531     return selectPHI(I);
4532   case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4533     return selectCOPY_SCC_VCC(I);
4534   case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4535     return selectCOPY_VCC_SCC(I);
4536   case AMDGPU::G_AMDGPU_READANYLANE:
4537     return selectReadAnyLane(I);
4538   case TargetOpcode::G_CONSTANT:
4539   case TargetOpcode::G_FCONSTANT:
4547 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
4554 std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4555     Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
4559   if (MI->getOpcode() == AMDGPU::G_FNEG) {
4560     Src = MI->getOperand(1).getReg();
4563   } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4568     if (LHS && LHS->isZero()) {
4570       Src = MI->getOperand(2).getReg();
4574   if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
4575     Src = MI->getOperand(1).getReg();
4582   return std::pair(Src, Mods);
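// selectVOP3ModsImpl peels source modifiers off the SSA def chain: a G_FNEG
// toggles the NEG modifier bit, a canonicalizing G_FSUB(0, x) is treated the
// same way, and a G_FABS sets the ABS bit when the caller allows it. The
// stripped register plus the accumulated modifier bits are what the
// ComplexRendererFns below feed into src/src_modifiers operand pairs.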
4585 std::pair<Register, unsigned>
4586 AMDGPUInstructionSelector::selectVOP3PModsF32Impl(Register Src) const {
4588   std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4590   return std::pair(Src, Mods);
4593 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4595     bool ForceVGPR) const {
4596   if ((Mods != 0 || ForceVGPR) &&
4597       RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4604         TII.get(AMDGPU::COPY), VGPRSrc)
4616 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
4618       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
4623 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
4626   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4629       [=](MachineInstrBuilder &MIB) {
4630         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4632       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
4633       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4634       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
4639 AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
4642   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
4647       [=](MachineInstrBuilder &MIB) {
4648         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4650       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
4651       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4652       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
4657 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
4659       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4660       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4661       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
4666 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
4669   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4672       [=](MachineInstrBuilder &MIB) {
4673         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4675       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
4680 AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4684   std::tie(Src, Mods) =
4685       selectVOP3ModsImpl(Root.getReg(), false);
4688       [=](MachineInstrBuilder &MIB) {
4689         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4691       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
4696 AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
4699   std::tie(Src, Mods) =
4700       selectVOP3ModsImpl(Root.getReg(), true,
4704       [=](MachineInstrBuilder &MIB) {
4705         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4707       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
4712 AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
4715   if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
4718       [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
4743   if (MI->getOpcode() != AMDGPU::G_TRUNC)
4748   return DstSize * 2 == SrcSize;
4754   if (MI->getOpcode() != AMDGPU::G_LSHR)
4758   std::optional<ValueAndVReg> ShiftAmt;
4759   if (mi_match(MI->getOperand(0).getReg(), MRI,
4762     unsigned Shift = ShiftAmt->Value.getZExtValue();
4763     return Shift * 2 == SrcSize;
4771   if (MI->getOpcode() != AMDGPU::G_SHL)
4775   std::optional<ValueAndVReg> ShiftAmt;
4776   if (mi_match(MI->getOperand(0).getReg(), MRI,
4779     unsigned Shift = ShiftAmt->Value.getZExtValue();
4780     return Shift * 2 == SrcSize;
4788   if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4790   return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
4791          MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
4961 static std::optional<std::pair<Register, SrcStatus>>
4966   unsigned Opc = MI->getOpcode();
4970   case AMDGPU::G_BITCAST:
4971     return std::optional<std::pair<Register, SrcStatus>>(
4972         {MI->getOperand(1).getReg(), Curr.second});
4974     if (MI->getOperand(1).getReg().isPhysical())
4975       return std::nullopt;
4976     return std::optional<std::pair<Register, SrcStatus>>(
4977         {MI->getOperand(1).getReg(), Curr.second});
4978   case AMDGPU::G_FNEG: {
4981       return std::nullopt;
4982     return std::optional<std::pair<Register, SrcStatus>>(
4983         {MI->getOperand(1).getReg(), Stat});
4990   switch (Curr.second) {
4993     return std::optional<std::pair<Register, SrcStatus>>(
4996     if (Curr.first == MI->getOperand(0).getReg())
4997       return std::optional<std::pair<Register, SrcStatus>>(
4999     return std::optional<std::pair<Register, SrcStatus>>(
5011     return std::optional<std::pair<Register, SrcStatus>>(
5015     if (Curr.first == MI->getOperand(0).getReg())
5016       return std::optional<std::pair<Register, SrcStatus>>(
5018     return std::optional<std::pair<Register, SrcStatus>>(
5024     return std::optional<std::pair<Register, SrcStatus>>(
5029     return std::optional<std::pair<Register, SrcStatus>>(
5034     return std::optional<std::pair<Register, SrcStatus>>(
5039     return std::optional<std::pair<Register, SrcStatus>>(
5045   return std::nullopt;
5055   bool HasNeg = false;
5057   bool HasOpsel = true;
5062   unsigned Opc = MI->getOpcode();
5064   if (Opc == TargetOpcode::G_INTRINSIC) {
5067     if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5094   while (Depth <= MaxDepth && Curr.has_value()) {
5097     Statlist.push_back(Curr.value());
5104 static std::pair<Register, SrcStatus>
5111   while (Depth <= MaxDepth && Curr.has_value()) {
5117       LastSameOrNeg = Curr.value();
5122   return LastSameOrNeg;
5129   return Width1 == Width2;
5164   return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5165          IsHalfState(HiStat);
5168 std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5174     return {RootReg, Mods};
5177   SearchOptions SO(RootReg, MRI);
5188   MachineInstr *MI = MRI.getVRegDef(Stat.first);
5190   if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
5191       (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5193     return {Stat.first, Mods};
5199   if (StatlistHi.empty()) {
5201     return {Stat.first, Mods};
5207   if (StatlistLo.empty()) {
5209     return {Stat.first, Mods};
5212   for (int I = StatlistHi.size() - 1; I >= 0; I--) {
5213     for (int J = StatlistLo.size() - 1; J >= 0; J--) {
5214       if (StatlistHi[I].first == StatlistLo[J].first &&
5216           StatlistHi[I].first, RootReg, TII, MRI))
5217         return {StatlistHi[I].first,
5218                 updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};
5224   return {Stat.first, Mods};
5234   return RB->getID() == RBNo;
5251   if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||
5252       checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI, TRI))
5256   if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {
5265   BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
5273 AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,
5278   std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);
5282       [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
5283       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
5288 AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
5290   return selectVOP3PRetHelper(Root);
5294 AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
5296   return selectVOP3PRetHelper(Root, true);
5300 AMDGPUInstructionSelector::selectVOP3PNoModsDOT(MachineOperand &Root) const {
5304   std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);
5308   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }}};
5312 AMDGPUInstructionSelector::selectVOP3PModsF32(MachineOperand &Root) const {
5315   std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.getReg());
5318       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5319       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
5324 AMDGPUInstructionSelector::selectVOP3PNoModsF32(MachineOperand &Root) const {
5327   std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.getReg());
5331   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }}};
5335 AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5338          "expected i1 value");
5344       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
5352   switch (Elts.size()) {
5354     DstRegClass = &AMDGPU::VReg_256RegClass;
5357     DstRegClass = &AMDGPU::VReg_128RegClass;
5360     DstRegClass = &AMDGPU::VReg_64RegClass;
5367   auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
5369   for (unsigned i = 0; i < Elts.size(); ++i) {
5380   if (ModOpcode == TargetOpcode::G_FNEG) {
5384     for (auto El : Elts) {
5390     if (Elts.size() != NegAbsElts.size()) {
5399     assert(ModOpcode == TargetOpcode::G_FABS);
5407 AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
5413   assert(BV->getNumSources() > 0);
5415   MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5416   unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
5419   for (unsigned i = 0; i < BV->getNumSources(); ++i) {
5420     ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5427   if (BV->getNumSources() == EltsF32.size()) {
5433   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5434            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
5438 AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
5444   for (unsigned i = 0; i < CV->getNumSources(); ++i) {
5452   if (CV->getNumSources() == EltsV2F16.size()) {
5459   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5460            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
5464 AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
5470   assert(CV->getNumSources() > 0);
5471   MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5473   unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
5477   for (unsigned i = 0; i < CV->getNumSources(); ++i) {
5478     ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5485   if (CV->getNumSources() == EltsV2F16.size()) {
5492   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5493            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
5497 AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
5498   std::optional<FPValueAndVReg> FPValReg;
5500   if (TII.isInlineConstant(FPValReg->Value)) {
5501     return {{[=](MachineInstrBuilder &MIB) {
5502       MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5512   if (TII.isInlineConstant(ICst)) {
5522 AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
5528   std::optional<ValueAndVReg> ShiftAmt;
5530       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5531       ShiftAmt->Value.getZExtValue() % 8 == 0) {
5532     Key = ShiftAmt->Value.getZExtValue() / 8;
5537       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5538       [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
5543 AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
5550   std::optional<ValueAndVReg> ShiftAmt;
5552       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5553       ShiftAmt->Value.getZExtValue() == 16) {
5559       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5560       [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
5565 AMDGPUInstructionSelector::selectSWMMACIndex32(MachineOperand &Root) const {
5572     S32 = matchAnyExtendFromS32(Src);
5576   if (Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5581       Src = Def->getOperand(2).getReg();
5588       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5589       [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
5594 AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
5597   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
5601       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5602       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
5608 AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
5611   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
5617       [=](MachineInstrBuilder &MIB) {
5619             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
5621       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
5626 AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
5629   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
5635       [=](MachineInstrBuilder &MIB) {
5637             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
5639       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
5646 bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
5648                                                   bool IsSigned) const {
5649   if (!Subtarget->hasScaleOffset())
5653   MachineMemOperand *MMO = *MI.memoperands_begin();
5665     OffsetReg = Def->Reg;
5680           m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5684       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5685                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5686        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5687         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&
5700 bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
5704                                                  bool *ScaleOffset) const {
5706   MachineBasicBlock *MBB = MI->getParent();
5711   getAddrModeInfo(*MI, *MRI, AddrInfo);
5713   if (AddrInfo.empty())
5716   const GEPInfo &GEPI = AddrInfo[0];
5717   std::optional<int64_t> EncodedImm;
5720     *ScaleOffset = false;
5725   if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5726       AddrInfo.size() > 1) {
5727     const GEPInfo &GEPI2 = AddrInfo[1];
5728     if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5729       Register OffsetReg = GEPI2.SgprParts[1];
5732           selectScaleOffset(Root, OffsetReg, false);
5733       OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5735       Base = GEPI2.SgprParts[0];
5736       *SOffset = OffsetReg;
5745       auto SKnown = VT->getKnownBits(*SOffset);
5746       if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
5758   if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5759     Base = GEPI.SgprParts[0];
5765   if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
5771     Base = GEPI.SgprParts[0];
5772     *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5773     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5778   if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5779     Register OffsetReg = GEPI.SgprParts[1];
5781     *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
5782     OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5784     Base = GEPI.SgprParts[0];
5785     *SOffset = OffsetReg;
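// selectSmrdOffset tries, in order: folding a two-level GEP into base plus
// an soffset register alongside the encoded immediate, encoding the constant
// alone as the SMRD immediate, materializing an out-of-range 32-bit constant
// into an SGPR soffset via S_MOV_B32, and finally a pure register offset.
// The callers below pick which combination of outputs (Offset and/or
// SOffset) they accept.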
5794 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
5797   if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
5799     return std::nullopt;
5801   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
5802            [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
5806 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
5808   getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
5810   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
5811     return std::nullopt;
5813   const GEPInfo &GEPInfo = AddrInfo[0];
5814   Register PtrReg = GEPInfo.SgprParts[0];
5815   std::optional<int64_t> EncodedImm =
5818     return std::nullopt;
5821       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
5822       [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
5827 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
5830   if (!selectSmrdOffset(Root, Base, &SOffset, nullptr,
5832     return std::nullopt;
5835   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
5836            [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
5837            [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
5841 AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
5845   if (!selectSmrdOffset(Root, Base, &SOffset, &Offset, &ScaleOffset))
5846     return std::nullopt;
5849   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
5850            [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
5852            [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
5855 std::pair<Register, int>
5856 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
5857                                                 uint64_t FlatVariant) const {
5862   if (!STI.hasFlatInstOffsets())
5866   int64_t ConstOffset;
5868   std::tie(PtrBase, ConstOffset, IsInBounds) =
5869       getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
5875   if (ConstOffset == 0 ||
5877        !isFlatScratchBaseLegal(Root.getReg())) ||
5881   unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
5882   if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5885   return std::pair(PtrBase, ConstOffset);
5889 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
5893       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
5894       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
5899 AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
5903       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
5904       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
5909 AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
5913       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
5914       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
5920 AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
5922                                              bool NeedIOffset) const {
5925   int64_t ConstOffset;
5926   int64_t ImmOffset = 0;
5930   std::tie(PtrBase, ConstOffset, std::ignore) =
5931       getPtrBaseWithConstantOffset(Addr, *MRI);
5933   if (ConstOffset != 0) {
5938       ImmOffset = ConstOffset;
5941       if (isSGPR(PtrBaseDef->Reg)) {
5942         if (ConstOffset > 0) {
5948           int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5950             std::tie(SplitImmOffset, RemainderOffset) =
5955           if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
5958             MachineBasicBlock *MBB = MI->getParent();
5960                 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5962             BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5964                 .addImm(RemainderOffset);
5968             return {{[=](MachineInstrBuilder &MIB) {
5971                      [=](MachineInstrBuilder &MIB) {
5974                      [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
5975                      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
5978                      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
5979                      [=](MachineInstrBuilder &MIB) {
5982                      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
5992           unsigned NumLiterals =
5993               !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
5994               !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
5995           if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5996             return std::nullopt;
6003   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6008     if (isSGPR(SAddr)) {
6009       Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6013       bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6014                                            Subtarget->hasSignedGVSOffset());
6015       if (Register VOffset = matchExtendFromS32OrS32(
6016               PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6018         return {{[=](MachineInstrBuilder &MIB) {
6021                  [=](MachineInstrBuilder &MIB) {
6024                  [=](MachineInstrBuilder &MIB) {
6027                  [=](MachineInstrBuilder &MIB) {
6031         return {{[=](MachineInstrBuilder &MIB) {
6034                  [=](MachineInstrBuilder &MIB) {
6037                  [=](MachineInstrBuilder &MIB) {
6047   if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6048       AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6049     return std::nullopt;
6054   MachineBasicBlock *MBB = MI->getParent();
6055   Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6057   BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6062       [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
6063       [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
6064       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
6065       [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }
6068       [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); },
6069       [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
6070       [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); }
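// The global SADDR form keeps the uniform pointer in an SGPR pair and the
// divergent part in a 32-bit VGPR voffset: a constant offset is split into
// an encodable immediate plus a remainder moved into a VGPR, a divergent add
// whose RHS zero/sign-extends from 32 bits folds directly into voffset, and
// a bare uniform address gets a zero VGPR (the V_MOV_B32 above) as voffset.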
6075 AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
6076   return selectGlobalSAddr(Root, 0);
6080 AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
6086   return selectGlobalSAddr(Root, PassedCPol);
6090 AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
6096   return selectGlobalSAddr(Root, PassedCPol);
6100 AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
6105 AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6112   return selectGlobalSAddr(Root, PassedCPol, false);
6116 AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6123   return selectGlobalSAddr(Root, PassedCPol, false);
6127 AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
6130   int64_t ConstOffset;
6131   int64_t ImmOffset = 0;
6135   std::tie(PtrBase, ConstOffset, std::ignore) =
6136       getPtrBaseWithConstantOffset(Addr, *MRI);
6138   if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6142     ImmOffset = ConstOffset;
6146   if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6147     int FI = AddrDef->MI->getOperand(1).getIndex();
6150         [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
6156   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6157     Register LHS = AddrDef->MI->getOperand(1).getReg();
6158     Register RHS = AddrDef->MI->getOperand(2).getReg();
6162     if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6163         isSGPR(RHSDef->Reg)) {
6164       int FI = LHSDef->MI->getOperand(1).getIndex();
6168       SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6170       BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6178     return std::nullopt;
6181       [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
6182       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
6187 bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6189   if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6195   auto VKnown = VT->getKnownBits(VAddr);
6198   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6199   uint64_t SMax = SKnown.getMaxValue().getZExtValue();
6200   return (VMax & 3) + (SMax & 3) >= 4;
6204 AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
6207   int64_t ConstOffset;
6208   int64_t ImmOffset = 0;
6212   std::tie(PtrBase, ConstOffset, std::ignore) =
6213       getPtrBaseWithConstantOffset(Addr, *MRI);
6216   if (ConstOffset != 0 &&
6220     ImmOffset = ConstOffset;
6224   if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6225     return std::nullopt;
6227   Register RHS = AddrDef->MI->getOperand(2).getReg();
6228   if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6229     return std::nullopt;
6231   Register LHS = AddrDef->MI->getOperand(1).getReg();
6234   if (OrigAddr != Addr) {
6235     if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6236       return std::nullopt;
6238     if (!isFlatScratchBaseLegalSV(OrigAddr))
6239       return std::nullopt;
6242   if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
6243     return std::nullopt;
6245   unsigned CPol = selectScaleOffset(Root, RHS, true)
6249   if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6250     int FI = LHSDef->MI->getOperand(1).getIndex();
6252         [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
6254         [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
6255         [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
6264     return std::nullopt;
6267       [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
6268       [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },
6269       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
6270       [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
6275 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
6277   MachineBasicBlock *MBB = MI->getParent();
6279   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
6284     Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6289     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6293     return {{[=](MachineInstrBuilder &MIB) {
6296              [=](MachineInstrBuilder &MIB) {
6299              [=](MachineInstrBuilder &MIB) {
6304              [=](MachineInstrBuilder &MIB) {
6313   std::optional<int> FI;
6316   const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
6318   int64_t ConstOffset;
6319   std::tie(PtrBase, ConstOffset, std::ignore) =
6320       getPtrBaseWithConstantOffset(VAddr, *MRI);
6321   if (ConstOffset != 0) {
6322     if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6323         (!STI.privateMemoryResourceIsRangeChecked() ||
6324          VT->signBitIsZero(PtrBase))) {
6325       const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6326       if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
6332   } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6336   return {{[=](MachineInstrBuilder &MIB) {
6339            [=](MachineInstrBuilder &MIB) {
6345            [=](MachineInstrBuilder &MIB) {
6350            [=](MachineInstrBuilder &MIB) {
6355 bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
6360   if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6365   return VT->signBitIsZero(Base);
6368 bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
6370                                                  unsigned Size) const {
6371   if (Offset0 % Size != 0 || Offset1 % Size != 0)
6376   if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6381   return VT->signBitIsZero(Base);
6386   return Addr->getOpcode() == TargetOpcode::G_OR ||
6387          (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
6394 bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
6402   if (STI.hasSignedScratchOffsets())
6408   if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6409     std::optional<ValueAndVReg> RhsValReg =
6415     if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6416         RhsValReg->Value.getSExtValue() > -0x40000000)
6420   return VT->signBitIsZero(LHS);
6425 bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
6433   if (STI.hasSignedScratchOffsets())
6438   return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
6443 bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6447   if (STI.hasSignedScratchOffsets())
6452   std::optional<DefinitionAndSourceRegister> BaseDef =
6454   std::optional<ValueAndVReg> RHSOffset =
6464       (RHSOffset->Value.getSExtValue() < 0 &&
6465        RHSOffset->Value.getSExtValue() > -0x40000000)))
6468   Register LHS = BaseDef->MI->getOperand(1).getReg();
6469   Register RHS = BaseDef->MI->getOperand(2).getReg();
6470   return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
6473 bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
6474                                                     unsigned ShAmtBits) const {
6475   assert(MI.getOpcode() == TargetOpcode::G_AND);
6477   std::optional<APInt> RHS =
6482   if (RHS->countr_one() >= ShAmtBits)
6485   const APInt &LHSKnownZeros = VT->getKnownZeroes(MI.getOperand(1).getReg());
6486   return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
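// A shift instruction only reads the low ShAmtBits bits of its amount, so a
// mask whose trailing ones cover those bits is a no-op: e.g. for a 32-bit
// shift (ShAmtBits == 5), (amt & 31) can drop the G_AND since 31 has five
// trailing ones. Known zero bits of the other operand are ORed in to catch
// masks like 0x1e on an amount whose bit 0 is already known to be zero.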
6490 AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6493   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
6495   std::optional<DefinitionAndSourceRegister> Def =
6497   assert(Def && "this shouldn't be an optional result");
6502       [=](MachineInstrBuilder &MIB) {
6505       [=](MachineInstrBuilder &MIB) {
6508       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
6519   if (!TII.isLegalMUBUFImmOffset(Offset))
6527       [=](MachineInstrBuilder &MIB) {
6530       [=](MachineInstrBuilder &MIB) {
6538       !TII.isLegalMUBUFImmOffset(Offset))
6542       [=](MachineInstrBuilder &MIB) {
6545       [=](MachineInstrBuilder &MIB) {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }}};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
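// Example: with Size == 4, a pair starting at byte offset 40 is returned as
// 40 / 4 == 10, and the Offset / Offset + 1 renderers above then emit
// offset0 == 10 and offset1 == 11, i.e. byte offsets 40 and 44
// (illustrative values).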
/// If \p Root is a G_PTR_ADD with a constant right-hand side, return the base
/// register, the constant offset, and whether the add is known not to wrap
/// (unsigned).
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0, false};

  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue(),
          RootI->getFlag(MachineInstr::NoUWrap)};
}
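// Example (hypothetical MIR): for
//   %c:_(s32) = G_CONSTANT i32 16
//   %ptr:_(p3) = nuw G_PTR_ADD %base, %c
// this returns {%base, 16, true}; any non-G_PTR_ADD root, or a non-constant
// right-hand side, yields {Root, 0, false}.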
/// Return a resource descriptor for use with an arbitrary 64-bit pointer.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc2).addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc3).addImm(FormatHi);

  // Build the constant half of the descriptor first, so that multiple
  // descriptors can CSE the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2).addImm(AMDGPU::sub0)
      .addReg(RSrc3).addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64).addDef(RSrcLo).addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo).addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi).addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd
      = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
  }

  return Data;
}

/// Return whether the MUBUF addr64 addressing mode should be used for the
/// parsed address.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}
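// In short: addr64 is needed whenever the address has a variable VGPR
// component, either an explicit (ptr_add N2, N3) pair or a divergent N0;
// a uniform base can instead be folded into the resource descriptor.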
/// Split an immediate offset \p ImmOffset, moving the part that does not fit
/// in the MUBUF instruction's offset field into \p SOffset.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in a register.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32).addDef(SOffset).addImm(ImmOffset);
  ImmOffset = 0;
}

bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // The addr64 addressing mode was removed on volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent; use N0 (the result of the add) as
        // addr64 and build the resource from a null pointer.
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is uniform.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
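// Example: an immediate offset of 8192 does not fit the MUBUF offset field
// on most subtargets, so splitIllegalMUBUFOffset materializes it as
//   S_MOV_B32 %soffset, 8192
// and clears the immediate (the exact field width is subtarget-dependent;
// illustrative value).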
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); },
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); },
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegSExtVal sign extends any values, so check if that
  // matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(
      STI, *OffsetVal, /*IsBuffer=*/true, /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), VT, /*CheckNUW*/ true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
      STI, Offset, /*IsBuffer=*/true, /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
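// Note: the raw byte offset is not encoded directly; getSMRDEncodedOffset
// accounts for the subtarget's encoding (e.g. dword-scaled offsets on older
// generations) and selection bails out whenever no valid encoding exists.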
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  Matched = false;
  if (mi_match(Src, *MRI, m_GFPExt(m_Reg(Src)))) {
    assert(MRI->getType(Src) == LLT::scalar(16));

    // Fold abs/neg modifiers found past the fpext, unless an abs is already
    // applied: fneg is applied last, so an earlier fneg must not be folded.
    const auto CheckAbsNeg = [&]() {
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
        if (ModsTmp & SISrcMods::NEG)
          Mods ^= SISrcMods::NEG;
        if (ModsTmp & SISrcMods::ABS)
          Mods |= SISrcMods::ABS;
      }
    };

    CheckAbsNeg();

    // op_sel_hi requests a conversion from fp16; op_sel picks the high half
    // of the 32-bit source register.
    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(*MRI, Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;
      CheckAbsNeg();
    }

    Matched = true;
  }
  return {Src, Mods};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
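// Example (hypothetical MIR): for %x:_(s32) = G_FPEXT %h:_(s16), the matcher
// above selects %h directly and sets OP_SEL_1 so mad-mix style instructions
// read the operand as f16; if %h is the high half of a 32-bit register,
// OP_SEL_0 is set as well.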
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}

bool AMDGPUInstructionSelector::selectSGetBarrierState(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(2);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(BarOp.getReg());
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  auto DstReg = I.getOperand(0).getReg();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;
  MIB.addDef(DstReg);
  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
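// Example: amdgcn_s_barrier_join with a constant barrier operand selects
// S_BARRIER_JOIN_IMM; with a variable operand the barrier ID is first moved
// into M0 and S_BARRIER_JOIN_M0 is selected instead.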
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(1);
  const MachineOperand &CntOp = I.getOperand(2);

  // A signal with a zero member count and a constant barrier ID can be
  // emitted directly as an immediate-form signal.
  if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
    std::optional<int64_t> CntImm =
        getIConstantVRegSExtVal(CntOp.getReg(), *MRI);
    if (CntImm && *CntImm == 0) {
      std::optional<int64_t> BarValImm =
          getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
      if (BarValImm) {
        auto BarID = ((*BarValImm) >> 4) & 0x3F;
        BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
            .addImm(BarID);
        I.eraseFromParent();
        return true;
      }
    }
  }

  // BarID = (BarOp >> 4) & 0x3F
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
      .add(BarOp)
      .addImm(4u);

  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
      .addReg(TmpReg0)
      .addImm(0x3F);

  // M0 = ((CntOp & 0x3F) << 16) | BarID
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg2)
      .add(CntOp)
      .addImm(0x3F);

  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg3)
      .addReg(TmpReg2)
      .addImm(ShAmt);

  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg4)
      .addReg(TmpReg1)
      .addReg(TmpReg3);

  auto CopyMIB =
      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);
  constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);

  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    // M0 = (BarOp >> 4) & 0x3F
    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
        .add(BarOp)
        .addImm(4u);

    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
        .addReg(TmpReg0)
        .addImm(0x3F);

    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(TmpReg1);
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
      return false;
    MIB.addDef(DstReg);
  }

  if (BarValImm) {
    auto BarId = ((*BarValImm) >> 4) & 0x3F;
    MIB.addImm(BarId);
  }

  I.eraseFromParent();
  return true;
}
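// Example: a constant barrier operand of 0x150 renders BarId
// ((0x150 >> 4) & 0x3F) == 0x15, mirroring the S_LSHR_B32/S_AND_B32 sequence
// used on the non-constant path (illustrative value).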
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
}

void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
}
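// Example: renderNegateImm applied to G_CONSTANT i32 -8 renders the
// immediate 8; renderCountTrailingOnesImm applied to G_CONSTANT i32 255
// renders 8, since 0xFF has eight trailing ones (illustrative constants).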
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(OpIdx);
  int64_t Imm;
  if (Op.isReg() && mi_match(Op.getReg(), *MRI, m_ICst(Imm)))
    MIB.addImm(Imm);
  else
    MIB.addImm(Op.getImm());
}

void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
}

// The renderSrcAndDstSelToOpSelXForm_* family translates matched source and
// destination select immediates into SISrcMods op_sel bit patterns; several
// variants follow the same assertion pattern.
void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
}
void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
// Additional modifier renderers follow a common shape: test the matched
// immediate, fold the corresponding modifier bit into Mods, then render it:
//
//   if (MI.getOperand(OpIdx).getImm())
//     Mods |= ...;
//   MIB.addImm((int64_t)Mods);
//
// with a value-derived variant reading the immediate directly:
//
//   unsigned Val = MI.getOperand(OpIdx).getImm();
//   MIB.addImm((int64_t)Mods);

void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  uint32_t V = MI.getOperand(2).getImm();
  if (!Subtarget->hasSafeCUPrefetch())
    V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
  MIB.addImm(V);
}

void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  unsigned Val = MI.getOperand(OpIdx).getImm();
  MIB.addImm(Val);
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}