29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
64 MRI = &
MF.getRegInfo();
72 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
73 ? Def->getOperand(1).getReg()
80 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error));
89 auto &RegClassOrBank = MRI.getRegClassOrRegBank(
Reg);
90 const TargetRegisterClass *RC =
93 const LLT Ty = MRI.getType(
Reg);
97 return MRI.getVRegDef(
Reg)->getOpcode() != AMDGPU::G_TRUNC &&
102 return RB->
getID() == AMDGPU::VCCRegBankID;
105bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
106 unsigned NewOpc)
const {
107 MI.setDesc(TII.get(NewOpc));
111 MachineOperand &Dst =
MI.getOperand(0);
112 MachineOperand &Src =
MI.getOperand(1);
118 const TargetRegisterClass *DstRC
119 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
120 const TargetRegisterClass *SrcRC
121 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
122 if (!DstRC || DstRC != SrcRC)
125 if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
126 !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
128 const MCInstrDesc &MCID =
MI.getDesc();
130 MI.getOperand(0).setIsEarlyClobber(
true);
135bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
138 I.setDesc(TII.get(TargetOpcode::COPY));
140 const MachineOperand &Src =
I.getOperand(1);
141 MachineOperand &Dst =
I.getOperand(0);
145 if (isVCC(DstReg, *MRI)) {
146 if (SrcReg == AMDGPU::SCC) {
147 const TargetRegisterClass *RC
148 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
151 return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
154 if (!isVCC(SrcReg, *MRI)) {
156 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
159 const TargetRegisterClass *SrcRC
160 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
162 std::optional<ValueAndVReg> ConstVal =
166 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
168 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
170 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
177 assert(Subtarget->useRealTrue16Insts());
178 const int64_t NoMods = 0;
179 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
185 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
192 bool IsSGPR = TRI.isSGPRClass(SrcRC);
193 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
200 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
206 if (!MRI->getRegClassOrNull(SrcReg))
207 MRI->setRegClass(SrcReg, SrcRC);
212 const TargetRegisterClass *RC =
213 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
214 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
220 for (
const MachineOperand &MO :
I.operands()) {
221 if (MO.getReg().isPhysical())
224 const TargetRegisterClass *RC =
225 TRI.getConstrainedRegClassForOperand(MO, *MRI);
228 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
233bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
236 Register VCCReg =
I.getOperand(1).getReg();
240 if (STI.hasScalarCompareEq64()) {
242 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
245 Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
246 Cmp =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
253 Register DstReg =
I.getOperand(0).getReg();
257 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
260bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
264 Register DstReg =
I.getOperand(0).getReg();
265 Register SrcReg =
I.getOperand(1).getReg();
266 std::optional<ValueAndVReg> Arg =
270 const int64_t
Value = Arg->Value.getZExtValue();
272 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
279 return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
285 unsigned SelectOpcode =
286 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
296bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
297 Register DstReg =
I.getOperand(0).getReg();
298 Register SrcReg =
I.getOperand(1).getReg();
303 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
311bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
312 const Register DefReg =
I.getOperand(0).getReg();
313 const LLT DefTy = MRI->getType(DefReg);
325 MRI->getRegClassOrRegBank(DefReg);
327 const TargetRegisterClass *DefRC =
336 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
345 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
346 const Register SrcReg =
I.getOperand(i).getReg();
348 const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
350 const LLT SrcTy = MRI->getType(SrcReg);
351 const TargetRegisterClass *SrcRC =
352 TRI.getRegClassForTypeOnBank(SrcTy, *RB);
353 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
358 I.setDesc(TII.get(TargetOpcode::PHI));
359 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
365 unsigned SubIdx)
const {
369 Register DstReg = MRI->createVirtualRegister(&SubRC);
372 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
374 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
400 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
402 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
404 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
410bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
411 Register DstReg =
I.getOperand(0).getReg();
412 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
414 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
415 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
416 DstRB->
getID() != AMDGPU::VCCRegBankID)
419 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
432bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
435 Register DstReg =
I.getOperand(0).getReg();
437 LLT Ty = MRI->getType(DstReg);
442 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
443 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
444 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
448 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
451 .
add(
I.getOperand(1))
452 .
add(
I.getOperand(2))
459 if (STI.hasAddNoCarryInsts()) {
460 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
461 I.setDesc(TII.get(
Opc));
468 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
470 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
474 .
add(
I.getOperand(1))
475 .
add(
I.getOperand(2))
482 assert(!
Sub &&
"illegal sub should not reach here");
484 const TargetRegisterClass &RC
485 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
486 const TargetRegisterClass &HalfRC
487 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
489 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
490 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
491 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
492 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
494 Register DstLo = MRI->createVirtualRegister(&HalfRC);
495 Register DstHi = MRI->createVirtualRegister(&HalfRC);
498 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
501 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
506 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
507 Register CarryReg = MRI->createVirtualRegister(CarryRC);
508 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
513 MachineInstr *Addc =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
523 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
530 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
537bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
542 Register Dst0Reg =
I.getOperand(0).getReg();
543 Register Dst1Reg =
I.getOperand(1).getReg();
544 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
545 I.getOpcode() == AMDGPU::G_UADDE;
546 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
547 I.getOpcode() == AMDGPU::G_USUBE;
549 if (isVCC(Dst1Reg, *MRI)) {
550 unsigned NoCarryOpc =
551 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
552 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
553 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
560 Register Src0Reg =
I.getOperand(2).getReg();
561 Register Src1Reg =
I.getOperand(3).getReg();
564 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
565 .
addReg(
I.getOperand(4).getReg());
568 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
569 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
571 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
572 .
add(
I.getOperand(2))
573 .
add(
I.getOperand(3));
575 if (MRI->use_nodbg_empty(Dst1Reg)) {
576 CarryInst.setOperandDead(3);
578 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
580 if (!MRI->getRegClassOrNull(Dst1Reg))
581 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
584 if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
585 !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
586 !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
590 !RBI.constrainGenericRegister(
I.getOperand(4).getReg(),
591 AMDGPU::SReg_32RegClass, *MRI))
598bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
602 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
603 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() &&
604 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
607 if (Subtarget->hasMADIntraFwdBug())
608 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
609 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
611 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
612 : AMDGPU::V_MAD_NC_I64_I32_e64;
614 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
619 I.setDesc(TII.get(
Opc));
621 I.addImplicitDefUseOperands(*
MF);
622 I.getOperand(0).setIsEarlyClobber(
true);
628bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
630 Register DstReg =
I.getOperand(0).getReg();
631 Register SrcReg =
I.getOperand(1).getReg();
632 LLT DstTy = MRI->getType(DstReg);
633 LLT SrcTy = MRI->getType(SrcReg);
638 unsigned Offset =
I.getOperand(2).getImm();
639 if (
Offset % 32 != 0 || DstSize > 128)
647 const TargetRegisterClass *DstRC =
648 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
649 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
652 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
653 const TargetRegisterClass *SrcRC =
654 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
659 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
664 *SrcRC,
I.getOperand(1));
666 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
667 .
addReg(SrcReg, {}, SubReg);
673bool AMDGPUInstructionSelector::selectS16MergeToS32(
MachineInstr &
MI)
const {
678 LLT Src0Ty = MRI->getType(Src0);
679 LLT Src1Ty = MRI->getType(Src1);
681 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
682 const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, TRI);
683 const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, TRI);
684 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
690 MachineBasicBlock *BB =
MI.getParent();
695 if (Src0Bank->
getID() == AMDGPU::VGPRRegBankID &&
696 Src1Bank->
getID() == AMDGPU::VGPRRegBankID &&
698 BuildMI(*BB,
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), Dst)
704 if (!RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI))
707 MI.eraseFromParent();
712 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
713 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
718 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
724 MI.eraseFromParent();
747 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
748 if (Shift0 && Shift1) {
749 Opc = AMDGPU::S_PACK_HH_B32_B16;
750 MI.getOperand(1).setReg(ShiftSrc0);
751 MI.getOperand(2).setReg(ShiftSrc1);
753 Opc = AMDGPU::S_PACK_LH_B32_B16;
754 MI.getOperand(2).setReg(ShiftSrc1);
758 if (ConstSrc1 && ConstSrc1->Value == 0) {
760 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
765 MI.eraseFromParent();
769 if (STI.hasSPackHL()) {
770 Opc = AMDGPU::S_PACK_HL_B32_B16;
771 MI.getOperand(1).setReg(ShiftSrc0);
775 MI.setDesc(TII.get(
Opc));
780bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
781 MachineBasicBlock *BB =
MI.getParent();
783 LLT DstTy = MRI->getType(DstReg);
784 LLT SrcTy = MRI->getType(
MI.getOperand(1).getReg());
790 MI.getNumOperands() == 3) {
791 return selectS16MergeToS32(
MI);
797 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
799 const TargetRegisterClass *DstRC =
800 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
804 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
805 MachineInstrBuilder MIB =
806 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
807 for (
int I = 0,
E =
MI.getNumOperands() - 1;
I !=
E; ++
I) {
808 MachineOperand &Src =
MI.getOperand(
I + 1);
812 const TargetRegisterClass *SrcRC
813 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
814 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
818 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
821 MI.eraseFromParent();
825bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
826 MachineBasicBlock *BB =
MI.getParent();
827 const int NumDst =
MI.getNumOperands() - 1;
829 MachineOperand &Src =
MI.getOperand(NumDst);
833 LLT DstTy = MRI->getType(DstReg0);
834 LLT SrcTy = MRI->getType(SrcReg);
839 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
841 const TargetRegisterClass *SrcRC =
842 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
843 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
849 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
850 for (
int I = 0,
E = NumDst;
I !=
E; ++
I) {
851 MachineOperand &Dst =
MI.getOperand(
I);
853 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID &&
854 SubRegs[
I] == AMDGPU::hi16) {
855 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())
859 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
860 .
addReg(SrcReg, {}, SubRegs[
I]);
864 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
865 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
868 const TargetRegisterClass *DstRC =
869 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
870 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
874 MI.eraseFromParent();
878bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
879 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
880 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
884 LLT SrcTy = MRI->getType(Src0);
888 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
889 return selectG_MERGE_VALUES(
MI);
896 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
900 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
901 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
904 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
905 DstBank->
getID() == AMDGPU::VGPRRegBankID);
906 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
909 MachineBasicBlock *BB =
MI.getParent();
919 const int64_t K0 = ConstSrc0->Value.getSExtValue();
920 const int64_t K1 = ConstSrc1->Value.getSExtValue();
921 uint32_t Lo16 =
static_cast<uint32_t
>(K0) & 0xffff;
922 uint32_t Hi16 =
static_cast<uint32_t
>(K1) & 0xffff;
923 uint32_t
Imm = Lo16 | (Hi16 << 16);
928 MI.eraseFromParent();
929 return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
934 MI.eraseFromParent();
935 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
946 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
947 MI.setDesc(TII.get(AMDGPU::COPY));
950 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
951 return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
952 RBI.constrainGenericRegister(Src0, RC, *MRI);
955 return selectS16MergeToS32(
MI);
958bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
959 const MachineOperand &MO =
I.getOperand(0);
963 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
964 if ((!RC && !MRI->getRegBankOrNull(MO.
getReg())) ||
965 (RC && RBI.constrainGenericRegister(MO.
getReg(), *RC, *MRI))) {
966 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
973bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
976 Register DstReg =
I.getOperand(0).getReg();
977 Register Src0Reg =
I.getOperand(1).getReg();
978 Register Src1Reg =
I.getOperand(2).getReg();
979 LLT Src1Ty = MRI->getType(Src1Reg);
981 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
984 int64_t
Offset =
I.getOperand(3).getImm();
987 if (
Offset % 32 != 0 || InsSize % 32 != 0)
994 unsigned SubReg = TRI.getSubRegFromChannel(
Offset / 32, InsSize / 32);
995 if (SubReg == AMDGPU::NoSubRegister)
998 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
999 const TargetRegisterClass *DstRC =
1000 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
1004 const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
1005 const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
1006 const TargetRegisterClass *Src0RC =
1007 TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
1008 const TargetRegisterClass *Src1RC =
1009 TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
1013 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
1014 if (!Src0RC || !Src1RC)
1017 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
1018 !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
1019 !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
1023 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
1028 I.eraseFromParent();
1032bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
1035 Register OffsetReg =
MI.getOperand(2).getReg();
1036 Register WidthReg =
MI.getOperand(3).getReg();
1038 assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
1039 "scalar BFX instructions are expanded in regbankselect");
1040 assert(MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
1041 "64-bit vector BFX instructions are expanded in regbankselect");
1044 MachineBasicBlock *
MBB =
MI.getParent();
1046 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
1047 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
1052 MI.eraseFromParent();
1057bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
1058 if (STI.getLDSBankCount() != 16)
1064 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
1065 !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
1066 !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
1076 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1078 MachineBasicBlock *
MBB =
MI.getParent();
1082 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1085 .
addImm(
MI.getOperand(3).getImm());
1098 MI.eraseFromParent();
1107bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1109 if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
1112 MachineBasicBlock *
MBB =
MI.getParent();
1116 Register LaneSelect =
MI.getOperand(3).getReg();
1119 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1121 std::optional<ValueAndVReg> ConstSelect =
1127 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1130 std::optional<ValueAndVReg> ConstVal =
1136 STI.hasInv2PiInlineImm())) {
1137 MIB.
addImm(ConstVal->Value.getSExtValue());
1145 RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
1147 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1155 MI.eraseFromParent();
1162bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1166 LLT Ty = MRI->getType(Dst0);
1169 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1171 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1178 MachineBasicBlock *
MBB =
MI.getParent();
1182 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1184 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1197 MI.eraseFromParent();
1202bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1204 switch (IntrinsicID) {
1205 case Intrinsic::amdgcn_if_break: {
1210 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1211 .
add(
I.getOperand(0))
1212 .
add(
I.getOperand(2))
1213 .
add(
I.getOperand(3));
1215 Register DstReg =
I.getOperand(0).getReg();
1216 Register Src0Reg =
I.getOperand(2).getReg();
1217 Register Src1Reg =
I.getOperand(3).getReg();
1219 I.eraseFromParent();
1222 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1226 case Intrinsic::amdgcn_interp_p1_f16:
1227 return selectInterpP1F16(
I);
1228 case Intrinsic::amdgcn_wqm:
1229 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1230 case Intrinsic::amdgcn_softwqm:
1231 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1232 case Intrinsic::amdgcn_strict_wwm:
1233 case Intrinsic::amdgcn_wwm:
1234 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1235 case Intrinsic::amdgcn_strict_wqm:
1236 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1237 case Intrinsic::amdgcn_writelane:
1238 return selectWritelane(
I);
1239 case Intrinsic::amdgcn_div_scale:
1240 return selectDivScale(
I);
1241 case Intrinsic::amdgcn_icmp:
1242 case Intrinsic::amdgcn_fcmp:
1245 return selectIntrinsicCmp(
I);
1246 case Intrinsic::amdgcn_ballot:
1247 return selectBallot(
I);
1248 case Intrinsic::amdgcn_reloc_constant:
1249 return selectRelocConstant(
I);
1250 case Intrinsic::amdgcn_groupstaticsize:
1251 return selectGroupStaticSize(
I);
1252 case Intrinsic::returnaddress:
1253 return selectReturnAddress(
I);
1254 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1255 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1256 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1257 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1258 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1259 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1260 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1261 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1262 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1263 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1264 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1265 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1266 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1267 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1268 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1269 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1270 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1271 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1272 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1273 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1274 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1275 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1276 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1277 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1278 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1279 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1280 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1281 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1282 return selectSMFMACIntrin(
I);
1283 case Intrinsic::amdgcn_permlane16_swap:
1284 case Intrinsic::amdgcn_permlane32_swap:
1285 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1286 case Intrinsic::amdgcn_wave_shuffle:
1287 return selectWaveShuffleIntrin(
I);
1288 case Intrinsic::amdgcn_fma_legacy:
1289 if (!STI.hasFmaLegacy32Insts()) {
1294 case Intrinsic::amdgcn_sudot4:
1295 case Intrinsic::amdgcn_sudot8:
1296 if (!STI.hasDot8Insts()) {
1301 case Intrinsic::amdgcn_permlane16:
1302 case Intrinsic::amdgcn_permlanex16:
1303 if (!STI.hasPermlane16Insts()) {
1308 case Intrinsic::amdgcn_mov_dpp8:
1309 if (!STI.hasDPP8()) {
1314 case Intrinsic::amdgcn_tanh:
1315 if (!STI.hasTanhInsts()) {
1330 if (
Size == 16 && !ST.has16BitInsts())
1333 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1334 unsigned FakeS16Opc,
unsigned S32Opc,
1337 return ST.hasTrue16BitInsts()
1338 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1349 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1350 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1351 AMDGPU::V_CMP_NE_U64_e64);
1353 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1354 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1355 AMDGPU::V_CMP_EQ_U64_e64);
1357 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1358 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1359 AMDGPU::V_CMP_GT_I64_e64);
1361 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1362 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1363 AMDGPU::V_CMP_GE_I64_e64);
1365 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1366 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1367 AMDGPU::V_CMP_LT_I64_e64);
1369 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1370 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1371 AMDGPU::V_CMP_LE_I64_e64);
1373 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1374 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1375 AMDGPU::V_CMP_GT_U64_e64);
1377 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1378 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1379 AMDGPU::V_CMP_GE_U64_e64);
1381 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1382 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1383 AMDGPU::V_CMP_LT_U64_e64);
1385 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1386 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1387 AMDGPU::V_CMP_LE_U64_e64);
1390 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1391 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1392 AMDGPU::V_CMP_EQ_F64_e64);
1394 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1395 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1396 AMDGPU::V_CMP_GT_F64_e64);
1398 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1399 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1400 AMDGPU::V_CMP_GE_F64_e64);
1402 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1403 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1404 AMDGPU::V_CMP_LT_F64_e64);
1406 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1407 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1408 AMDGPU::V_CMP_LE_F64_e64);
1410 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1411 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1412 AMDGPU::V_CMP_NEQ_F64_e64);
1414 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1415 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1416 AMDGPU::V_CMP_O_F64_e64);
1418 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1419 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1420 AMDGPU::V_CMP_U_F64_e64);
1422 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1423 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1424 AMDGPU::V_CMP_NLG_F64_e64);
1426 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1427 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1428 AMDGPU::V_CMP_NLE_F64_e64);
1430 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1431 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1432 AMDGPU::V_CMP_NLT_F64_e64);
1434 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1435 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1436 AMDGPU::V_CMP_NGE_F64_e64);
1438 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1439 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1440 AMDGPU::V_CMP_NGT_F64_e64);
1442 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1443 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1444 AMDGPU::V_CMP_NEQ_F64_e64);
1446 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1447 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1448 AMDGPU::V_CMP_TRU_F64_e64);
1450 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1451 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1452 AMDGPU::V_CMP_F_F64_e64);
1457 unsigned Size)
const {
1459 if (!STI.hasScalarCompareEq64())
1464 return AMDGPU::S_CMP_LG_U64;
1466 return AMDGPU::S_CMP_EQ_U64;
1475 return AMDGPU::S_CMP_LG_U32;
1477 return AMDGPU::S_CMP_EQ_U32;
1479 return AMDGPU::S_CMP_GT_I32;
1481 return AMDGPU::S_CMP_GE_I32;
1483 return AMDGPU::S_CMP_LT_I32;
1485 return AMDGPU::S_CMP_LE_I32;
1487 return AMDGPU::S_CMP_GT_U32;
1489 return AMDGPU::S_CMP_GE_U32;
1491 return AMDGPU::S_CMP_LT_U32;
1493 return AMDGPU::S_CMP_LE_U32;
1495 return AMDGPU::S_CMP_EQ_F32;
1497 return AMDGPU::S_CMP_GT_F32;
1499 return AMDGPU::S_CMP_GE_F32;
1501 return AMDGPU::S_CMP_LT_F32;
1503 return AMDGPU::S_CMP_LE_F32;
1505 return AMDGPU::S_CMP_LG_F32;
1507 return AMDGPU::S_CMP_O_F32;
1509 return AMDGPU::S_CMP_U_F32;
1511 return AMDGPU::S_CMP_NLG_F32;
1513 return AMDGPU::S_CMP_NLE_F32;
1515 return AMDGPU::S_CMP_NLT_F32;
1517 return AMDGPU::S_CMP_NGE_F32;
1519 return AMDGPU::S_CMP_NGT_F32;
1521 return AMDGPU::S_CMP_NEQ_F32;
1528 if (!STI.hasSALUFloatInsts())
1533 return AMDGPU::S_CMP_EQ_F16;
1535 return AMDGPU::S_CMP_GT_F16;
1537 return AMDGPU::S_CMP_GE_F16;
1539 return AMDGPU::S_CMP_LT_F16;
1541 return AMDGPU::S_CMP_LE_F16;
1543 return AMDGPU::S_CMP_LG_F16;
1545 return AMDGPU::S_CMP_O_F16;
1547 return AMDGPU::S_CMP_U_F16;
1549 return AMDGPU::S_CMP_NLG_F16;
1551 return AMDGPU::S_CMP_NLE_F16;
1553 return AMDGPU::S_CMP_NLT_F16;
1555 return AMDGPU::S_CMP_NGE_F16;
1557 return AMDGPU::S_CMP_NGT_F16;
1559 return AMDGPU::S_CMP_NEQ_F16;
1568bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1573 Register SrcReg =
I.getOperand(2).getReg();
1574 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1578 Register CCReg =
I.getOperand(0).getReg();
1579 if (!isVCC(CCReg, *MRI)) {
1580 int Opcode = getS_CMPOpcode(Pred,
Size);
1583 MachineInstr *ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode))
1584 .
add(
I.getOperand(2))
1585 .
add(
I.getOperand(3));
1586 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1590 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
1591 I.eraseFromParent();
1595 if (
I.getOpcode() == AMDGPU::G_FCMP)
1602 MachineInstrBuilder ICmp;
1605 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1607 .
add(
I.getOperand(2))
1609 .
add(
I.getOperand(3))
1612 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1613 .
add(
I.getOperand(2))
1614 .
add(
I.getOperand(3));
1618 *TRI.getBoolRC(), *MRI);
1620 I.eraseFromParent();
1624bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1625 Register Dst =
I.getOperand(0).getReg();
1626 if (isVCC(Dst, *MRI))
1629 LLT DstTy = MRI->getType(Dst);
1635 Register SrcReg =
I.getOperand(2).getReg();
1636 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1644 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1645 I.eraseFromParent();
1646 return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1653 MachineInstrBuilder SelectedMI;
1654 MachineOperand &
LHS =
I.getOperand(2);
1655 MachineOperand &
RHS =
I.getOperand(3);
1656 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1657 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1659 copyToVGPRIfSrcFolded(Src0, Src0Mods,
LHS, &
I,
true);
1661 copyToVGPRIfSrcFolded(Src1, Src1Mods,
RHS, &
I,
true);
1662 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1664 SelectedMI.
addImm(Src0Mods);
1665 SelectedMI.
addReg(Src0Reg);
1667 SelectedMI.
addImm(Src1Mods);
1668 SelectedMI.
addReg(Src1Reg);
1674 RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1677 I.eraseFromParent();
1688 if (
MI->getParent() !=
MBB)
1692 if (
MI->getOpcode() == AMDGPU::COPY) {
1695 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1696 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1701 if (
MI->getOpcode() == AMDGPU::G_AMDGPU_COPY_VCC_SCC)
1717bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1720 Register DstReg =
I.getOperand(0).getReg();
1721 Register SrcReg =
I.getOperand(2).getReg();
1722 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1723 const unsigned WaveSize = STI.getWavefrontSize();
1727 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1730 std::optional<ValueAndVReg> Arg =
1735 if (BallotSize != WaveSize) {
1736 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1740 const int64_t
Value = Arg->Value.getZExtValue();
1743 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1750 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1756 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1760 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1770 if (BallotSize != WaveSize) {
1771 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1773 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1780 I.eraseFromParent();
1784bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1785 Register DstReg =
I.getOperand(0).getReg();
1786 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
1787 const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1788 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1791 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1793 Module *
M =
MF->getFunction().getParent();
1794 const MDNode *
Metadata =
I.getOperand(2).getMetadata();
1801 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1804 I.eraseFromParent();
1808bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1811 Register DstReg =
I.getOperand(0).getReg();
1812 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1813 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1814 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1822 const SIMachineFunctionInfo *MFI =
MF->getInfo<SIMachineFunctionInfo>();
1825 Module *
M =
MF->getFunction().getParent();
1826 const GlobalValue *GV =
1831 I.eraseFromParent();
1836bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1841 MachineOperand &Dst =
I.getOperand(0);
1843 unsigned Depth =
I.getOperand(2).getImm();
1845 const TargetRegisterClass *RC
1846 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1848 !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1853 MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1856 I.eraseFromParent();
1860 MachineFrameInfo &MFI =
MF.getFrameInfo();
1865 Register ReturnAddrReg = TRI.getReturnAddressReg(
MF);
1867 AMDGPU::SReg_64RegClass,
DL);
1870 I.eraseFromParent();
1874bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1877 MachineBasicBlock *BB =
MI.getParent();
1878 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1879 .
add(
MI.getOperand(1));
1882 MI.eraseFromParent();
1884 if (!MRI->getRegClassOrNull(
Reg))
1885 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1889bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1891 MachineBasicBlock *
MBB =
MI.getParent();
1895 unsigned IndexOperand =
MI.getOperand(7).getImm();
1896 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1897 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1899 if (WaveDone && !WaveRelease) {
1903 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1906 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1907 IndexOperand &= ~0x3f;
1908 unsigned CountDw = 0;
1911 CountDw = (IndexOperand >> 24) & 0xf;
1912 IndexOperand &= ~(0xf << 24);
1914 if (CountDw < 1 || CountDw > 4) {
1917 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1925 Fn,
"ds_ordered_count: bad index operand",
DL));
1928 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1931 unsigned Offset0 = OrderedCountIndex << 2;
1932 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1935 Offset1 |= (CountDw - 1) << 6;
1938 Offset1 |= ShaderType << 2;
1940 unsigned Offset = Offset0 | (Offset1 << 8);
1948 MachineInstrBuilder
DS =
1949 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1954 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1958 MI.eraseFromParent();
1964 case Intrinsic::amdgcn_ds_gws_init:
1965 return AMDGPU::DS_GWS_INIT;
1966 case Intrinsic::amdgcn_ds_gws_barrier:
1967 return AMDGPU::DS_GWS_BARRIER;
1968 case Intrinsic::amdgcn_ds_gws_sema_v:
1969 return AMDGPU::DS_GWS_SEMA_V;
1970 case Intrinsic::amdgcn_ds_gws_sema_br:
1971 return AMDGPU::DS_GWS_SEMA_BR;
1972 case Intrinsic::amdgcn_ds_gws_sema_p:
1973 return AMDGPU::DS_GWS_SEMA_P;
1974 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1975 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1981bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1983 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1984 !STI.hasGWSSemaReleaseAll()))
1988 const bool HasVSrc =
MI.getNumOperands() == 3;
1989 assert(HasVSrc ||
MI.getNumOperands() == 2);
1991 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1992 const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
1993 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1999 MachineBasicBlock *
MBB =
MI.getParent();
2002 MachineInstr *Readfirstlane =
nullptr;
2007 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
2008 Readfirstlane = OffsetDef;
2013 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
2023 std::tie(BaseOffset, ImmOffset) =
2026 if (Readfirstlane) {
2029 if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
2035 if (!RBI.constrainGenericRegister(BaseOffset,
2036 AMDGPU::SReg_32RegClass, *MRI))
2040 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2055 const MCInstrDesc &InstrDesc = TII.get(
Opc);
2060 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
2061 const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
2062 const TargetRegisterClass *SubRC =
2063 TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
2067 if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
2077 Register DataReg = MRI->createVirtualRegister(DataRC);
2078 if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
2081 Register UndefReg = MRI->createVirtualRegister(SubRC);
2100 MI.eraseFromParent();
2104bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
2105 bool IsAppend)
const {
2106 Register PtrBase =
MI.getOperand(2).getReg();
2107 LLT PtrTy = MRI->getType(PtrBase);
2111 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
2114 if (!isDSOffsetLegal(PtrBase,
Offset)) {
2115 PtrBase =
MI.getOperand(2).getReg();
2119 MachineBasicBlock *
MBB =
MI.getParent();
2121 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2125 if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
2132 MI.eraseFromParent();
2137bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
2138 MachineFunction *
MF =
MI.getMF();
2139 SIMachineFunctionInfo *MFInfo =
MF->getInfo<SIMachineFunctionInfo>();
2150 TFE = TexFailCtrl & 0x1;
2152 LWE = TexFailCtrl & 0x2;
2155 return TexFailCtrl == 0;
2158bool AMDGPUInstructionSelector::selectImageIntrinsic(
2160 MachineBasicBlock *
MBB =
MI.getParent();
2166 Register ResultDef =
MI.getOperand(0).getReg();
2167 if (MRI->use_nodbg_empty(ResultDef))
2171 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2180 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2182 Register VDataIn = AMDGPU::NoRegister;
2183 Register VDataOut = AMDGPU::NoRegister;
2185 int NumVDataDwords = -1;
2186 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2187 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2193 Unorm =
MI.getOperand(ArgOffset + Intr->
UnormIndex).getImm() != 0;
2197 bool IsTexFail =
false;
2199 TFE, LWE, IsTexFail))
2202 const int Flags =
MI.getOperand(ArgOffset + Intr->
NumArgs).getImm();
2203 const bool IsA16 = (
Flags & 1) != 0;
2204 const bool IsG16 = (
Flags & 2) != 0;
2207 if (IsA16 && !STI.hasG16() && !IsG16)
2211 unsigned DMaskLanes = 0;
2213 if (BaseOpcode->
Atomic) {
2215 VDataOut =
MI.getOperand(0).getReg();
2216 VDataIn =
MI.getOperand(2).getReg();
2217 LLT Ty = MRI->getType(VDataIn);
2220 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2225 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2227 DMask = Is64Bit ? 0xf : 0x3;
2228 NumVDataDwords = Is64Bit ? 4 : 2;
2230 DMask = Is64Bit ? 0x3 : 0x1;
2231 NumVDataDwords = Is64Bit ? 2 : 1;
2234 DMask =
MI.getOperand(ArgOffset + Intr->
DMaskIndex).getImm();
2237 if (BaseOpcode->
Store) {
2238 VDataIn =
MI.getOperand(1).getReg();
2239 VDataTy = MRI->getType(VDataIn);
2244 VDataOut =
MI.getOperand(0).getReg();
2245 VDataTy = MRI->getType(VDataOut);
2246 NumVDataDwords = DMaskLanes;
2248 if (IsD16 && !STI.hasUnpackedD16VMem())
2249 NumVDataDwords = (DMaskLanes + 1) / 2;
2254 if (Subtarget->hasG16() && IsG16) {
2255 const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
2258 IntrOpcode = G16MappingInfo->
G16;
2262 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2272 int NumVAddrRegs = 0;
2273 int NumVAddrDwords = 0;
2276 MachineOperand &AddrOp =
MI.getOperand(ArgOffset +
I);
2277 if (!AddrOp.
isReg())
2285 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2292 NumVAddrRegs != 1 &&
2293 (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
2294 : NumVAddrDwords == NumVAddrRegs);
2295 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2306 NumVDataDwords, NumVAddrDwords);
2307 }
else if (IsGFX12Plus) {
2309 NumVDataDwords, NumVAddrDwords);
2310 }
else if (IsGFX11Plus) {
2312 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2313 : AMDGPU::MIMGEncGfx11Default,
2314 NumVDataDwords, NumVAddrDwords);
2315 }
else if (IsGFX10Plus) {
2317 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2318 : AMDGPU::MIMGEncGfx10Default,
2319 NumVDataDwords, NumVAddrDwords);
2321 if (Subtarget->hasGFX90AInsts()) {
2323 NumVDataDwords, NumVAddrDwords);
2327 <<
"requested image instruction is not supported on this GPU\n");
2334 NumVDataDwords, NumVAddrDwords);
2337 NumVDataDwords, NumVAddrDwords);
2347 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2349 Register TmpReg = MRI->createVirtualRegister(
2350 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2351 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2354 if (!MRI->use_empty(VDataOut)) {
2367 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2368 MachineOperand &SrcOp =
MI.getOperand(ArgOffset + Intr->
VAddrStart +
I);
2369 if (SrcOp.
isReg()) {
2388 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2390 MIB.
addImm(IsA16 ? -1 : 0);
2392 if (!Subtarget->hasGFX90AInsts()) {
2404 MIB.
addImm(IsD16 ? -1 : 0);
2406 MI.eraseFromParent();
2408 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
2414bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2420 MachineBasicBlock *
MBB =
MI.getParent();
2425 unsigned Offset =
MI.getOperand(6).getImm();
2429 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2430 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2431 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2433 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2434 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2436 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2437 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2449 MI.eraseFromParent();
2454bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2457 switch (IntrinsicID) {
2458 case Intrinsic::amdgcn_end_cf:
2459 return selectEndCfIntrinsic(
I);
2460 case Intrinsic::amdgcn_ds_ordered_add:
2461 case Intrinsic::amdgcn_ds_ordered_swap:
2462 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2463 case Intrinsic::amdgcn_ds_gws_init:
2464 case Intrinsic::amdgcn_ds_gws_barrier:
2465 case Intrinsic::amdgcn_ds_gws_sema_v:
2466 case Intrinsic::amdgcn_ds_gws_sema_br:
2467 case Intrinsic::amdgcn_ds_gws_sema_p:
2468 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2469 return selectDSGWSIntrinsic(
I, IntrinsicID);
2470 case Intrinsic::amdgcn_ds_append:
2471 return selectDSAppendConsume(
I,
true);
2472 case Intrinsic::amdgcn_ds_consume:
2473 return selectDSAppendConsume(
I,
false);
2474 case Intrinsic::amdgcn_init_whole_wave:
2475 return selectInitWholeWave(
I);
2476 case Intrinsic::amdgcn_raw_buffer_load_lds:
2477 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
2478 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2479 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
2480 case Intrinsic::amdgcn_struct_buffer_load_lds:
2481 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
2482 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2483 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
2484 return selectBufferLoadLds(
I);
2489 case Intrinsic::amdgcn_load_to_lds:
2490 case Intrinsic::amdgcn_load_async_to_lds:
2491 case Intrinsic::amdgcn_global_load_lds:
2492 case Intrinsic::amdgcn_global_load_async_lds:
2493 return selectGlobalLoadLds(
I);
2494 case Intrinsic::amdgcn_tensor_load_to_lds:
2495 case Intrinsic::amdgcn_tensor_store_from_lds:
2496 return selectTensorLoadStore(
I, IntrinsicID);
2497 case Intrinsic::amdgcn_asyncmark:
2498 case Intrinsic::amdgcn_wait_asyncmark:
2499 if (!Subtarget->hasAsyncMark())
2502 case Intrinsic::amdgcn_exp_compr:
2503 if (!STI.hasCompressedExport()) {
2508 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2509 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2510 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2511 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2512 return selectDSBvhStackIntrinsic(
I);
2513 case Intrinsic::amdgcn_s_alloc_vgpr: {
2519 Register ResReg =
I.getOperand(0).getReg();
2521 MachineInstr *AllocMI =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_ALLOC_VGPR))
2522 .
add(
I.getOperand(2));
2525 I.eraseFromParent();
2527 return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
2529 case Intrinsic::amdgcn_s_barrier_init:
2530 case Intrinsic::amdgcn_s_barrier_signal_var:
2531 return selectNamedBarrierInit(
I, IntrinsicID);
2532 case Intrinsic::amdgcn_s_wakeup_barrier: {
2533 if (!STI.hasSWakeupBarrier()) {
2537 return selectNamedBarrierInst(
I, IntrinsicID);
2539 case Intrinsic::amdgcn_s_barrier_join:
2540 case Intrinsic::amdgcn_s_get_named_barrier_state:
2541 return selectNamedBarrierInst(
I, IntrinsicID);
2542 case Intrinsic::amdgcn_s_get_barrier_state:
2543 return selectSGetBarrierState(
I, IntrinsicID);
2544 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2545 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2550bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2557 Register DstReg =
I.getOperand(0).getReg();
2558 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
2560 const MachineOperand &CCOp =
I.getOperand(1);
2562 if (!isVCC(CCReg, *MRI)) {
2563 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2564 AMDGPU::S_CSELECT_B32;
2565 MachineInstr *CopySCC =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
2571 if (!MRI->getRegClassOrNull(CCReg))
2572 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
2574 .
add(
I.getOperand(2))
2575 .
add(
I.getOperand(3));
2579 I.eraseFromParent();
2588 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2590 .
add(
I.getOperand(3))
2592 .
add(
I.getOperand(2))
2593 .
add(
I.getOperand(1));
2596 I.eraseFromParent();
2600bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2601 Register DstReg =
I.getOperand(0).getReg();
2602 Register SrcReg =
I.getOperand(1).getReg();
2603 const LLT DstTy = MRI->getType(DstReg);
2604 const LLT SrcTy = MRI->getType(SrcReg);
2607 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2608 const RegisterBank *DstRB;
2614 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2619 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2624 const TargetRegisterClass *SrcRC =
2625 TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
2626 const TargetRegisterClass *DstRC =
2627 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
2628 if (!SrcRC || !DstRC)
2631 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
2632 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
2637 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2638 assert(STI.useRealTrue16Insts());
2642 .
addReg(SrcReg, {}, AMDGPU::lo16);
2643 I.eraseFromParent();
2651 Register LoReg = MRI->createVirtualRegister(DstRC);
2652 Register HiReg = MRI->createVirtualRegister(DstRC);
2654 .
addReg(SrcReg, {}, AMDGPU::sub0);
2656 .
addReg(SrcReg, {}, AMDGPU::sub1);
2658 if (IsVALU && STI.hasSDWA()) {
2661 MachineInstr *MovSDWA =
2662 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2672 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2673 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2674 Register ImmReg = MRI->createVirtualRegister(DstRC);
2676 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2686 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2687 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2688 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2700 And.setOperandDead(3);
2701 Or.setOperandDead(3);
2705 I.eraseFromParent();
2713 unsigned SubRegIdx = DstSize < 32
2714 ?
static_cast<unsigned>(AMDGPU::sub0)
2715 : TRI.getSubRegFromChannel(0, DstSize / 32);
2716 if (SubRegIdx == AMDGPU::NoSubRegister)
2721 const TargetRegisterClass *SrcWithSubRC
2722 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2726 if (SrcWithSubRC != SrcRC) {
2727 if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
2731 I.getOperand(1).setSubReg(SubRegIdx);
2734 I.setDesc(TII.get(TargetOpcode::COPY));
2741 int SignedMask =
static_cast<int>(Mask);
2742 return SignedMask >= -16 && SignedMask <= 64;
2746const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2755 return &RBI.getRegBankFromRegClass(*RC, LLT());
2759bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2760 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2761 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2764 const Register DstReg =
I.getOperand(0).getReg();
2765 const Register SrcReg =
I.getOperand(1).getReg();
2767 const LLT DstTy = MRI->getType(DstReg);
2768 const LLT SrcTy = MRI->getType(SrcReg);
2769 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2776 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2779 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2781 return selectCOPY(
I);
2783 const TargetRegisterClass *SrcRC =
2784 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2785 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2786 const TargetRegisterClass *DstRC =
2787 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2789 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2790 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2796 I.eraseFromParent();
2798 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2799 RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2802 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2808 MachineInstr *ExtI =
2812 I.eraseFromParent();
2817 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2818 MachineInstr *ExtI =
2823 I.eraseFromParent();
2828 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2829 const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
2830 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2831 if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
2834 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2835 const unsigned SextOpc = SrcSize == 8 ?
2836 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2839 I.eraseFromParent();
2840 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2845 if (DstSize > 32 && SrcSize == 32) {
2846 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2847 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2850 .
addReg(SrcReg, {}, SubReg)
2858 .
addReg(SrcReg, {}, SubReg)
2859 .addImm(AMDGPU::sub0)
2862 I.eraseFromParent();
2863 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2867 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2868 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2871 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2873 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2874 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2875 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2877 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2879 .
addReg(SrcReg, {}, SubReg)
2880 .addImm(AMDGPU::sub0)
2888 I.eraseFromParent();
2889 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
2904 I.eraseFromParent();
2905 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2929 if (Unmerge->getNumDefs() == 2 && Unmerge->getOperand(1).getReg() == In &&
2931 Out = Unmerge->getSourceReg();
2951 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2958 assert(Mask.size() == 2);
2960 if (Mask[0] == 1 && Mask[1] <= 1) {
2968bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2969 if (!Subtarget->hasSALUFloatInsts())
2972 Register Dst =
I.getOperand(0).getReg();
2973 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2974 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2977 Register Src =
I.getOperand(1).getReg();
2983 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2985 I.eraseFromParent();
2986 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
2993bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
3006 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3007 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3012 MachineInstr *Fabs =
getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
3016 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3017 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3020 MachineBasicBlock *BB =
MI.getParent();
3022 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3023 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3024 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3025 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3027 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3028 .
addReg(Src, {}, AMDGPU::sub0);
3029 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3030 .
addReg(Src, {}, AMDGPU::sub1);
3031 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3035 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
3040 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3045 MI.eraseFromParent();
3050bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
3052 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3053 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3058 MachineBasicBlock *BB =
MI.getParent();
3060 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3061 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3062 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3063 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3065 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3066 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3069 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3070 .
addReg(Src, {}, AMDGPU::sub0);
3071 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3072 .
addReg(Src, {}, AMDGPU::sub1);
3073 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3078 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
3082 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3088 MI.eraseFromParent();
3093 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
3096void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
3099 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
3100 const MachineInstr *PtrMI =
3101 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
3105 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
3110 for (
unsigned i = 1; i != 3; ++i) {
3111 const MachineOperand &GEPOp = PtrMI->
getOperand(i);
3112 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.
getReg());
3117 assert(GEPInfo.Imm == 0);
3121 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.
getReg(), MRI, TRI);
3122 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
3123 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
3125 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
3129 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
3132bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
3133 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
3136bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3137 if (!
MI.hasOneMemOperand())
3140 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3153 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3154 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3155 AMDGPU::SGPRRegBankID;
3158 return I &&
I->getMetadata(
"amdgpu.uniform");
3162 for (
const GEPInfo &GEPInfo : AddrInfo) {
3163 if (!GEPInfo.VgprParts.empty())
3169void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3170 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3173 STI.ldsRequiresM0Init()) {
3177 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3182bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3189 if (
Reg.isPhysical())
3193 const unsigned Opcode =
MI.getOpcode();
3195 if (Opcode == AMDGPU::COPY)
3198 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3199 Opcode == AMDGPU::G_XOR)
3204 return GI->is(Intrinsic::amdgcn_class);
3206 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3209bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3211 MachineOperand &CondOp =
I.getOperand(0);
3217 const TargetRegisterClass *ConstrainRC;
3224 if (!isVCC(CondReg, *MRI)) {
3228 CondPhysReg = AMDGPU::SCC;
3229 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3230 ConstrainRC = &AMDGPU::SReg_32RegClass;
3237 const bool Is64 = STI.isWave64();
3238 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3239 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3241 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3242 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3249 CondPhysReg = TRI.getVCC();
3250 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3251 ConstrainRC = TRI.getBoolRC();
3254 if (!MRI->getRegClassOrNull(CondReg))
3255 MRI->setRegClass(CondReg, ConstrainRC);
3257 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3260 .
addMBB(
I.getOperand(1).getMBB());
3262 I.eraseFromParent();
3266bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3268 Register DstReg =
I.getOperand(0).getReg();
3269 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3270 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3271 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3275 return RBI.constrainGenericRegister(
3276 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3279bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3280 Register DstReg =
I.getOperand(0).getReg();
3281 Register SrcReg =
I.getOperand(1).getReg();
3282 Register MaskReg =
I.getOperand(2).getReg();
3283 LLT Ty = MRI->getType(DstReg);
3284 LLT MaskTy = MRI->getType(MaskReg);
3288 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3289 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3290 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3291 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3297 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3301 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3302 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3305 !CanCopyLow32 && !CanCopyHi32) {
3306 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3310 I.eraseFromParent();
3315 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3316 const TargetRegisterClass &RegRC
3317 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3319 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3320 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3321 const TargetRegisterClass *MaskRC =
3322 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3324 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3325 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3326 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3331 "ptrmask should have been narrowed during legalize");
3333 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3339 I.eraseFromParent();
3343 Register HiReg = MRI->createVirtualRegister(&RegRC);
3344 Register LoReg = MRI->createVirtualRegister(&RegRC);
3347 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3348 .
addReg(SrcReg, {}, AMDGPU::sub0);
3349 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3350 .
addReg(SrcReg, {}, AMDGPU::sub1);
3359 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3360 MaskedLo = MRI->createVirtualRegister(&RegRC);
3362 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3363 .
addReg(MaskReg, {}, AMDGPU::sub0);
3364 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3373 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3374 MaskedHi = MRI->createVirtualRegister(&RegRC);
3376 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3377 .
addReg(MaskReg, {}, AMDGPU::sub1);
3378 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3383 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3388 I.eraseFromParent();
3394static std::pair<Register, unsigned>
3401 std::tie(IdxBaseReg,
Offset) =
3403 if (IdxBaseReg == AMDGPU::NoRegister) {
3407 IdxBaseReg = IdxReg;
3414 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3415 return std::pair(IdxReg, SubRegs[0]);
3416 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3419bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3425 LLT DstTy = MRI->getType(DstReg);
3426 LLT SrcTy = MRI->getType(SrcReg);
3428 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3429 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3430 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3434 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3437 const TargetRegisterClass *SrcRC =
3438 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3439 const TargetRegisterClass *DstRC =
3440 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3441 if (!SrcRC || !DstRC)
3443 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3444 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3445 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3448 MachineBasicBlock *BB =
MI.getParent();
3456 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3460 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3463 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3465 .
addReg(SrcReg, {}, SubReg)
3467 MI.eraseFromParent();
3474 if (!STI.useVGPRIndexMode()) {
3475 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3477 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3478 .
addReg(SrcReg, {}, SubReg)
3480 MI.eraseFromParent();
3484 const MCInstrDesc &GPRIDXDesc =
3485 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3491 MI.eraseFromParent();
3496bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3503 LLT VecTy = MRI->getType(DstReg);
3504 LLT ValTy = MRI->getType(ValReg);
3508 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3509 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3510 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3516 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3519 const TargetRegisterClass *VecRC =
3520 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3521 const TargetRegisterClass *ValRC =
3522 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3524 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3525 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3526 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3527 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3530 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3534 std::tie(IdxReg, SubReg) =
3537 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3538 STI.useVGPRIndexMode();
3540 MachineBasicBlock *BB =
MI.getParent();
3544 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3547 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3548 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3553 MI.eraseFromParent();
3557 const MCInstrDesc &GPRIDXDesc =
3558 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3565 MI.eraseFromParent();
3571 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3572 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3573 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3574 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3575 case Intrinsic::amdgcn_load_async_to_lds:
3576 case Intrinsic::amdgcn_global_load_async_lds:
3582bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3583 if (!Subtarget->hasVMemToLDSLoad())
3586 unsigned Size =
MI.getOperand(3).getImm();
3590 const bool HasVIndex =
MI.getNumOperands() == 9;
3594 VIndex =
MI.getOperand(4).getReg();
3598 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3599 std::optional<ValueAndVReg> MaybeVOffset =
3601 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3607 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3608 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3609 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3610 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3613 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3614 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3615 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3616 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3619 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3620 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3621 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3622 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3625 if (!Subtarget->hasLDSLoadB96_B128())
3628 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3629 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3630 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3631 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3634 if (!Subtarget->hasLDSLoadB96_B128())
3637 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3638 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3639 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3640 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3644 MachineBasicBlock *
MBB =
MI.getParent();
3647 .
add(
MI.getOperand(2));
3651 if (HasVIndex && HasVOffset) {
3652 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3653 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3660 }
else if (HasVIndex) {
3662 }
else if (HasVOffset) {
3666 MIB.
add(
MI.getOperand(1));
3667 MIB.
add(
MI.getOperand(5 + OpOffset));
3668 MIB.
add(
MI.getOperand(6 + OpOffset));
3670 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3679 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3684 MachinePointerInfo StorePtrI = LoadPtrI;
3695 MachineMemOperand *StoreMMO =
3701 MI.eraseFromParent();
3714 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3720 return Def->getOperand(1).getReg();
3734 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3742 return Def->getOperand(1).getReg();
3744 if (
VT->signBitIsZero(
Reg))
3745 return matchZeroExtendFromS32(
Reg);
3753AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3755 : matchZeroExtendFromS32(
Reg);
3761AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3763 : matchSignExtendFromS32(
Reg);
3767AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3768 bool IsSigned)
const {
3770 return matchSignExtendFromS32OrS32(
Reg);
3772 return matchZeroExtendFromS32OrS32(
Reg);
3782 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3789 return Def->getOperand(1).getReg();
3794bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3795 if (!Subtarget->hasVMemToLDSLoad())
3799 unsigned Size =
MI.getOperand(3).getImm();
3806 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3809 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3812 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3815 if (!Subtarget->hasLDSLoadB96_B128())
3817 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3820 if (!Subtarget->hasLDSLoadB96_B128())
3822 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3826 MachineBasicBlock *
MBB =
MI.getParent();
3829 .
add(
MI.getOperand(2));
3835 if (!isSGPR(Addr)) {
3837 if (isSGPR(AddrDef->Reg)) {
3838 Addr = AddrDef->Reg;
3839 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3842 if (isSGPR(SAddr)) {
3843 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3844 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3855 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3867 MIB.
add(
MI.getOperand(4));
3869 unsigned Aux =
MI.getOperand(5).getImm();
3873 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3875 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3876 MachinePointerInfo StorePtrI = LoadPtrI;
3885 MachineMemOperand *StoreMMO =
3887 sizeof(int32_t),
Align(4));
3891 MI.eraseFromParent();
3896bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3898 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3900 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3904 const auto isAllZeros = [&](MachineOperand &Opnd) {
3905 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3914 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3915 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3920 MachineBasicBlock *
MBB =
MI.getParent();
3922 .
add(
MI.getOperand(1))
3923 .
add(
MI.getOperand(2));
3925 if (NumGroups >= 4) {
3926 MIB.
add(
MI.getOperand(3))
3927 .
add(
MI.getOperand(4));
3931 .
add(
MI.getOperand(6));
3933 MI.eraseFromParent();
3937bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3939 unsigned OpcodeOpIdx =
3940 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3941 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3942 MI.removeOperand(OpcodeOpIdx);
3943 MI.addImplicitDefUseOperands(*
MI.getMF());
3950bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3953 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3954 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3956 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3957 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3959 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3960 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3962 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3963 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3965 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3966 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3968 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3969 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3971 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3972 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3974 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3975 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3977 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3978 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3980 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3981 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3983 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3984 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3986 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3987 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3989 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3990 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3992 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3993 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3995 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3996 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3998 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3999 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
4001 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
4002 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
4004 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
4005 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
4007 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
4008 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
4010 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
4011 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
4013 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
4014 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
4016 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
4017 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
4019 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
4020 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
4022 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
4023 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
4025 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
4026 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
4028 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
4029 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
4031 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
4032 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
4034 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
4035 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
4041 auto VDst_In =
MI.getOperand(4);
4043 MI.setDesc(TII.get(
Opc));
4044 MI.removeOperand(4);
4045 MI.removeOperand(1);
4046 MI.addOperand(VDst_In);
4047 MI.addImplicitDefUseOperands(*
MI.getMF());
4048 const MCInstrDesc &MCID =
MI.getDesc();
4050 MI.getOperand(0).setIsEarlyClobber(
true);
4055bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
4057 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
4058 !Subtarget->hasPermlane16Swap())
4060 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
4061 !Subtarget->hasPermlane32Swap())
4064 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
4065 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
4066 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
4068 MI.removeOperand(2);
4069 MI.setDesc(TII.get(Opcode));
4072 MachineOperand &FI =
MI.getOperand(4);
4079bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
4082 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4083 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4084 MachineBasicBlock *
MBB =
MI.getParent();
4088 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
4089 .
addImm(Subtarget->getWavefrontSizeLog2())
4094 .
addImm(Subtarget->getWavefrontSizeLog2())
4098 const TargetRegisterClass &RC =
4099 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4100 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4103 MI.eraseFromParent();
4107bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4110 MachineBasicBlock *
MBB =
MI.getParent();
4117 const LLT DstTy = MRI->getType(DstReg);
4119 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4120 const TargetRegisterClass *DstRC =
4121 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4126 if (!Subtarget->supportsBPermute())
4130 if (Subtarget->supportsWaveWideBPermute()) {
4131 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4132 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4142 assert(Subtarget->isWave64());
4146 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4147 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4149 Register UndefExecReg = MRI->createVirtualRegister(
4150 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4151 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4153 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4154 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4162 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4163 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4167 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4168 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4176 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4177 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4182 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4183 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4186 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4187 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4192 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4193 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4200 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4201 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4205 Register XORReg = MRI->createVirtualRegister(DstRC);
4210 Register ANDReg = MRI->createVirtualRegister(DstRC);
4215 Register CompareReg = MRI->createVirtualRegister(
4216 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4217 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4222 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4230 MI.eraseFromParent();
4239 unsigned NumOpcodes = 0;
4252 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4263 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4277 if (Src.size() == 3) {
4284 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4285 if (Src[
I] ==
LHS) {
4295 Bits = SrcBits[Src.size()];
4301 switch (
MI->getOpcode()) {
4302 case TargetOpcode::G_AND:
4303 case TargetOpcode::G_OR:
4304 case TargetOpcode::G_XOR: {
4309 if (!getOperandBits(
LHS, LHSBits) ||
4310 !getOperandBits(
RHS, RHSBits)) {
4311 Src = std::move(Backup);
4312 return std::make_pair(0, 0);
4333 uint8_t LHSBitsOrig = LHSBits;
4334 uint8_t RHSBitsOrig = RHSBits;
4338 NumOpcodes += LHSOp.first;
4339 LHSBits = LHSOp.second;
4346 NumOpcodes += RHSOp.first;
4347 RHSBits = RHSOp.second;
4351 auto dependsOnSlot = [](
uint8_t TT,
int Slot) ->
bool {
4352 if (Slot < 0 || Slot > 2)
4354 const uint8_t Masks[3] = {0x0f, 0x33, 0x55};
4355 const int Shifts[3] = {4, 2, 1};
4356 return ((TT ^ (TT >> Shifts[Slot])) & Masks[Slot]) != 0;
4362 const uint8_t SrcBitsConst[3] = {0xf0, 0xcc, 0xaa};
4369 for (
int I = 0;
I < (int)S.size();
I++) {
4370 if (Bits == SrcBitsConst[
I] && S[
I] ==
Op)
4372 if (IsNegationOp && Bits == (
uint8_t)~SrcBitsConst[
I] &&
4373 S[
I] == NegatedInner)
4384 for (
int I = 0;
I < (int)SrcAfterLHS.
size() &&
I < 3;
I++) {
4385 if (
I < (
int)Src.size() && Src[
I] != SrcAfterLHS[
I] &&
4386 dependsOnSlot(LHSBits,
I)) {
4395 if (!Stale && !RHSOp.first) {
4396 int Slot = findSlot(RHSBitsOrig,
RHS, SrcBeforeRecurse);
4398 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4404 if (!Stale && !LHSOp.first) {
4405 int Slot = findSlot(LHSBitsOrig,
LHS, SrcBeforeRecurse);
4407 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4412 Src = std::move(SrcBeforeRecurse);
4413 LHSBits = LHSBitsOrig;
4414 RHSBits = RHSBitsOrig;
4420 return std::make_pair(0, 0);
4424 switch (
MI->getOpcode()) {
4425 case TargetOpcode::G_AND:
4426 TTbl = LHSBits & RHSBits;
4428 case TargetOpcode::G_OR:
4429 TTbl = LHSBits | RHSBits;
4431 case TargetOpcode::G_XOR:
4432 TTbl = LHSBits ^ RHSBits;
4438 return std::make_pair(NumOpcodes + 1, TTbl);
4441bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4442 if (!Subtarget->hasBitOp3Insts())
4446 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4447 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4453 unsigned NumOpcodes;
4455 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4459 if (NumOpcodes < 2 || Src.empty())
4462 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4463 if (NumOpcodes == 2 && IsB32) {
4471 }
else if (NumOpcodes < 4) {
4478 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4479 if (!IsB32 && STI.hasTrue16BitInsts())
4480 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4481 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4482 unsigned CBL = STI.getConstantBusLimit(
Opc);
4483 MachineBasicBlock *
MBB =
MI.getParent();
4486 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4487 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4488 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4494 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4505 while (Src.size() < 3)
4506 Src.push_back(Src[0]);
4523 MI.eraseFromParent();
4528bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4530 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4533 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4535 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4537 MachineBasicBlock *
MBB =
MI.getParent();
4541 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4544 .
addImm(Subtarget->getWavefrontSizeLog2())
4551 MI.eraseFromParent();
4557 if (!
I.isPreISelOpcode()) {
4559 return selectCOPY(
I);
4563 switch (
I.getOpcode()) {
4564 case TargetOpcode::G_AND:
4565 case TargetOpcode::G_OR:
4566 case TargetOpcode::G_XOR:
4567 if (selectBITOP3(
I))
4571 return selectG_AND_OR_XOR(
I);
4572 case TargetOpcode::G_ADD:
4573 case TargetOpcode::G_SUB:
4574 case TargetOpcode::G_PTR_ADD:
4577 return selectG_ADD_SUB(
I);
4578 case TargetOpcode::G_UADDO:
4579 case TargetOpcode::G_USUBO:
4580 case TargetOpcode::G_UADDE:
4581 case TargetOpcode::G_USUBE:
4582 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4583 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4584 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4585 return selectG_AMDGPU_MAD_64_32(
I);
4586 case TargetOpcode::G_INTTOPTR:
4587 case TargetOpcode::G_BITCAST:
4588 case TargetOpcode::G_PTRTOINT:
4589 case TargetOpcode::G_FREEZE:
4590 return selectCOPY(
I);
4591 case TargetOpcode::G_FNEG:
4594 return selectG_FNEG(
I);
4595 case TargetOpcode::G_FABS:
4598 return selectG_FABS(
I);
4599 case TargetOpcode::G_EXTRACT:
4600 return selectG_EXTRACT(
I);
4601 case TargetOpcode::G_MERGE_VALUES:
4602 case TargetOpcode::G_CONCAT_VECTORS:
4603 return selectG_MERGE_VALUES(
I);
4604 case TargetOpcode::G_UNMERGE_VALUES:
4605 return selectG_UNMERGE_VALUES(
I);
4606 case TargetOpcode::G_BUILD_VECTOR:
4607 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4608 return selectG_BUILD_VECTOR(
I);
4609 case TargetOpcode::G_IMPLICIT_DEF:
4610 return selectG_IMPLICIT_DEF(
I);
4611 case TargetOpcode::G_INSERT:
4612 return selectG_INSERT(
I);
4613 case TargetOpcode::G_INTRINSIC:
4614 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4615 return selectG_INTRINSIC(
I);
4616 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4617 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4618 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4619 case TargetOpcode::G_ICMP:
4620 case TargetOpcode::G_FCMP:
4621 if (selectG_ICMP_or_FCMP(
I))
4624 case TargetOpcode::G_LOAD:
4625 case TargetOpcode::G_ZEXTLOAD:
4626 case TargetOpcode::G_SEXTLOAD:
4627 case TargetOpcode::G_STORE:
4628 case TargetOpcode::G_ATOMIC_CMPXCHG:
4629 case TargetOpcode::G_ATOMICRMW_XCHG:
4630 case TargetOpcode::G_ATOMICRMW_ADD:
4631 case TargetOpcode::G_ATOMICRMW_SUB:
4632 case TargetOpcode::G_ATOMICRMW_AND:
4633 case TargetOpcode::G_ATOMICRMW_OR:
4634 case TargetOpcode::G_ATOMICRMW_XOR:
4635 case TargetOpcode::G_ATOMICRMW_MIN:
4636 case TargetOpcode::G_ATOMICRMW_MAX:
4637 case TargetOpcode::G_ATOMICRMW_UMIN:
4638 case TargetOpcode::G_ATOMICRMW_UMAX:
4639 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4640 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4641 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4642 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4643 case TargetOpcode::G_ATOMICRMW_FADD:
4644 case TargetOpcode::G_ATOMICRMW_FMIN:
4645 case TargetOpcode::G_ATOMICRMW_FMAX:
4646 return selectG_LOAD_STORE_ATOMICRMW(
I);
4647 case TargetOpcode::G_SELECT:
4648 return selectG_SELECT(
I);
4649 case TargetOpcode::G_TRUNC:
4650 return selectG_TRUNC(
I);
4651 case TargetOpcode::G_SEXT:
4652 case TargetOpcode::G_ZEXT:
4653 case TargetOpcode::G_ANYEXT:
4654 case TargetOpcode::G_SEXT_INREG:
4658 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4661 return selectG_SZA_EXT(
I);
4662 case TargetOpcode::G_FPEXT:
4663 if (selectG_FPEXT(
I))
4666 case TargetOpcode::G_BRCOND:
4667 return selectG_BRCOND(
I);
4668 case TargetOpcode::G_GLOBAL_VALUE:
4669 return selectG_GLOBAL_VALUE(
I);
4670 case TargetOpcode::G_PTRMASK:
4671 return selectG_PTRMASK(
I);
4672 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4673 return selectG_EXTRACT_VECTOR_ELT(
I);
4674 case TargetOpcode::G_INSERT_VECTOR_ELT:
4675 return selectG_INSERT_VECTOR_ELT(
I);
4676 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4677 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4678 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4679 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4680 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4683 assert(Intr &&
"not an image intrinsic with image pseudo");
4684 return selectImageIntrinsic(
I, Intr);
4686 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4687 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4688 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4689 return selectBVHIntersectRayIntrinsic(
I);
4690 case AMDGPU::G_SBFX:
4691 case AMDGPU::G_UBFX:
4692 return selectG_SBFX_UBFX(
I);
4693 case AMDGPU::G_SI_CALL:
4694 I.setDesc(TII.get(AMDGPU::SI_CALL));
4696 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4697 return selectWaveAddress(
I);
4698 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4699 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4702 case AMDGPU::G_STACKRESTORE:
4703 return selectStackRestore(
I);
4705 return selectPHI(
I);
4706 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4707 return selectCOPY_SCC_VCC(
I);
4708 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4709 return selectCOPY_VCC_SCC(
I);
4710 case AMDGPU::G_AMDGPU_READANYLANE:
4711 return selectReadAnyLane(
I);
4712 case TargetOpcode::G_CONSTANT:
4713 case TargetOpcode::G_FCONSTANT:
4721AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4728std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4729 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4733 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4734 Src =
MI->getOperand(1).getReg();
4737 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4742 if (
LHS &&
LHS->isZero()) {
4744 Src =
MI->getOperand(2).getReg();
4748 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4749 Src =
MI->getOperand(1).getReg();
4756 return std::pair(Src, Mods);
4759std::pair<Register, unsigned>
4760AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4762 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4764 return std::pair(Src, Mods);
4767Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4769 bool ForceVGPR)
const {
4770 if ((Mods != 0 || ForceVGPR) &&
4771 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4778 TII.
get(AMDGPU::COPY), VGPRSrc)
4790AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4792 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4797AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4800 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4803 [=](MachineInstrBuilder &MIB) {
4804 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4806 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4807 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4808 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4813AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4816 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4821 [=](MachineInstrBuilder &MIB) {
4822 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4824 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4825 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4826 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4831AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4833 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4834 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4835 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4840AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4843 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4846 [=](MachineInstrBuilder &MIB) {
4847 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4849 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4854AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4858 std::tie(Src, Mods) =
4859 selectVOP3ModsImpl(Root.
getReg(),
false);
4862 [=](MachineInstrBuilder &MIB) {
4863 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4865 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4870AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4873 std::tie(Src, Mods) =
4874 selectVOP3ModsImpl(Root.
getReg(),
true,
4878 [=](MachineInstrBuilder &MIB) {
4879 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4881 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4886AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4889 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4892 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4917 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4922 return DstSize * 2 == SrcSize;
4928 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4932 std::optional<ValueAndVReg> ShiftAmt;
4933 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4936 unsigned Shift = ShiftAmt->Value.getZExtValue();
4937 return Shift * 2 == SrcSize;
4945 if (
MI->getOpcode() != AMDGPU::G_SHL)
4949 std::optional<ValueAndVReg> ShiftAmt;
4950 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4953 unsigned Shift = ShiftAmt->Value.getZExtValue();
4954 return Shift * 2 == SrcSize;
4962 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4964 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4965 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
5135static std::optional<std::pair<Register, SrcStatus>>
5140 unsigned Opc =
MI->getOpcode();
5144 case AMDGPU::G_BITCAST:
5145 return std::optional<std::pair<Register, SrcStatus>>(
5146 {
MI->getOperand(1).getReg(), Curr.second});
5148 if (
MI->getOperand(1).getReg().isPhysical())
5149 return std::nullopt;
5150 return std::optional<std::pair<Register, SrcStatus>>(
5151 {
MI->getOperand(1).getReg(), Curr.second});
5152 case AMDGPU::G_FNEG: {
5155 return std::nullopt;
5156 return std::optional<std::pair<Register, SrcStatus>>(
5157 {
MI->getOperand(1).getReg(), Stat});
5164 switch (Curr.second) {
5167 return std::optional<std::pair<Register, SrcStatus>>(
5170 if (Curr.first ==
MI->getOperand(0).getReg())
5171 return std::optional<std::pair<Register, SrcStatus>>(
5173 return std::optional<std::pair<Register, SrcStatus>>(
5185 return std::optional<std::pair<Register, SrcStatus>>(
5189 if (Curr.first ==
MI->getOperand(0).getReg())
5190 return std::optional<std::pair<Register, SrcStatus>>(
5192 return std::optional<std::pair<Register, SrcStatus>>(
5198 return std::optional<std::pair<Register, SrcStatus>>(
5203 return std::optional<std::pair<Register, SrcStatus>>(
5208 return std::optional<std::pair<Register, SrcStatus>>(
5213 return std::optional<std::pair<Register, SrcStatus>>(
5219 return std::nullopt;
5229 bool HasNeg =
false;
5231 bool HasOpsel =
true;
5236 unsigned Opc =
MI->getOpcode();
5238 if (
Opc == TargetOpcode::G_INTRINSIC) {
5241 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5268 while (
Depth <= MaxDepth && Curr.has_value()) {
5271 Statlist.push_back(Curr.value());
5278static std::pair<Register, SrcStatus>
5285 while (
Depth <= MaxDepth && Curr.has_value()) {
5291 LastSameOrNeg = Curr.value();
5296 return LastSameOrNeg;
5303 return Width1 == Width2;
5338 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5339 IsHalfState(HiStat);
5342std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5348 return {RootReg, Mods};
5351 SearchOptions SO(RootReg, MRI);
5364 if (MRI.getType(RootReg).getSizeInBits() == 128) {
5366 return {Stat.first, Mods};
5369 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5371 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5372 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5374 return {Stat.first, Mods};
5380 if (StatlistHi.
empty()) {
5382 return {Stat.first, Mods};
5388 if (StatlistLo.
empty()) {
5390 return {Stat.first, Mods};
5393 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5394 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5395 if (StatlistHi[
I].first == StatlistLo[J].first &&
5397 StatlistHi[
I].first, RootReg, TII, MRI))
5398 return {StatlistHi[
I].first,
5399 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5405 return {Stat.first, Mods};
5415 return RB->
getID() == RBNo;
5432 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5433 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5437 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5446 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5454AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5459 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5463 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5464 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5469AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5471 return selectVOP3PRetHelper(Root);
5475AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5477 return selectVOP3PRetHelper(Root,
true);
5481AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5485 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5489 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5493AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5496 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5499 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5500 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5505AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5508 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5512 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5516AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5519 "expected i1 value");
5525 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5533 switch (Elts.
size()) {
5535 DstRegClass = &AMDGPU::VReg_256RegClass;
5538 DstRegClass = &AMDGPU::VReg_128RegClass;
5541 DstRegClass = &AMDGPU::VReg_64RegClass;
5548 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5550 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5561 if (ModOpcode == TargetOpcode::G_FNEG) {
5565 for (
auto El : Elts) {
5571 if (Elts.size() != NegAbsElts.
size()) {
5580 assert(ModOpcode == TargetOpcode::G_FABS);
5588AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5594 assert(BV->getNumSources() > 0);
5596 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5597 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5600 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5601 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5608 if (BV->getNumSources() == EltsF32.
size()) {
5614 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5615 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5619AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5625 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5633 if (CV->getNumSources() == EltsV2F16.
size()) {
5640 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5641 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5645AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5651 assert(CV->getNumSources() > 0);
5652 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5654 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5658 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5659 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5666 if (CV->getNumSources() == EltsV2F16.
size()) {
5673 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5674 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5678AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5679 std::optional<FPValueAndVReg> FPValReg;
5681 if (TII.isInlineConstant(FPValReg->Value)) {
5682 return {{[=](MachineInstrBuilder &MIB) {
5683 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5693 if (TII.isInlineConstant(ICst)) {
5703AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5709 std::optional<ValueAndVReg> ShiftAmt;
5711 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5712 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5713 Key = ShiftAmt->Value.getZExtValue() / 8;
5718 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5719 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5724AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5731 std::optional<ValueAndVReg> ShiftAmt;
5733 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5734 ShiftAmt->Value.getZExtValue() == 16) {
5740 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5741 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5746AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5753 S32 = matchAnyExtendFromS32(Src);
5757 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5762 Src =
Def->getOperand(2).getReg();
5769 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5770 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5775AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5778 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5782 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5783 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5789AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5792 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5798 [=](MachineInstrBuilder &MIB) {
5800 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5802 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5807AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5810 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5816 [=](MachineInstrBuilder &MIB) {
5818 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5820 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5827bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5829 bool IsSigned)
const {
5830 if (!Subtarget->hasScaleOffset())
5834 MachineMemOperand *MMO = *
MI.memoperands_begin();
5846 OffsetReg =
Def->Reg;
5861 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5865 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5866 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5867 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5868 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5881bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5885 bool *ScaleOffset)
const {
5887 MachineBasicBlock *
MBB =
MI->getParent();
5892 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5894 if (AddrInfo.
empty())
5897 const GEPInfo &GEPI = AddrInfo[0];
5898 std::optional<int64_t> EncodedImm;
5901 *ScaleOffset =
false;
5906 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5907 AddrInfo.
size() > 1) {
5908 const GEPInfo &GEPI2 = AddrInfo[1];
5909 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5910 Register OffsetReg = GEPI2.SgprParts[1];
5913 selectScaleOffset(Root, OffsetReg,
false );
5914 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5916 Base = GEPI2.SgprParts[0];
5917 *SOffset = OffsetReg;
5926 auto SKnown =
VT->getKnownBits(*SOffset);
5927 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5939 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5940 Base = GEPI.SgprParts[0];
5946 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5952 Base = GEPI.SgprParts[0];
5953 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5954 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5959 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5960 Register OffsetReg = GEPI.SgprParts[1];
5962 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5963 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5965 Base = GEPI.SgprParts[0];
5966 *SOffset = OffsetReg;
5975AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5978 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5980 return std::nullopt;
5982 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5983 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5987AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5989 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5991 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5992 return std::nullopt;
5994 const GEPInfo &GEPInfo = AddrInfo[0];
5995 Register PtrReg = GEPInfo.SgprParts[0];
5996 std::optional<int64_t> EncodedImm =
5999 return std::nullopt;
6002 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
6003 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
6008AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
6011 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
6013 return std::nullopt;
6016 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
6017 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6018 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
6022AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
6026 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
6027 return std::nullopt;
6030 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
6031 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6033 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
6036std::pair<Register, int> AMDGPUInstructionSelector::selectFlatOffsetImpl(
6042 if (!STI.hasFlatInstOffsets())
6046 int64_t ConstOffset;
6048 std::tie(PtrBase, ConstOffset, IsInBounds) =
6049 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6055 if (ConstOffset == 0 ||
6057 !isFlatScratchBaseLegal(Root.
getReg())) ||
6061 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
6062 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
6065 return std::pair(PtrBase, ConstOffset);
6069AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
6073 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6074 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6079AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
6080 auto PtrWithOffset =
6084 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6085 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6090AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
6091 auto PtrWithOffset =
6095 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6096 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6102AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
6104 bool NeedIOffset)
const {
6107 int64_t ConstOffset;
6108 int64_t ImmOffset = 0;
6112 std::tie(PtrBase, ConstOffset, std::ignore) =
6113 getPtrBaseWithConstantOffset(Addr, *MRI);
6115 if (ConstOffset != 0) {
6120 ImmOffset = ConstOffset;
6123 if (isSGPR(PtrBaseDef->Reg)) {
6124 if (ConstOffset > 0) {
6130 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
6132 std::tie(SplitImmOffset, RemainderOffset) =
6137 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
6140 MachineBasicBlock *
MBB =
MI->getParent();
6142 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6144 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6146 .
addImm(RemainderOffset);
6150 [=](MachineInstrBuilder &MIB) {
6153 [=](MachineInstrBuilder &MIB) {
6156 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
6157 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6160 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
6161 [=](MachineInstrBuilder &MIB) {
6164 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6174 unsigned NumLiterals =
6175 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
6176 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
6177 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
6178 return std::nullopt;
6185 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6190 if (isSGPR(SAddr)) {
6191 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6195 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6196 Subtarget->hasSignedGVSOffset());
6197 if (
Register VOffset = matchExtendFromS32OrS32(
6198 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6200 return {{[=](MachineInstrBuilder &MIB) {
6203 [=](MachineInstrBuilder &MIB) {
6206 [=](MachineInstrBuilder &MIB) {
6209 [=](MachineInstrBuilder &MIB) {
6213 return {{[=](MachineInstrBuilder &MIB) {
6216 [=](MachineInstrBuilder &MIB) {
6219 [=](MachineInstrBuilder &MIB) {
6229 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6230 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6231 return std::nullopt;
6236 MachineBasicBlock *
MBB =
MI->getParent();
6237 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6239 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6244 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6245 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6246 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6247 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6250 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6251 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6252 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6257AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6258 return selectGlobalSAddr(Root, 0);
6262AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6268 return selectGlobalSAddr(Root, PassedCPol);
6272AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6278 return selectGlobalSAddr(Root, PassedCPol);
6282AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6287AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6294 return selectGlobalSAddr(Root, PassedCPol,
false);
6298AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6305 return selectGlobalSAddr(Root, PassedCPol,
false);
6309AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6312 int64_t ConstOffset;
6313 int64_t ImmOffset = 0;
6317 std::tie(PtrBase, ConstOffset, std::ignore) =
6318 getPtrBaseWithConstantOffset(Addr, *MRI);
6320 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6324 ImmOffset = ConstOffset;
6328 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6329 int FI = AddrDef->MI->getOperand(1).
getIndex();
6332 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6338 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6339 Register LHS = AddrDef->MI->getOperand(1).getReg();
6340 Register RHS = AddrDef->MI->getOperand(2).getReg();
6344 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6345 isSGPR(RHSDef->Reg)) {
6346 int FI = LHSDef->MI->getOperand(1).getIndex();
6350 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6352 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6360 return std::nullopt;
6363 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6364 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6369bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6371 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6377 auto VKnown =
VT->getKnownBits(VAddr);
6380 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6381 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6382 return (VMax & 3) + (
SMax & 3) >= 4;
6386AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6389 int64_t ConstOffset;
6390 int64_t ImmOffset = 0;
6394 std::tie(PtrBase, ConstOffset, std::ignore) =
6395 getPtrBaseWithConstantOffset(Addr, *MRI);
6398 if (ConstOffset != 0 &&
6402 ImmOffset = ConstOffset;
6406 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6407 return std::nullopt;
6409 Register RHS = AddrDef->MI->getOperand(2).getReg();
6410 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6411 return std::nullopt;
6413 Register LHS = AddrDef->MI->getOperand(1).getReg();
6416 if (OrigAddr != Addr) {
6417 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6418 return std::nullopt;
6420 if (!isFlatScratchBaseLegalSV(OrigAddr))
6421 return std::nullopt;
6424 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6425 return std::nullopt;
6427 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6431 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6432 int FI = LHSDef->MI->getOperand(1).getIndex();
6434 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6436 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6437 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6446 return std::nullopt;
6449 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6450 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6451 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6452 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6457AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6459 MachineBasicBlock *
MBB =
MI->getParent();
6461 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6466 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6471 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6475 return {{[=](MachineInstrBuilder &MIB) {
6478 [=](MachineInstrBuilder &MIB) {
6481 [=](MachineInstrBuilder &MIB) {
6486 [=](MachineInstrBuilder &MIB) {
6495 std::optional<int> FI;
6498 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6500 int64_t ConstOffset;
6501 std::tie(PtrBase, ConstOffset, std::ignore) =
6502 getPtrBaseWithConstantOffset(VAddr, *MRI);
6503 if (ConstOffset != 0) {
6504 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6505 (!STI.privateMemoryResourceIsRangeChecked() ||
6506 VT->signBitIsZero(PtrBase))) {
6507 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6508 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6514 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6518 return {{[=](MachineInstrBuilder &MIB) {
6521 [=](MachineInstrBuilder &MIB) {
6527 [=](MachineInstrBuilder &MIB) {
6532 [=](MachineInstrBuilder &MIB) {
6537bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6542 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6547 return VT->signBitIsZero(
Base);
6550bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6552 unsigned Size)
const {
6553 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6558 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6563 return VT->signBitIsZero(
Base);
6568 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6569 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6576bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6584 if (STI.hasSignedScratchOffsets())
6590 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6591 std::optional<ValueAndVReg> RhsValReg =
6597 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6598 RhsValReg->Value.getSExtValue() > -0x40000000)
6602 return VT->signBitIsZero(
LHS);
6607bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6615 if (STI.hasSignedScratchOffsets())
6620 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6625bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6629 if (STI.hasSignedScratchOffsets())
6634 std::optional<DefinitionAndSourceRegister> BaseDef =
6636 std::optional<ValueAndVReg> RHSOffset =
6646 (RHSOffset->Value.getSExtValue() < 0 &&
6647 RHSOffset->Value.getSExtValue() > -0x40000000)))
6650 Register LHS = BaseDef->MI->getOperand(1).getReg();
6651 Register RHS = BaseDef->MI->getOperand(2).getReg();
6652 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6655bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6656 unsigned ShAmtBits)
const {
6657 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6659 std::optional<APInt>
RHS =
6664 if (
RHS->countr_one() >= ShAmtBits)
6667 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6668 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6672AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6675 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6677 std::optional<DefinitionAndSourceRegister>
Def =
6679 assert(Def &&
"this shouldn't be an optional result");
6684 [=](MachineInstrBuilder &MIB) {
6687 [=](MachineInstrBuilder &MIB) {
6690 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6701 if (!TII.isLegalMUBUFImmOffset(
Offset))
6709 [=](MachineInstrBuilder &MIB) {
6712 [=](MachineInstrBuilder &MIB) {
6720 !TII.isLegalMUBUFImmOffset(
Offset))
6724 [=](MachineInstrBuilder &MIB) {
6727 [=](MachineInstrBuilder &MIB) {
6734std::pair<Register, unsigned>
6735AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6736 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6737 int64_t ConstAddr = 0;
6741 std::tie(PtrBase,
Offset, std::ignore) =
6742 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6745 if (isDSOffsetLegal(PtrBase,
Offset)) {
6747 return std::pair(PtrBase,
Offset);
6749 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6758 return std::pair(Root.
getReg(), 0);
6762AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6765 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6767 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6773AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6774 return selectDSReadWrite2(Root, 4);
6778AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6779 return selectDSReadWrite2(Root, 8);
6783AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6784 unsigned Size)
const {
6789 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6791 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6795std::pair<Register, unsigned>
6796AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6797 unsigned Size)
const {
6798 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6799 int64_t ConstAddr = 0;
6803 std::tie(PtrBase,
Offset, std::ignore) =
6804 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6807 int64_t OffsetValue0 =
Offset;
6809 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6811 return std::pair(PtrBase, OffsetValue0 /
Size);
6813 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6821 return std::pair(Root.
getReg(), 0);
6829std::tuple<Register, int64_t, bool>
6830AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6833 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6834 return {Root, 0,
false};
6837 std::optional<ValueAndVReg> MaybeOffset =
6840 return {Root, 0,
false};
6860 B.buildInstr(AMDGPU::S_MOV_B32)
6863 B.buildInstr(AMDGPU::S_MOV_B32)
6870 B.buildInstr(AMDGPU::REG_SEQUENCE)
6873 .addImm(AMDGPU::sub0)
6875 .addImm(AMDGPU::sub1);
6880 B.buildInstr(AMDGPU::S_MOV_B64)
6885 B.buildInstr(AMDGPU::REG_SEQUENCE)
6888 .addImm(AMDGPU::sub0_sub1)
6890 .addImm(AMDGPU::sub2_sub3);
6897 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6906 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6913AMDGPUInstructionSelector::MUBUFAddressData
6914AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6915 MUBUFAddressData
Data;
6921 std::tie(PtrBase,
Offset, std::ignore) =
6922 getPtrBaseWithConstantOffset(Src, *MRI);
6928 if (MachineInstr *InputAdd
6930 Data.N2 = InputAdd->getOperand(1).getReg();
6931 Data.N3 = InputAdd->getOperand(2).getReg();
6946bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6952 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6953 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6959void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6961 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6965 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6966 B.buildInstr(AMDGPU::S_MOV_B32)
6972bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6977 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6980 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6981 if (!shouldUseAddr64(AddrData))
6987 Offset = AddrData.Offset;
6993 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6995 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
7008 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
7019 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
7023bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
7028 if (STI.useFlatForGlobal())
7031 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
7032 if (shouldUseAddr64(AddrData))
7038 Offset = AddrData.Offset;
7044 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
7049AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
7055 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
7061 [=](MachineInstrBuilder &MIB) {
7064 [=](MachineInstrBuilder &MIB) {
7067 [=](MachineInstrBuilder &MIB) {
7070 else if (STI.hasRestrictedSOffset())
7071 MIB.
addReg(AMDGPU::SGPR_NULL);
7075 [=](MachineInstrBuilder &MIB) {
7085AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
7090 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
7094 [=](MachineInstrBuilder &MIB) {
7097 [=](MachineInstrBuilder &MIB) {
7100 else if (STI.hasRestrictedSOffset())
7101 MIB.
addReg(AMDGPU::SGPR_NULL);
7113AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
7118 SOffset = AMDGPU::SGPR_NULL;
7120 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
7124static std::optional<uint64_t>
7128 if (!OffsetVal || !
isInt<32>(*OffsetVal))
7129 return std::nullopt;
7130 return Lo_32(*OffsetVal);
7134AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
7135 std::optional<uint64_t> OffsetVal =
7140 std::optional<int64_t> EncodedImm =
7145 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7149AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
7156 std::optional<int64_t> EncodedImm =
7161 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7165AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
7173 return std::nullopt;
7175 std::optional<int64_t> EncodedOffset =
7178 return std::nullopt;
7181 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
7182 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
7185std::pair<Register, unsigned>
7186AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
7187 bool &Matched)
const {
7192 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
7202 const auto CheckAbsNeg = [&]() {
7207 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
7238AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
7243 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7248 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7249 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7254AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
7258 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7261 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7262 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7266bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
7270 Register CCReg =
I.getOperand(0).getReg();
7275 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
7276 .
addImm(
I.getOperand(2).getImm());
7280 I.eraseFromParent();
7281 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
7285bool AMDGPUInstructionSelector::selectSGetBarrierState(
7289 const MachineOperand &BarOp =
I.getOperand(2);
7290 std::optional<int64_t> BarValImm =
7294 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7298 MachineInstrBuilder MIB;
7299 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
7300 : AMDGPU::S_GET_BARRIER_STATE_M0;
7303 auto DstReg =
I.getOperand(0).getReg();
7304 const TargetRegisterClass *DstRC =
7305 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7306 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7312 I.eraseFromParent();
7317 if (HasInlineConst) {
7321 case Intrinsic::amdgcn_s_barrier_join:
7322 return AMDGPU::S_BARRIER_JOIN_IMM;
7323 case Intrinsic::amdgcn_s_wakeup_barrier:
7324 return AMDGPU::S_WAKEUP_BARRIER_IMM;
7325 case Intrinsic::amdgcn_s_get_named_barrier_state:
7326 return AMDGPU::S_GET_BARRIER_STATE_IMM;
7332 case Intrinsic::amdgcn_s_barrier_join:
7333 return AMDGPU::S_BARRIER_JOIN_M0;
7334 case Intrinsic::amdgcn_s_wakeup_barrier:
7335 return AMDGPU::S_WAKEUP_BARRIER_M0;
7336 case Intrinsic::amdgcn_s_get_named_barrier_state:
7337 return AMDGPU::S_GET_BARRIER_STATE_M0;
7342bool AMDGPUInstructionSelector::selectNamedBarrierInit(
7346 const MachineOperand &BarOp =
I.getOperand(1);
7347 const MachineOperand &CntOp =
I.getOperand(2);
7351 if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
7352 std::optional<int64_t> CntImm =
7354 if (CntImm && *CntImm == 0) {
7355 std::optional<int64_t> BarValImm =
7358 auto BarID = ((*BarValImm) >> 4) & 0x3F;
7359 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
7361 I.eraseFromParent();
7368 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7374 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7381 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7387 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7388 constexpr unsigned ShAmt = 16;
7394 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7404 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7405 ? AMDGPU::S_BARRIER_INIT_M0
7406 : AMDGPU::S_BARRIER_SIGNAL_M0;
7407 MachineInstrBuilder MIB;
7410 I.eraseFromParent();
7414bool AMDGPUInstructionSelector::selectNamedBarrierInst(
7418 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7421 std::optional<int64_t> BarValImm =
7426 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7432 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7438 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7443 MachineInstrBuilder MIB;
7447 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7448 auto DstReg =
I.getOperand(0).getReg();
7449 const TargetRegisterClass *DstRC =
7450 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7451 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7457 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7461 I.eraseFromParent();
7468 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7469 "Expected G_CONSTANT");
7470 MIB.
addImm(
MI.getOperand(1).getCImm()->getSExtValue());
7476 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7477 "Expected G_CONSTANT");
7478 MIB.
addImm(-
MI.getOperand(1).getCImm()->getSExtValue());
7484 const MachineOperand &
Op =
MI.getOperand(1);
7485 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT &&
OpIdx == -1);
7486 MIB.
addImm(
Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7489void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
7491 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7492 "Expected G_CONSTANT");
7493 MIB.
addImm(
MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
7501 const MachineOperand &
Op =
MI.getOperand(
OpIdx);
7518 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7522void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7524 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7529void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7531 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7537void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7539 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7544void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7546 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7552void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7554 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7559void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7561 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7566void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7568 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7573void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7575 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7584 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7593 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7600void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7602 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7603 const uint32_t Cpol =
MI.getOperand(
OpIdx).getImm() &
7618 const APFloat &APF =
MI.getOperand(1).getFPImm()->getValueAPF();
7620 assert(ExpVal != INT_MIN);
7638 if (
MI.getOperand(
OpIdx).getImm())
7640 MIB.
addImm((int64_t)Mods);
7647 if (
MI.getOperand(
OpIdx).getImm())
7649 MIB.
addImm((int64_t)Mods);
7655 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7663 MIB.
addImm((int64_t)Mods);
7669 uint32_t
V =
MI.getOperand(2).getImm();
7672 if (!Subtarget->hasSafeCUPrefetch())
7678void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7680 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7689bool AMDGPUInstructionSelector::isInlineImmediate(
const APInt &Imm)
const {
7690 return TII.isInlineConstant(Imm);
7693bool AMDGPUInstructionSelector::isInlineImmediate(
const APFloat &Imm)
const {
7694 return TII.isInlineConstant(Imm);
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static Register getLegalRegBank(Register NewReg, Register RootReg, const AMDGPURegisterBankInfo &RBI, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const SIInstrInfo &TII)
static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is shift left with half bits, such as reg0:2n =G_SHL reg1:2n, CONST(n)
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
static bool checkRB(Register Reg, unsigned int RBNo, const AMDGPURegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods)
static bool isTruncHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is truncating to half, such as reg0:n = G_TRUNC reg1:2n
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static TypeClass isVectorOfTwoOrScalar(Register Reg, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static bool isSameBitWidth(Register Reg1, Register Reg2, const MachineRegisterInfo &MRI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static bool isAsyncLDSDMA(Intrinsic::ID Intr)
static void diagnoseUnsupportedIntrinsic(const MachineInstr &I)
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelValueTracking &ValueTracking)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static std::pair< Register, SrcStatus > getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static std::optional< std::pair< Register, SrcStatus > > calcNextStatus(std::pair< Register, SrcStatus > Curr, const MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg, Register RootReg, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static SmallVector< std::pair< Register, SrcStatus > > getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static bool isUnmergeHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is reg0:n, reg1:n = G_UNMERGE_VALUES reg2:2n
static SrcStatus getNegStatus(Register Reg, SrcStatus S, const MachineRegisterInfo &MRI)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is logic shift right with half bits, such as reg0:2n =G_LSHR reg1:2n,...
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool isAllZeros(StringRef Arr)
Return true if the array is empty or all zeros.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
Machine Check Debug Module
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
This is used to control valid status that current MI supports.
bool checkOptions(SrcStatus Stat) const
SearchOptions(Register Reg, const MachineRegisterInfo &MRI)
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelValueTracking *VT, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
DILocation * get() const
Get the underlying DILocation.
Diagnostic information for unsupported feature in backend.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
TypeSize getValue() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setReturnAddressIsTaken(bool s)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis providing profile information.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool isGenericOpcode(unsigned Opc)
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getGlobalSaddrOp(uint32_t Opcode)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
SpecificRegisterMatch m_SpecificReg(Register RequestedReg)
Matches a register only if it is equal to RequestedReg.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, TargetOpcode::G_MUL, true > m_GMul(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
NodeAddr< DefNode * > Def
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
FunctionAddr VTableAddr Value
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr RegState getUndefRegState(bool B)
@ Default
The result value is uniform if and only if all operands are uniform.
unsigned AtomicNoRetBaseOpcode
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.