#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),

#include "AMDGPUGenGlobalISel.inc"

#include "AMDGPUGenGlobalISel.inc"
 
  MRI = &MF.getRegInfo();

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

    const LLT Ty = MRI.getType(Reg);

    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
 
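// Rewrite a copy-like intrinsic (wqm, softwqm, strict.wwm, strict.wqm) into the
// given target opcode and constrain both operands to the same register class.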
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
         RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
 
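// Select a generic COPY. Copies into a VCC (condition) register need special
// handling: an SCC or scalar source is turned into a lane mask, folding
// known-constant sources into an immediate mask where possible.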
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {

      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =

            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

        Register MaskedReg = MRI->createVirtualRegister(SrcRC);

          assert(Subtarget->useRealTrue16Insts());
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

          bool IsSGPR = TRI.isSGPRClass(SrcRC);
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, *MRI);

    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);

bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {

  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {

        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
 
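// Lower a copy from SCC into a VCC lane mask, using a wave-sized constant move
// when the source is a known constant and S_CSELECT otherwise.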
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =

    const int64_t Value = Arg->Value.getZExtValue();

      unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;

bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
 
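// Select G_PHI: pick a destination register class from the type and bank,
// constrain each incoming value, and rewrite to a target PHI.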
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

    MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);

      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);

                                           unsigned SubIdx) const {

  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;

    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
 
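// Select G_ADD/G_SUB. 32-bit values map directly to SALU/VALU add or sub;
// 64-bit values are split into lo/hi halves chained through the carry.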
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();

  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

        .add(I.getOperand(1))
        .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());

      .add(I.getOperand(1))
      .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
 
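// Select G_UADDO/G_USUBO/G_UADDE/G_USUBE. The VALU forms carry in/out through
// a lane-mask register; the SALU forms go through SCC via explicit copies.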
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
    .add(I.getOperand(2))
    .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))

bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));

  I.addImplicitDefUseOperands(*MF);
 
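// Select G_EXTRACT of a 32-bit-aligned slice as a subregister copy.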
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                    *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
 
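// Select G_UNMERGE_VALUES by copying each destination out of a subregister of
// the source.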
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
 
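// Select G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC of 16-bit elements, folding
// constant operands into a single 32-bit immediate or using pack/shift
// instructions; wider elements are handled as G_MERGE_VALUES.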
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

      const int64_t K0 = ConstSrc0->Value.getSExtValue();
      const int64_t K1 = ConstSrc1->Value.getSExtValue();
      uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
      uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
      uint32_t Imm = Lo16 | (Hi16 << 16);

        MI.eraseFromParent();
        return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));

        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);

    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {

      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
 
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
 
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
 
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)

    .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =

    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =

                                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;

    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:

    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
 
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,

      return ST.hasTrue16BitInsts()
                 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);

    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);

    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);

    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);

    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);

    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);

    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);

    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);

    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);

    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);

    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);

    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);

    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);

    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);

    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);

    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);

    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);

    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);

    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);

    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
 
 
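// Map a compare predicate and operand size to the corresponding scalar
// (S_CMP_*) opcode, if one exists for this subtarget.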
                                              unsigned Size) const {

    if (!STI.hasScalarCompareEq64())

      return AMDGPU::S_CMP_LG_U64;

      return AMDGPU::S_CMP_EQ_U64;

      return AMDGPU::S_CMP_LG_U32;

      return AMDGPU::S_CMP_EQ_U32;

      return AMDGPU::S_CMP_GT_I32;

      return AMDGPU::S_CMP_GE_I32;

      return AMDGPU::S_CMP_LT_I32;

      return AMDGPU::S_CMP_LE_I32;

      return AMDGPU::S_CMP_GT_U32;

      return AMDGPU::S_CMP_GE_U32;

      return AMDGPU::S_CMP_LT_U32;

      return AMDGPU::S_CMP_LE_U32;

      return AMDGPU::S_CMP_EQ_F32;

      return AMDGPU::S_CMP_GT_F32;

      return AMDGPU::S_CMP_GE_F32;

      return AMDGPU::S_CMP_LT_F32;

      return AMDGPU::S_CMP_LE_F32;

      return AMDGPU::S_CMP_LG_F32;

      return AMDGPU::S_CMP_O_F32;

      return AMDGPU::S_CMP_U_F32;

      return AMDGPU::S_CMP_NLG_F32;

      return AMDGPU::S_CMP_NLE_F32;

      return AMDGPU::S_CMP_NLT_F32;

      return AMDGPU::S_CMP_NGE_F32;

      return AMDGPU::S_CMP_NGT_F32;

      return AMDGPU::S_CMP_NEQ_F32;

    if (!STI.hasSALUFloatInsts())

      return AMDGPU::S_CMP_EQ_F16;

      return AMDGPU::S_CMP_GT_F16;

      return AMDGPU::S_CMP_GE_F16;

      return AMDGPU::S_CMP_LT_F16;

      return AMDGPU::S_CMP_LE_F16;

      return AMDGPU::S_CMP_LG_F16;

      return AMDGPU::S_CMP_O_F16;

      return AMDGPU::S_CMP_U_F16;

      return AMDGPU::S_CMP_NLG_F16;

      return AMDGPU::S_CMP_NLE_F16;

      return AMDGPU::S_CMP_NLT_F16;

      return AMDGPU::S_CMP_NGE_F16;

      return AMDGPU::S_CMP_NGT_F16;

      return AMDGPU::S_CMP_NEQ_F16;
 
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                               *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);

    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();

  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
 
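// Select llvm.amdgcn.ballot: a constant argument becomes a full or empty mask
// move; otherwise the source lane mask is copied (or ANDed with exec), and a
// 64-bit ballot on wave32 is widened with a zero high half via REG_SEQUENCE.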
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();

      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

      unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {

    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);

                                             AMDGPU::SReg_64RegClass, DL);

  I.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
 
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {

        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {

          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
 
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
 
 
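// Select the ds_gws_* intrinsics: the base offset must be uniform (folded to a
// constant or taken through readfirstlane) and is copied into an SGPR feeding
// M0; the optional data operand is constrained to a VGPR.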
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {

      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;

  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
 
 
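// Select an image (MIMG) intrinsic: gather dmask/data/address operand counts,
// then pick an opcode for the subtarget's MIMG encoding (NSA vs. default,
// GFX10/GFX11+, GFX90A) before emitting the machine instruction.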
bool AMDGPUInstructionSelector::selectImageIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;

                    TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {

      VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;

      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =

    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {

                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {

                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {

                                     NumVDataDwords, NumVAddrDwords);

            << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);

                                     NumVDataDwords, NumVAddrDwords);

      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
 
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;

  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;

  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
 
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);

  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {

      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
 
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
        MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

              .add(I.getOperand(3))

              .add(I.getOperand(2))
              .add(I.getOperand(1));

  I.eraseFromParent();
 
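// Select G_TRUNC as a subregister copy; 16-bit packing cases on the VALU may
// instead use an SDWA move or a shift/and/or sequence.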
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;

    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());

        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);

      .addReg(SrcReg, 0, AMDGPU::sub0);

      .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {

      MachineInstr *MovSDWA =
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

    unsigned SubRegIdx = DstSize < 32
                             ? static_cast<unsigned>(AMDGPU::sub0)
                             : TRI.getSubRegFromChannel(0, DstSize / 32);
    if (SubRegIdx == AMDGPU::NoSubRegister)

    const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

    if (SrcWithSubRC != SrcRC) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

    I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

    return &RBI.getRegBankFromRegClass(*RC, LLT());
 
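// Select G_SEXT/G_ZEXT/G_ANYEXT/G_SEXT_INREG using BFE, S_SEXT_* or
// implicit-def plus subregister patterns, depending on the source bank and the
// source/destination sizes.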
 2577bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
 const {
 
 2578  bool InReg = 
I.getOpcode() == AMDGPU::G_SEXT_INREG;
 
 2579  bool Signed = 
I.getOpcode() == AMDGPU::G_SEXT || InReg;
 
 2582  const Register DstReg = 
I.getOperand(0).getReg();
 
 2583  const Register SrcReg = 
I.getOperand(1).getReg();
 
 2585  const LLT DstTy = MRI->getType(DstReg);
 
 2586  const LLT SrcTy = MRI->getType(SrcReg);
 
 2587  const unsigned SrcSize = 
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
 
 2594  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
 
 2597  if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
 
 2599      return selectCOPY(
I);
 
 2601    const TargetRegisterClass *SrcRC =
 
 2602        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
 
 2603    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
 
 2604    const TargetRegisterClass *DstRC =
 
 2605        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
 
 2607    Register UndefReg = MRI->createVirtualRegister(SrcRC);
 
 2608    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
 
 2614    I.eraseFromParent();
 
 2616    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
 
 2617           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
 
 2620  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
 
 2626      MachineInstr *ExtI =
 
 2630      I.eraseFromParent();
 
 2634    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
 
 2635    MachineInstr *ExtI =
 
 2640    I.eraseFromParent();
 
 2644  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
 
 2645    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
 
 2646      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
 
 2647    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
 
 2650    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
 
 2651      const unsigned SextOpc = SrcSize == 8 ?
 
 2652        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
 
 2655      I.eraseFromParent();
 
 2656      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
 
 2661    if (DstSize > 32 && SrcSize == 32) {
 
 2662      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2663      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
 
 2678      I.eraseFromParent();
 
 2679      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
 
 2683    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
 2684    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
 
 2687    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
 
 2689      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
 
 2690      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2691      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
 
 2693      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
 
 2704      I.eraseFromParent();
 
 2705      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
 
 2720    I.eraseFromParent();
 
 2721    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
 
 2755  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
 
 2762  assert(Mask.size() == 2);
 
 2764  if (Mask[0] == 1 && Mask[1] <= 1) {
 
 
 2772bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
 
 2773  if (!Subtarget->hasSALUFloatInsts())
 
 2776  Register Dst = I.getOperand(0).getReg();
 
 2777  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
 
 2778  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
 
 2781  Register Src = I.getOperand(1).getReg();
 
 2787      BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
 
 2789      I.eraseFromParent();
 
 2790      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
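// Scalar f64 fneg: split the source into sub0/sub1, flip the sign bit of the
// high half (S_XOR_B32, or S_OR_B32 when a G_FABS has been folded in), then
// reassemble the result with a REG_SEQUENCE.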
 
 2797bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
 
 2810  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
 
 2811  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
 
 2816  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
 
 2820  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
 
 2821      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
 
 2824  MachineBasicBlock *BB = MI.getParent();
 
 2826  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2827  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2828  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2829  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2831  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
 2832    .addReg(Src, 0, AMDGPU::sub0);
 
 2833  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
 2834    .addReg(Src, 0, AMDGPU::sub1);
 
 2835  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
 
 2839  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
 
 2844  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
 
 2849  MI.eraseFromParent();
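// Scalar f64 fabs follows the same split/recombine pattern as selectG_FNEG
// above, but clears the sign bit of the high half with S_AND_B32.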
 
 2854bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
 
 2856  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
 
 2857  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
 
 2862  MachineBasicBlock *BB = MI.getParent();
 
 2864  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2865  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2866  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2867  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 2869  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
 
 2870      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
 
 2873  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
 2874    .addReg(Src, 0, AMDGPU::sub0);
 
 2875  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
 2876    .addReg(Src, 0, AMDGPU::sub1);
 
 2877  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
 
 2882  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
 
 2886  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
 
 2892  MI.eraseFromParent();
 
 2897  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
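// getAddrModeInfo walks the pointer operand of a load-like instruction (or the
// G_PREFETCH address) through chained G_PTR_ADDs, splitting each one into a
// constant offset plus SGPR/VGPR parts so the SMRD/FLAT selectors can pick an
// addressing mode.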
 
 
 2900void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
 2903  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
 
 2904  const MachineInstr *PtrMI =
 
 2905      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
 
 2909  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
 
 2914  for (unsigned i = 1; i != 3; ++i) {
 2915    const MachineOperand &GEPOp = PtrMI->getOperand(i);
 2916    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
 
 2921      assert(GEPInfo.Imm == 0);
 
 2925    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
 2926    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
 2927      GEPInfo.SgprParts.push_back(GEPOp.getReg());
 2929      GEPInfo.VgprParts.push_back(GEPOp.getReg());
 
 2933  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
 
 2936bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
 2937  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
 
 2940bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
 2941  if (!MI.hasOneMemOperand())
 
 2944  const MachineMemOperand *MMO = *MI.memoperands_begin();
 
 2957  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
 2958    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
 2959           AMDGPU::SGPRRegBankID;
 
 2962  return I && I->getMetadata("amdgpu.uniform");
 
 2966  for (const GEPInfo &GEPInfo : AddrInfo) {
 
 2967    if (!GEPInfo.VgprParts.empty())
 
 2973void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
 2974  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
 
 2977      STI.ldsRequiresM0Init()) {
 
 2981    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
 
 2986bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
 
 2993  if (Reg.isPhysical())
 2997  const unsigned Opcode = MI.getOpcode();
 
 2999  if (Opcode == AMDGPU::COPY)
 
 3002  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
 
 3003      Opcode == AMDGPU::G_XOR)
 
 3008    return GI->is(Intrinsic::amdgcn_class);
 
 3010  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
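// G_BRCOND: an SCC condition selects S_CBRANCH_SCC1, a VCC condition selects
// S_CBRANCH_VCCNZ; a VCC value that is not already a compare result is first
// ANDed with EXEC so inactive lanes cannot trigger the branch.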
 
 
 3013bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
 3015  MachineOperand &CondOp = I.getOperand(0);
 
 3021  const TargetRegisterClass *ConstrainRC;
 
 3028  if (!isVCC(CondReg, *MRI)) {
 
 3032    CondPhysReg = AMDGPU::SCC;
 
 3033    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
 
 3034    ConstrainRC = &AMDGPU::SReg_32RegClass;
 
 3041      const bool Is64 = STI.isWave64();
 
 3042      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
 
 3043      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
 
 3045      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
 
 3046      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
 
 3053    CondPhysReg = TRI.getVCC();
 
 3054    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
 
 3055    ConstrainRC = TRI.getBoolRC();
 
 3058  if (!MRI->getRegClassOrNull(CondReg))
 
 3059    MRI->setRegClass(CondReg, ConstrainRC);
 
 3061  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
 3064    .addMBB(I.getOperand(1).getMBB());
 
 3066  I.eraseFromParent();
 
 3070bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
 
 3072  Register DstReg = I.getOperand(0).getReg();
 
 3073  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
 
 3074  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
 
 3075  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
 
 3079  return RBI.constrainGenericRegister(
 
 3080    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
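// G_PTRMASK: if the mask's known-one bits show that a 32-bit half is passed
// through unchanged, that half is simply copied; otherwise the pointer is
// split into sub0/sub1, each half is ANDed with the matching mask half, and
// the pieces are rejoined with a REG_SEQUENCE (or a single S_AND_B64/S_AND_B32
// when the whole value can be handled at once).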
 
 3083bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
 3084  Register DstReg = I.getOperand(0).getReg();
 3085  Register SrcReg = I.getOperand(1).getReg();
 3086  Register MaskReg = I.getOperand(2).getReg();
 
 3087  LLT Ty = MRI->getType(DstReg);
 
 3088  LLT MaskTy = MRI->getType(MaskReg);
 
 3092  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
 
 3093  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
 
 3094  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
 
 3095  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
 3101  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
 
 3105  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
 
 3106  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
 
 3109      !CanCopyLow32 && !CanCopyHi32) {
 
 3110    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
 
 3114    I.eraseFromParent();
 
 3118  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
 
 3119  const TargetRegisterClass &RegRC
 
 3120    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
 
 3122  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
 
 3123  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
 
 3124  const TargetRegisterClass *MaskRC =
 
 3125      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
 
 3127  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
 
 3128      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
 
 3129      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
 
 3134           "ptrmask should have been narrowed during legalize");
 
 3136    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
 
 3142    I.eraseFromParent();
 
 3146  Register HiReg = MRI->createVirtualRegister(&RegRC);
 
 3147  Register LoReg = MRI->createVirtualRegister(&RegRC);
 
 3150  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
 3151    .addReg(SrcReg, 0, AMDGPU::sub0);
 3152  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
 3153    .addReg(SrcReg, 0, AMDGPU::sub1);
 
 3162    Register MaskLo = MRI->createVirtualRegister(&RegRC);
 
 3163    MaskedLo = MRI->createVirtualRegister(&RegRC);
 
 3165    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
 3166      .addReg(MaskReg, 0, AMDGPU::sub0);
 3167    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
 
 3176    Register MaskHi = MRI->createVirtualRegister(&RegRC);
 
 3177    MaskedHi = MRI->createVirtualRegister(&RegRC);
 
 3179    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
 3180      .addReg(MaskReg, 0, AMDGPU::sub1);
 3181    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
 
 3186  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
 
 3191  I.eraseFromParent();
 
 3197static std::pair<Register, unsigned>
 
 3204  std::tie(IdxBaseReg, Offset) =
 
 3206  if (IdxBaseReg == AMDGPU::NoRegister) {
 
 3210    IdxBaseReg = IdxReg;
 
 3217  if (static_cast<unsigned>(Offset) >= SubRegs.size())
 3218    return std::pair(IdxReg, SubRegs[0]);
 3219  return std::pair(IdxBaseReg, SubRegs[Offset]);
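// Dynamic G_EXTRACT_VECTOR_ELT: the index must be an SGPR. SGPR vectors use
// M0 + S_MOVRELS, VGPR vectors use M0 + V_MOVRELS_B32 or, when VGPR index mode
// is available, the GPR-indexing pseudo from TII.getIndirectGPRIDXPseudo().
// A constant index is folded into the subregister chosen by the helper above.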
 
 
 3222bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
 
 3228  LLT DstTy = MRI->getType(DstReg);
 
 3229  LLT SrcTy = MRI->getType(SrcReg);
 
 3231  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
 
 3232  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
 
 3233  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
 
 3237  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
 
 3240  const TargetRegisterClass *SrcRC =
 
 3241      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
 
 3242  const TargetRegisterClass *DstRC =
 
 3243      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
 
 3244  if (!SrcRC || !DstRC)
 
 3246  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
 
 3247      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
 
 3248      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
 
 3251  MachineBasicBlock *BB = MI.getParent();
 
 3259  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
 3263    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
 
 3266    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
 
 3270    MI.eraseFromParent();
 
 3277  if (!STI.useVGPRIndexMode()) {
 
 3278    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
 3280    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
 
 3283    MI.eraseFromParent();
 
 3287  const MCInstrDesc &GPRIDXDesc =
 
 3288      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
 
 3294  MI.eraseFromParent();
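// Dynamic G_INSERT_VECTOR_ELT mirrors the extract path: an M0-based MOVREL
// write via getIndirectRegWriteMovRelPseudo(), or the GPR-indexing pseudo when
// VGPR index mode is usable; only 32-bit elements are handled on the VGPR bank.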
 
 3299bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
 
 3306  LLT VecTy = MRI->getType(DstReg);
 
 3307  LLT ValTy = MRI->getType(ValReg);
 
 3311  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
 
 3312  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
 
 3313  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
 
 3319  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
 
 3322  const TargetRegisterClass *VecRC =
 
 3323      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
 
 3324  const TargetRegisterClass *ValRC =
 
 3325      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
 
 3327  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
 
 3328      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
 
 3329      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
 
 3330      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
 
 3333  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
 3337  std::tie(IdxReg, SubReg) =
 3340  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
 
 3341                         STI.useVGPRIndexMode();
 
 3343  MachineBasicBlock *BB = MI.getParent();
 3347    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
 
 3350    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
 
 3351        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
 
 3356    MI.eraseFromParent();
 
 3360  const MCInstrDesc &GPRIDXDesc =
 
 3361      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
 
 3368  MI.eraseFromParent();
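// Buffer-to-LDS loads: the BUFFER_LOAD_*_LDS_{BOTHEN,IDXEN,OFFEN,OFFSET}
// opcode is picked from the transfer size and from whether vindex/voffset
// operands are present; the memory operands are then rebuilt so the
// instruction carries both the buffer load and the LDS store.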
 
 3372bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
 
 3373  if (!Subtarget->hasVMemToLDSLoad())
 
 3376  unsigned Size = MI.getOperand(3).getImm();
 3379  const bool HasVIndex = MI.getNumOperands() == 9;
 3383    VIndex = MI.getOperand(4).getReg();
 3387  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
 
 3388  std::optional<ValueAndVReg> MaybeVOffset =
 
 3390  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
 
 3396    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
 
 3397                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
 
 3398                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
 
 3399                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
 
 3402    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
 
 3403                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
 
 3404                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
 
 3405                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
 
 3408    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
 
 3409                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
 
 3410                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
 
 3411                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
 
 3414    if (!Subtarget->hasLDSLoadB96_B128())
 
 3417    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
 
 3418                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
 
 3419                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
 
 3420                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
 
 3423    if (!Subtarget->hasLDSLoadB96_B128())
 
 3426    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
 
 3427                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
 
 3428                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
 
 3429                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
 
 3433  MachineBasicBlock *MBB = MI.getParent();
 3436    .add(MI.getOperand(2));
 
 3440  if (HasVIndex && HasVOffset) {
 
 3441    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
 
 3442    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
 
 3449  } else if (HasVIndex) {
 3451  } else if (HasVOffset) {
 
 3455  MIB.add(MI.getOperand(1));
 3456  MIB.add(MI.getOperand(5 + OpOffset));
 3457  MIB.add(MI.getOperand(6 + OpOffset));
 3459  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
 
 3467  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
 
 3472  MachinePointerInfo StorePtrI = LoadPtrI;
 
 3483  MachineMemOperand *StoreMMO =
 
 3489  MI.eraseFromParent();
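// The match*ExtendFromS32 helpers peel a G_MERGE_VALUES (or a known zero/sign
// extension) off a 64-bit value and hand back the underlying 32-bit register
// so it can be reused directly as a 32-bit offset.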
 
 3501  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
 
 3507    return Def->getOperand(1).getReg();
 
 3521  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
 
 3529    return Def->getOperand(1).getReg();
 
 3531  if (VT->signBitIsZero(Reg))
 3532    return matchZeroExtendFromS32(Reg);
 
 3540AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
 3542                                              : matchZeroExtendFromS32(Reg);
 
 3548AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
 3550                                              : matchSignExtendFromS32(Reg);
 
 3554AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
 3555                                                   bool IsSigned) const {
 3557    return matchSignExtendFromS32OrS32(Reg);
 3559  return matchZeroExtendFromS32OrS32(Reg);
 
 3569  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
 
 3576    return Def->getOperand(1).getReg();
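// Global-to-LDS loads: pick GLOBAL_LOAD_LDS_{UBYTE,USHORT,DWORD,DWORDX3,
// DWORDX4} from the size, and try to split a non-SGPR address into an SGPR
// base plus a zero-extended 32-bit VGPR offset so the saddr form can be used.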
 
 3581bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
 3582  if (!Subtarget->hasVMemToLDSLoad())
 3586  unsigned Size = MI.getOperand(3).getImm();
 
 3592    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
 
 3595    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
 
 3598    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
 
 3601    if (!Subtarget->hasLDSLoadB96_B128())
 
 3603    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
 
 3606    if (!Subtarget->hasLDSLoadB96_B128())
 
 3608    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
 
 3612  MachineBasicBlock *MBB = MI.getParent();
 3615    .add(MI.getOperand(2));
 
 3621  if (!isSGPR(Addr)) {
 
 3623    if (isSGPR(AddrDef->Reg)) {
 
 3624      Addr = AddrDef->Reg;
 
 3625    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
 
 3628      if (isSGPR(SAddr)) {
 
 3629        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
 
 3630        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
 
 3641      VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 3653  MIB.add(MI.getOperand(4));
 3655  unsigned Aux = MI.getOperand(5).getImm();
 3658  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
 
 3660  LoadPtrI.Offset = MI.getOperand(4).getImm();
 
 3661  MachinePointerInfo StorePtrI = LoadPtrI;
 
 3670  MachineMemOperand *StoreMMO =
 
 3672                               sizeof(int32_t), Align(4));
 
 3676  MI.eraseFromParent();
 
 3680bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
 
 3682  unsigned OpcodeOpIdx =
 
 3683      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
 
 3684  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
 3685  MI.removeOperand(OpcodeOpIdx);
 3686  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
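// SMFMAC intrinsics map one-to-one onto V_SMFMAC_* pseudos; the switch below
// only translates the intrinsic ID, after which the operands are reordered so
// the accumulator operand (VDst_In) lands in its expected position.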
 
 3692bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
 
 3695  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
 
 3696    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
 
 3698  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
 
 3699    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
 
 3701  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
 
 3702    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
 
 3704  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
 
 3705    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
 
 3707  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
 
 3708    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
 
 3710  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
 
 3711    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
 
 3713  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
 
 3714    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
 
 3716  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
 
 3717    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
 
 3719  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
 
 3720    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
 
 3722  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
 
 3723    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
 
 3725  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
 
 3726    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
 
 3728  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
 
 3729    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
 
 3731  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
 
 3732    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
 
 3734  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
 
 3735    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
 
 3737  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
 
 3738    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
 
 3740  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
 
 3741    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
 
 3743  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
 
 3744    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
 
 3746  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
 
 3747    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
 
 3749  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
 
 3750    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
 
 3752  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
 
 3753    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
 
 3755  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
 
 3756    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
 
 3758  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
 
 3759    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
 
 3761  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
 
 3762    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
 
 3764  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
 
 3765    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
 
 3767  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
 
 3768    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
 
 3770  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
 
 3771    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
 
 3773  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
 
 3774    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
 
 3776  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
 
 3777    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
 
 3783  auto VDst_In = MI.getOperand(4);
 3785  MI.setDesc(TII.get(Opc));
 
 3786  MI.removeOperand(4); 
 
 3787  MI.removeOperand(1); 
 
 3788  MI.addOperand(VDst_In); 
 
 3789  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
 
 3793bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
 
 3795  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
 
 3796      !Subtarget->hasPermlane16Swap())
 
 3798  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
 
 3799      !Subtarget->hasPermlane32Swap())
 
 3802  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
 
 3803                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
 
 3804                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
 
 3806  MI.removeOperand(2);
 
 3807  MI.setDesc(TII.get(Opcode));
 
 3810  MachineOperand &FI = MI.getOperand(4);
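// G_AMDGPU_WAVE_ADDRESS: convert a wave-relative scratch address to the
// per-lane view by shifting right by log2(wavefront size), using
// V_LSHRREV_B32 for VGPR results and the scalar shift otherwise.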
 
 3816bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
 
 3819  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
 
 3820  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
 3821  MachineBasicBlock *MBB = MI.getParent();
 
 3825    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
 3826      .addImm(Subtarget->getWavefrontSizeLog2())
 3831      .addImm(Subtarget->getWavefrontSizeLog2())
 
 3835  const TargetRegisterClass &RC =
 
 3836      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
 
 3837  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
 
 3840  MI.eraseFromParent();
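// BitOp3 matching helpers: each leaf source is assigned the truth-table bits
// 0xf0/0xcc/0xaa, and nested G_AND/G_OR/G_XOR combine their operands' tables
// while counting how many logic instructions the tree would replace.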
 
 3849  unsigned NumOpcodes = 0;
 
 3862    const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
 
 3873    for (unsigned I = 0; I < Src.size(); ++I) {
 
 3887    if (Src.size() == 3) {
 
 3894        for (unsigned I = 0; I < Src.size(); ++I) {
 3895          if (Src[I] == LHS) {
 
 3905    Bits = SrcBits[Src.size()];
 
 3911  switch (MI->getOpcode()) {
 
 3912  case TargetOpcode::G_AND:
 
 3913  case TargetOpcode::G_OR:
 
 3914  case TargetOpcode::G_XOR: {
 
 3919    if (!getOperandBits(LHS, LHSBits) ||
 3920        !getOperandBits(RHS, RHSBits)) {
 
 3922      return std::make_pair(0, 0);
 
 3928      NumOpcodes += Op.first;
 3929      LHSBits = Op.second;
 3934      NumOpcodes += Op.first;
 3935      RHSBits = Op.second;
 
 3940    return std::make_pair(0, 0);
 
 3944  switch (MI->getOpcode()) {
 
 3945  case TargetOpcode::G_AND:
 
 3946    TTbl = LHSBits & RHSBits;
 
 3948  case TargetOpcode::G_OR:
 
 3949    TTbl = LHSBits | RHSBits;
 
 3951  case TargetOpcode::G_XOR:
 
 3952    TTbl = LHSBits ^ RHSBits;
 
 3958  return std::make_pair(NumOpcodes + 1, TTbl);
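// selectBITOP3 emits V_BITOP3_B32/B16 when the matched tree replaces enough
// logic ops to be worthwhile; SGPR inputs beyond the constant-bus limit are
// copied to VGPRs first, and missing sources are duplicated so the instruction
// always has three operands.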
 
 
 3961bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
 
 3962  if (!Subtarget->hasBitOp3Insts())
 
 3966  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
 
 3967  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
 
 3973  unsigned NumOpcodes;
 
 3975  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
 
 3979  if (NumOpcodes < 2 || Src.empty())
 
 3982  const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
 
 3983  if (NumOpcodes == 2 && IsB32) {
 
 3991  } else if (NumOpcodes < 4) {
 
 3998  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
 
 3999  if (!IsB32 && STI.hasTrue16BitInsts())
 
 4000    Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
 
 4001                                   : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
 
 4002  unsigned CBL = STI.getConstantBusLimit(Opc);
 4003  MachineBasicBlock *MBB = MI.getParent();
 
 4006  for (unsigned I = 0; I < Src.size(); ++I) {
 4007    const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
 4008    if (RB->getID() != AMDGPU::SGPRRegBankID)
 
 4014    Register NewReg =  MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 4025  while (Src.size() < 3)
 
 4026    Src.push_back(Src[0]);
 
 4043  MI.eraseFromParent();
 
 4048bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
 
 4050  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
 
 4053  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
 
 4055      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
 
 4057  MachineBasicBlock *MBB = MI.getParent();
 
 4061    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 4064      .addImm(Subtarget->getWavefrontSizeLog2())
 
 4071  MI.eraseFromParent();
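// Top-level opcode dispatch: generic opcodes that survive to selection are
// routed to the dedicated select* helpers below; anything not handled here
// falls back to the imported TableGen patterns.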
 
 4077  if (!I.isPreISelOpcode()) {
 4079      return selectCOPY(I);
 
 4083  switch (I.getOpcode()) {
 
 4084  case TargetOpcode::G_AND:
 
 4085  case TargetOpcode::G_OR:
 
 4086  case TargetOpcode::G_XOR:
 
 4087    if (selectBITOP3(I))
 4091    return selectG_AND_OR_XOR(I);
 
 4092  case TargetOpcode::G_ADD:
 
 4093  case TargetOpcode::G_SUB:
 
 4094  case TargetOpcode::G_PTR_ADD:
 
 4097    return selectG_ADD_SUB(I);
 
 4098  case TargetOpcode::G_UADDO:
 
 4099  case TargetOpcode::G_USUBO:
 
 4100  case TargetOpcode::G_UADDE:
 
 4101  case TargetOpcode::G_USUBE:
 
 4102    return selectG_UADDO_USUBO_UADDE_USUBE(I);
 
 4103  case AMDGPU::G_AMDGPU_MAD_U64_U32:
 
 4104  case AMDGPU::G_AMDGPU_MAD_I64_I32:
 
 4105    return selectG_AMDGPU_MAD_64_32(I);
 
 4106  case TargetOpcode::G_INTTOPTR:
 
 4107  case TargetOpcode::G_BITCAST:
 
 4108  case TargetOpcode::G_PTRTOINT:
 
 4109  case TargetOpcode::G_FREEZE:
 
 4110    return selectCOPY(I);
 
 4111  case TargetOpcode::G_FNEG:
 
 4114    return selectG_FNEG(I);
 
 4115  case TargetOpcode::G_FABS:
 
 4118    return selectG_FABS(I);
 
 4119  case TargetOpcode::G_EXTRACT:
 
 4120    return selectG_EXTRACT(I);
 
 4121  case TargetOpcode::G_MERGE_VALUES:
 
 4122  case TargetOpcode::G_CONCAT_VECTORS:
 
 4123    return selectG_MERGE_VALUES(I);
 
 4124  case TargetOpcode::G_UNMERGE_VALUES:
 
 4125    return selectG_UNMERGE_VALUES(I);
 
 4126  case TargetOpcode::G_BUILD_VECTOR:
 
 4127  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
 
 4128    return selectG_BUILD_VECTOR(I);
 
 4129  case TargetOpcode::G_IMPLICIT_DEF:
 
 4130    return selectG_IMPLICIT_DEF(I);
 
 4131  case TargetOpcode::G_INSERT:
 
 4132    return selectG_INSERT(I);
 
 4133  case TargetOpcode::G_INTRINSIC:
 
 4134  case TargetOpcode::G_INTRINSIC_CONVERGENT:
 
 4135    return selectG_INTRINSIC(I);
 
 4136  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
 
 4137  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
 
 4138    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
 
 4139  case TargetOpcode::G_ICMP:
 
 4140  case TargetOpcode::G_FCMP:
 
 4141    if (selectG_ICMP_or_FCMP(I))
 
 4144  case TargetOpcode::G_LOAD:
 
 4145  case TargetOpcode::G_ZEXTLOAD:
 
 4146  case TargetOpcode::G_SEXTLOAD:
 
 4147  case TargetOpcode::G_STORE:
 
 4148  case TargetOpcode::G_ATOMIC_CMPXCHG:
 
 4149  case TargetOpcode::G_ATOMICRMW_XCHG:
 
 4150  case TargetOpcode::G_ATOMICRMW_ADD:
 
 4151  case TargetOpcode::G_ATOMICRMW_SUB:
 
 4152  case TargetOpcode::G_ATOMICRMW_AND:
 
 4153  case TargetOpcode::G_ATOMICRMW_OR:
 
 4154  case TargetOpcode::G_ATOMICRMW_XOR:
 
 4155  case TargetOpcode::G_ATOMICRMW_MIN:
 
 4156  case TargetOpcode::G_ATOMICRMW_MAX:
 
 4157  case TargetOpcode::G_ATOMICRMW_UMIN:
 
 4158  case TargetOpcode::G_ATOMICRMW_UMAX:
 
 4159  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
 
 4160  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
 
 4161  case TargetOpcode::G_ATOMICRMW_FADD:
 
 4162  case TargetOpcode::G_ATOMICRMW_FMIN:
 
 4163  case TargetOpcode::G_ATOMICRMW_FMAX:
 
 4164    return selectG_LOAD_STORE_ATOMICRMW(I);
 
 4165  case TargetOpcode::G_SELECT:
 
 4166    return selectG_SELECT(I);
 
 4167  case TargetOpcode::G_TRUNC:
 
 4168    return selectG_TRUNC(I);
 
 4169  case TargetOpcode::G_SEXT:
 
 4170  case TargetOpcode::G_ZEXT:
 
 4171  case TargetOpcode::G_ANYEXT:
 
 4172  case TargetOpcode::G_SEXT_INREG:
 
 4176    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
 4179    return selectG_SZA_EXT(I);
 
 4180  case TargetOpcode::G_FPEXT:
 
 4181    if (selectG_FPEXT(I))
 
 4184  case TargetOpcode::G_BRCOND:
 
 4185    return selectG_BRCOND(I);
 
 4186  case TargetOpcode::G_GLOBAL_VALUE:
 
 4187    return selectG_GLOBAL_VALUE(I);
 
 4188  case TargetOpcode::G_PTRMASK:
 
 4189    return selectG_PTRMASK(I);
 
 4190  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
 
 4191    return selectG_EXTRACT_VECTOR_ELT(I);
 
 4192  case TargetOpcode::G_INSERT_VECTOR_ELT:
 
 4193    return selectG_INSERT_VECTOR_ELT(I);
 
 4194  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
 
 4195  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
 
 4196  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
 
 4197  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
 
 4198  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
 
 4201    assert(Intr && "not an image intrinsic with image pseudo");
 4202    return selectImageIntrinsic(I, Intr);
 
 4204  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
 
 4205  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
 
 4206  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
 
 4207    return selectBVHIntersectRayIntrinsic(I);
 
 4208  case AMDGPU::G_SBFX:
 
 4209  case AMDGPU::G_UBFX:
 
 4210    return selectG_SBFX_UBFX(I);
 
 4211  case AMDGPU::G_SI_CALL:
 
 4212    I.setDesc(TII.get(AMDGPU::SI_CALL));
 
 4214  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
 
 4215    return selectWaveAddress(I);
 
 4216  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
 
 4217    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
 
 4220  case AMDGPU::G_STACKRESTORE:
 
 4221    return selectStackRestore(I);
 4223    return selectPHI(I);
 
 4224  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
 
 4225    return selectCOPY_SCC_VCC(I);
 
 4226  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
 
 4227    return selectCOPY_VCC_SCC(I);
 
 4228  case AMDGPU::G_AMDGPU_READANYLANE:
 
 4229    return selectReadAnyLane(I);
 
 4230  case TargetOpcode::G_CONSTANT:
 
 4231  case TargetOpcode::G_FCONSTANT:
 
 
 4239AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
 
 4246std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
 
 4247    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
 
 4251  if (MI->getOpcode() == AMDGPU::G_FNEG) {
 4252    Src = MI->getOperand(1).getReg();
 4255  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
 
 4260    if (LHS && LHS->isZero()) {
 4262      Src = MI->getOperand(2).getReg();
 
 4266  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
 4267    Src = MI->getOperand(1).getReg();
 
 4274  return std::pair(Src, Mods);
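// copyToVGPRIfSrcFolded: once source modifiers have been folded, an SGPR
// operand may no longer be legal for the VALU instruction, so it is copied to
// a VGPR (also done unconditionally when ForceVGPR is set).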
 
 4277Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
 
 4279    bool ForceVGPR) const {
 
 4280  if ((Mods != 0 || ForceVGPR) &&
 
 4281      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
 
 4288            TII.get(AMDGPU::COPY), VGPRSrc)
 
 4300AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
 4302      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
 
 4307AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
 4310  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
 
 4313      [=](MachineInstrBuilder &MIB) {
 
 4314        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
 4316      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
 4317      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
 4318      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
 
 4323AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
 4326  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
 
 4331      [=](MachineInstrBuilder &MIB) {
 
 4332        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
 4334      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
 4335      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
 4336      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
 
 4341AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
 4343      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
 4344      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
 4345      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
 
 4350AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
 4353  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
 
 4356      [=](MachineInstrBuilder &MIB) {
 
 4357        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
 4359      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 4364AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
 
 4368  std::tie(Src, Mods) =
 
 4369      selectVOP3ModsImpl(Root.getReg(), false);
 
 4372      [=](MachineInstrBuilder &MIB) {
 
 4373        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
 4375      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 4380AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
 4383  std::tie(Src, Mods) =
 4384      selectVOP3ModsImpl(Root.getReg(), true,
 
 4388      [=](MachineInstrBuilder &MIB) {
 
 4389        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
 4391      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 4396AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
 4399  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
 4402      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
 
 4427  if (MI->getOpcode() != AMDGPU::G_TRUNC)
 4430  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
 4431  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
 
 4432  return DstSize * 2 == SrcSize;
 
 
 4438  if (MI->getOpcode() != AMDGPU::G_LSHR)
 
 4442  std::optional<ValueAndVReg> ShiftAmt;
 
 4445    unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
 
 4446    unsigned Shift = ShiftAmt->Value.getZExtValue();
 
 4447    return Shift * 2 == SrcSize;
 
 
 4455  if (MI->getOpcode() != AMDGPU::G_SHL)
 
 4459  std::optional<ValueAndVReg> ShiftAmt;
 
 4462    unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
 
 4463    unsigned Shift = ShiftAmt->Value.getZExtValue();
 
 4464    return Shift * 2 == SrcSize;
 
 
 4472  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
 4474  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
 4475         MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
 
 
 4645static std::optional<std::pair<Register, SrcStatus>>
 
 4650  unsigned Opc = MI->getOpcode();
 
 4654  case AMDGPU::G_BITCAST:
 
 4655    return std::optional<std::pair<Register, SrcStatus>>(
 
 4656        {MI->getOperand(1).getReg(), Curr.second});
 
 4658    if (MI->getOperand(1).getReg().isPhysical())
 
 4659      return std::nullopt;
 
 4660    return std::optional<std::pair<Register, SrcStatus>>(
 
 4661        {MI->getOperand(1).getReg(), Curr.second});
 
 4662  case AMDGPU::G_FNEG: {
 
 4665      return std::nullopt;
 
 4666    return std::optional<std::pair<Register, SrcStatus>>(
 
 4667        {MI->getOperand(1).getReg(), Stat});
 
 4674  switch (Curr.second) {
 
 4677      return std::optional<std::pair<Register, SrcStatus>>(
 
 4680      if (Curr.first == MI->getOperand(0).getReg())
 
 4681        return std::optional<std::pair<Register, SrcStatus>>(
 
 4683      return std::optional<std::pair<Register, SrcStatus>>(
 
 4695      return std::optional<std::pair<Register, SrcStatus>>(
 
 4699      if (Curr.first == MI->getOperand(0).getReg())
 
 4700        return std::optional<std::pair<Register, SrcStatus>>(
 
 4702      return std::optional<std::pair<Register, SrcStatus>>(
 
 4708      return std::optional<std::pair<Register, SrcStatus>>(
 
 4713      return std::optional<std::pair<Register, SrcStatus>>(
 
 4718      return std::optional<std::pair<Register, SrcStatus>>(
 
 4723      return std::optional<std::pair<Register, SrcStatus>>(
 
 4729  return std::nullopt;
 
 
 4739  bool HasNeg = false;
 4741  bool HasOpsel = true;
 
 4746    unsigned Opc = MI->getOpcode();
 4748    if (Opc < TargetOpcode::GENERIC_OP_END) {
 4751    } else if (Opc == TargetOpcode::G_INTRINSIC) {
 
 4754      if (IntrinsicID == Intrinsic::amdgcn_fdot2)
 
 
 
 4778  while (Depth <= MaxDepth && Curr.has_value()) {
 
 4781      Statlist.push_back(Curr.value());
 
 
 4788static std::pair<Register, SrcStatus>
 
 4795  while (Depth <= MaxDepth && Curr.has_value()) {
 
 4801        LastSameOrNeg = Curr.value();
 
 4806  return LastSameOrNeg;
 
 
 4811  unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
 4812  unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
 
 4813  return Width1 == Width2;
 
 
 4849         IsHalfState(HiStat);
 
 
 4852std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
 
 4858    return {RootReg, Mods};
 
 4861  SearchOptions SO(RootReg, MRI);
 
 4872  MachineInstr *MI = MRI.getVRegDef(Stat.first);
 4874  if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
 4875      (IsDOT && Subtarget->hasDOTOpSelHazard())) {
 
 4877    return {Stat.first, Mods};
 
 4883  if (StatlistHi.empty()) {
 
 4885    return {Stat.first, Mods};
 
 4891  if (StatlistLo.empty()) {
 
 4893    return {Stat.first, Mods};
 
 4896  for (int I = StatlistHi.size() - 1; I >= 0; I--) {
 4897    for (int J = StatlistLo.size() - 1; J >= 0; J--) {
 4898      if (StatlistHi[I].first == StatlistLo[J].first &&
 4900                        StatlistHi[I].first, RootReg, TII, MRI))
 4901        return {StatlistHi[I].first,
 4902                updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};
 
 4908  return {Stat.first, Mods};
 
 4918  return RB->getID() == RBNo;
 
 
 4935  if (checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI, TRI) ||
 4940  if (MI->getOpcode() == AMDGPU::COPY && NewReg == MI->getOperand(1).getReg()) {
 
 4946  Register DstReg = MRI.cloneVirtualRegister(RootReg);
 4949      BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
 
 
 4957AMDGPUInstructionSelector::selectVOP3PRetHelper(MachineOperand &Root,
 4962  std::tie(Reg, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, IsDOT);
 
 4966      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
 4967      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 4972AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
 4974  return selectVOP3PRetHelper(Root);
 
 4978AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
 4980  return selectVOP3PRetHelper(Root, true);
 
 4984AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
 
 4987         "expected i1 value");
 
 4993      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 5001  switch (Elts.size()) {
 
 5003    DstRegClass = &AMDGPU::VReg_256RegClass;
 
 5006    DstRegClass = &AMDGPU::VReg_128RegClass;
 
 5009    DstRegClass = &AMDGPU::VReg_64RegClass;
 
 5016  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
 5017                 .addDef(MRI.createVirtualRegister(DstRegClass));
 5018  for (unsigned i = 0; i < Elts.size(); ++i) {
 
 
 5029  if (ModOpcode == TargetOpcode::G_FNEG) {
 
 5033    for (auto El : Elts) {
 5039    if (Elts.size() != NegAbsElts.size()) {
 
 5048    assert(ModOpcode == TargetOpcode::G_FABS);
 
 
 5056AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
 
 5062    assert(BV->getNumSources() > 0);
 
 5064    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
 
 5065    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
 5068    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
 
 5069      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
 
 5076    if (BV->getNumSources() == EltsF32.size()) {
 
 5082  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5083           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
 
 5087AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
 
 5093    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
 5101    if (CV->getNumSources() == EltsV2F16.size()) {
 
 5108  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5109           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
 
 5113AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
 
 5119    assert(CV->getNumSources() > 0);
 
 5120    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
 
 5122    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
 5126    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
 
 5127      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
 
 5134    if (CV->getNumSources() == EltsV2F16.size()) {
 
 5141  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5142           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
 
 5146AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
 
 5147  std::optional<FPValueAndVReg> FPValReg;
 
 5149    if (TII.isInlineConstant(FPValReg->Value)) {
 
 5150      return {{[=](MachineInstrBuilder &MIB) {
 
 5151        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
 
 5161    if (TII.isInlineConstant(ICst)) {
 
 5171AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
 
 5177  std::optional<ValueAndVReg> ShiftAmt;
 
 5179      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
 
 5180      ShiftAmt->Value.getZExtValue() % 8 == 0) {
 
 5181    Key = ShiftAmt->Value.getZExtValue() / 8;
 
 5186      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5187      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
 
 5192AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
 
 5199  std::optional<ValueAndVReg> ShiftAmt;
 
 5201      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
 
 5202      ShiftAmt->Value.getZExtValue() == 16) {
 
 5208      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5209      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
 
 5214AMDGPUInstructionSelector::selectSWMMACIndex32(MachineOperand &Root) const {
 
 5221    S32 = matchAnyExtendFromS32(Src);
 
 5225    if (Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
 5230        Src = Def->getOperand(2).getReg();
 
 5237      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5238      [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }
 
 5243AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
 5246  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
 
 5250      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 5251      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 5257AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
 5260  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
 
 5266      [=](MachineInstrBuilder &MIB) {
 
 5268            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
 5270      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
 
 5275AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
 5278  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
 
 5284      [=](MachineInstrBuilder &MIB) {
 
 5286            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
 5288      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
 
 5295bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
 5297                                                  bool IsSigned) const {
 
 5298  if (!Subtarget->hasScaleOffset())
 
 5302  MachineMemOperand *MMO = *MI.memoperands_begin();
 5314    OffsetReg = Def->Reg;
 
 5329          m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
 
 5333       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
 5334                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
 5335        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
 5336         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&
 
 5349bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
 5353                                                 bool *ScaleOffset) const {
 5355  MachineBasicBlock *MBB = MI->getParent();
 
 5360  getAddrModeInfo(*MI, *MRI, AddrInfo);
 5362  if (AddrInfo.empty())
 
 5365  const GEPInfo &GEPI = AddrInfo[0];
 
 5366  std::optional<int64_t> EncodedImm;
 
 5369    *ScaleOffset = false;
 5374    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
 5375        AddrInfo.size() > 1) {
 
 5376      const GEPInfo &GEPI2 = AddrInfo[1];
 
 5377      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
 
 5378        Register OffsetReg = GEPI2.SgprParts[1];
 
 5381              selectScaleOffset(Root, OffsetReg, false);
 
 5382        OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
 
 5384          Base = GEPI2.SgprParts[0];
 
 5385          *SOffset = OffsetReg;
 
 5394          auto SKnown = VT->getKnownBits(*SOffset);
 5395          if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
 
 5407  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
 
 5408    Base = GEPI.SgprParts[0];
 
 5414  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
 
 5420    Base = GEPI.SgprParts[0];
 
 5421    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
 5422    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
 
 5427  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
 
 5428    Register OffsetReg = GEPI.SgprParts[1];
 
 5430      *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
 
 5431    OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
 
 5433      Base = GEPI.SgprParts[0];
 
 5434      *SOffset = OffsetReg;
 
 5443AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
 5446  if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
 
 5448    return std::nullopt;
 
 5450  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
 5451           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
 
 5455AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
 5457  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
 
 5459  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
 
 5460    return std::nullopt;
 
 5462  const GEPInfo &GEPInfo = AddrInfo[0];
 
 5463  Register PtrReg = GEPInfo.SgprParts[0];
 
 5464  std::optional<int64_t> EncodedImm =
 
 5467    return std::nullopt;
 
 5470    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
 5471    [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
 
 5476AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
 5479  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr,
 
 5481    return std::nullopt;
 
 5484  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
 5485           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
 5486           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
 
 5490AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
 5494  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset, &ScaleOffset))
 
 5495    return std::nullopt;
 
 5498  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
 5499           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
 5501           [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }}};
 
 5504std::pair<Register, int>
 
 5505AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
 5506                                                uint64_t FlatVariant) const {
 
 5511  if (!STI.hasFlatInstOffsets())
 
 5515  int64_t ConstOffset;
 
 5517  std::tie(PtrBase, ConstOffset, IsInBounds) =
 
 5518      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
 
 5524  if (ConstOffset == 0 ||
 
 5526       !isFlatScratchBaseLegal(Root.getReg())) ||
 5530  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
 
 5531  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
 
 5534  return std::pair(PtrBase, ConstOffset);
 
 5538AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
 5542      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
 5543      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
 
 5548AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
 5552      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
 5553      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
 
 5558AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
 5562      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
 5563      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
 
 5569AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
 5571                                             bool NeedIOffset) const {
 
 5574  int64_t ConstOffset;
 
 5575  int64_t ImmOffset = 0;
 
 5579  std::tie(PtrBase, ConstOffset, std::ignore) =
 
 5580      getPtrBaseWithConstantOffset(Addr, *MRI);
 
 5582  if (ConstOffset != 0) {
 
 5587      ImmOffset = ConstOffset;
 
 5590      if (isSGPR(PtrBaseDef->Reg)) {
 
 5591        if (ConstOffset > 0) {
 
 5597          int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
 
 5599            std::tie(SplitImmOffset, RemainderOffset) =
 
 5604          if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
 
 5607            MachineBasicBlock *MBB = MI->getParent();
 
 5609                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 5611            BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
 5613                .addImm(RemainderOffset);
 
 5617                  [=](MachineInstrBuilder &MIB) {
 
 5620                  [=](MachineInstrBuilder &MIB) {
 
 5623                  [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
 
 5624                  [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
 5627                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
 
 5628                [=](MachineInstrBuilder &MIB) {
 
 5631                [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits); },
 
 5641        unsigned NumLiterals =
 
 5642            !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
 5643            !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
 
 5644        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
 
 5645          return std::nullopt;
 
 5652  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
 
 5657    if (isSGPR(SAddr)) {
 
 5658      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
 
 5662      bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
 
 5663                                           Subtarget->hasSignedGVSOffset());
 
 5664      if (Register VOffset = matchExtendFromS32OrS32(
 5665              PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
 
 5667          return {{[=](MachineInstrBuilder &MIB) { 
 
 5670                   [=](MachineInstrBuilder &MIB) { 
 
 5673                   [=](MachineInstrBuilder &MIB) { 
 
 5676                   [=](MachineInstrBuilder &MIB) { 
 
 5680        return {{[=](MachineInstrBuilder &MIB) { 
 
 5683                 [=](MachineInstrBuilder &MIB) { 
 
 5686                 [=](MachineInstrBuilder &MIB) { 
 
 5696  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
 
 5697      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
 
 5698    return std::nullopt;
 
 5703  MachineBasicBlock *
MBB = 
MI->getParent();
 
 5704  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 5706  BuildMI(*
MBB, 
MI, 
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
 
 5711        [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); }, 
 
 5712        [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },      
 
 5713        [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },    
 
 5714        [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }      
 
 5717      [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); }, 
 
 5718      [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },      
 
 5719      [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }      
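// selectGlobalSAddr (saddr form of global FLAT accesses): the visible logic
// strips a constant offset from the address with getPtrBaseWithConstantOffset,
// keeps an SGPR base for the saddr operand, and, when the remaining offset
// cannot live in the instruction's immediate field, materializes it into a
// VGPR with V_MOV_B32_e32 for the voffset operand. The renderer tuples
// returned above fill saddr, voffset, the immediate offset, and the
// cache-policy (cpol) bits.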
 
 5724AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
 5725  return selectGlobalSAddr(Root, 0);
 5729AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
 5735  return selectGlobalSAddr(Root, PassedCPol);
 5739AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(MachineOperand &Root) const {
 5745  return selectGlobalSAddr(Root, PassedCPol);
 5749AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
 5754AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
 5761  return selectGlobalSAddr(Root, PassedCPol, false);
 5765AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
 5772  return selectGlobalSAddr(Root, PassedCPol, false);
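// These thin wrappers only vary the arguments forwarded to the main
// selectGlobalSAddr overload: a fixed or caller-derived cache-policy value
// (PassedCPol), and NeedIOffset = false for the NoIOffset variants, which
// select the form without an instruction immediate offset.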
 
 5776AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
 5779  int64_t ConstOffset;
 5780  int64_t ImmOffset = 0;
 5784  std::tie(PtrBase, ConstOffset, std::ignore) =
 5785      getPtrBaseWithConstantOffset(Addr, *MRI);
 5787  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
 5791    ImmOffset = ConstOffset;
 5795  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
 5796    int FI = AddrDef->MI->getOperand(1).getIndex();
 5799        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
 5805  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
 5806    Register LHS = AddrDef->MI->getOperand(1).getReg();
 5807    Register RHS = AddrDef->MI->getOperand(2).getReg();
 5811    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
 5812        isSGPR(RHSDef->Reg)) {
 5813      int FI = LHSDef->MI->getOperand(1).getIndex();
 5817      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 5819      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
 5827    return std::nullopt;
 5830      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
 5831      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }
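// selectScratchSAddr (saddr form of scratch accesses): a legal constant
// offset is folded into ImmOffset; a plain frame-index base is used
// directly, while a frame index added to an SGPR is combined into a fresh
// SReg_32 with S_ADD_I32 so it can serve as the single saddr operand.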
 
 5836bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
 5838  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
 5844  auto VKnown = VT->getKnownBits(VAddr);
 5847  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
 5848  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
 5849  return (VMax & 3) + (SMax & 3) >= 4;
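// On subtargets with the flat-scratch SVS swizzle bug, the check above uses
// known-bits analysis to compute the maximum possible values of the VGPR and
// SGPR address components and rejects the SVS form when the sum of their low
// two bits can carry into bit 2. A minimal standalone sketch of that carry
// test (the function name is illustrative, not from this file):
#include <cstdint>  // needed only if this sketch is compiled on its own
static bool lowTwoBitsMayCarry(uint64_t VMax, uint64_t SMax) {
  // (VMax & 3) + (SMax & 3) is at most 6; any value >= 4 means the low two
  // bits of the two addends can overflow into bit 2 of their sum.
  return (VMax & 3) + (SMax & 3) >= 4;
}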
 
 5853AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
 5856  int64_t ConstOffset;
 5857  int64_t ImmOffset = 0;
 5861  std::tie(PtrBase, ConstOffset, std::ignore) =
 5862      getPtrBaseWithConstantOffset(Addr, *MRI);
 5865  if (ConstOffset != 0 &&
 5869    ImmOffset = ConstOffset;
 5873  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
 5874    return std::nullopt;
 5876  Register RHS = AddrDef->MI->getOperand(2).getReg();
 5877  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
 5878    return std::nullopt;
 5880  Register LHS = AddrDef->MI->getOperand(1).getReg();
 5883  if (OrigAddr != Addr) {
 5884    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
 5885      return std::nullopt;
 5887    if (!isFlatScratchBaseLegalSV(OrigAddr))
 5888      return std::nullopt;
 5891  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
 5892    return std::nullopt;
 5894  unsigned CPol = selectScaleOffset(Root, RHS, true)
 5898  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
 5899    int FI = LHSDef->MI->getOperand(1).getIndex();
 5901        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
 5903        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
 5904        [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
 5913    return std::nullopt;
 5916      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },
 5917      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },
 5918      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); },
 5919      [=](MachineInstrBuilder &MIB) { MIB.addImm(CPol); }
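// selectScratchSVAddr (SGPR-base plus VGPR-offset scratch form): it requires
// a G_PTR_ADD whose RHS lives in a VGPR, re-validates the base once a
// constant offset has been peeled off, bails out when the SVS swizzle bug
// could trigger, and derives the cache-policy bits from selectScaleOffset.
// The SGPR side is either a frame index or the LHS register.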
 
 5924AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
 5926  MachineBasicBlock *MBB = MI->getParent();
 5928  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
 5933    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 5938    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
 5942    return {{[=](MachineInstrBuilder &MIB) {
 5945             [=](MachineInstrBuilder &MIB) {
 5948             [=](MachineInstrBuilder &MIB) {
 5953             [=](MachineInstrBuilder &MIB) {
 5962  std::optional<int> FI;
 5965  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
 5967  int64_t ConstOffset;
 5968  std::tie(PtrBase, ConstOffset, std::ignore) =
 5969      getPtrBaseWithConstantOffset(VAddr, *MRI);
 5970  if (ConstOffset != 0) {
 5971    if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
 5972        (!STI.privateMemoryResourceIsRangeChecked() ||
 5973         VT->signBitIsZero(PtrBase))) {
 5974      const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
 5975      if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
 5981  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
 5985  return {{[=](MachineInstrBuilder &MIB) {
 5988           [=](MachineInstrBuilder &MIB) {
 5994           [=](MachineInstrBuilder &MIB) {
 5999           [=](MachineInstrBuilder &MIB) {
 
 6004bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
 6009  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
 6014  return VT->signBitIsZero(Base);
 6017bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
 6019                                                 unsigned Size) const {
 6020  if (Offset0 % Size != 0 || Offset1 % Size != 0)
 6025  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
 6030  return VT->signBitIsZero(Base);
 6035  return Addr->getOpcode() == TargetOpcode::G_OR ||
 6036         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
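// DS offset legality: the paired-offset form additionally requires both
// offsets to be multiples of the access size, and on subtargets without a
// usable (always-correct) DS offset and with unsafe folding disabled, an
// offset may only be folded when the sign bit of the base is known to be
// zero. The truncated lines at 6035/6036 belong to isNoUnsignedWrap, which
// accepts either a G_OR base or a G_PTR_ADD that is known not to wrap
// unsigned.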
 
 
 6043bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
 6051  if (STI.hasSignedScratchOffsets())
 6057  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
 6058    std::optional<ValueAndVReg> RhsValReg =
 6064    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
 6065        RhsValReg->Value.getSExtValue() > -0x40000000)
 6069  return VT->signBitIsZero(LHS);
 6074bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
 6082  if (STI.hasSignedScratchOffsets())
 6087  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
 6092bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
 6096  if (STI.hasSignedScratchOffsets())
 6101  std::optional<DefinitionAndSourceRegister> BaseDef =
 6103  std::optional<ValueAndVReg> RHSOffset =
 6113       (RHSOffset->Value.getSExtValue() < 0 &&
 6114        RHSOffset->Value.getSExtValue() > -0x40000000)))
 6117  Register LHS = BaseDef->MI->getOperand(1).getReg();
 6118  Register RHS = BaseDef->MI->getOperand(2).getReg();
 6119  return VT->signBitIsZero(RHS) && VT->signBitIsZero(LHS);
 
 6122bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
 6123                                                    unsigned ShAmtBits) const {
 6124  assert(MI.getOpcode() == TargetOpcode::G_AND);
 6126  std::optional<APInt> RHS =
 6131  if (RHS->countr_one() >= ShAmtBits)
 6134  const APInt &LHSKnownZeros = VT->getKnownZeroes(MI.getOperand(1).getReg());
 6135  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
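// isUnneededShiftMask: a G_AND that only masks a shift amount can be dropped
// when its constant already has at least ShAmtBits trailing ones, or when
// the masked-off bits are known to be zero anyway (the known-zero bits of
// the other operand OR'd with the constant still yield ShAmtBits trailing
// ones).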
 
 6139AMDGPUInstructionSelector::selectMUBUFScratchOffset(
 6142  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
 6144  std::optional<DefinitionAndSourceRegister> Def =
 6146  assert(Def && "this shouldn't be an optional result");
 6151        [=](MachineInstrBuilder &MIB) {
 6154        [=](MachineInstrBuilder &MIB) {
 6157        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
 6168    if (!TII.isLegalMUBUFImmOffset(Offset))
 6176        [=](MachineInstrBuilder &MIB) {
 6179        [=](MachineInstrBuilder &MIB) {
 6187      !TII.isLegalMUBUFImmOffset(Offset))
 6191      [=](MachineInstrBuilder &MIB) {
 6194      [=](MachineInstrBuilder &MIB) {
 
 6201std::pair<Register, unsigned>
 6202AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
 6203  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
 6204  int64_t ConstAddr = 0;
 6208  std::tie(PtrBase, Offset, std::ignore) =
 6209      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
 6212    if (isDSOffsetLegal(PtrBase, Offset)) {
 6214      return std::pair(PtrBase, Offset);
 6216  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
 6225  return std::pair(Root.getReg(), 0);
 6229AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
 6232  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
 6234      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
 6240AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
 6241  return selectDSReadWrite2(Root, 4);
 6245AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
 6246  return selectDSReadWrite2(Root, 8);
 6250AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
 6251                                              unsigned Size) const {
 6256      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
 6258      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }
 6262std::pair<Register, unsigned>
 6263AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
 6264                                                  unsigned Size) const {
 6265  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
 6266  int64_t ConstAddr = 0;
 6270  std::tie(PtrBase, Offset, std::ignore) =
 6271      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
 6274    int64_t OffsetValue0 = Offset;
 6276    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
 6278      return std::pair(PtrBase, OffsetValue0 / Size);
 6280  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
 6288  return std::pair(Root.getReg(), 0);
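// DS addressing: selectDS1Addr1OffsetImpl folds a legal constant offset into
// the single DS offset field, while selectDSReadWrite2Impl handles the
// read2/write2 forms, whose two offsets are expressed in units of the
// element size (4 bytes for the 64-bit pair, 8 bytes for the 128-bit pair),
// hence the divide by Size here and the Offset+1 rendered for the second
// element above.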
 
 6296std::tuple<Register, int64_t, bool>
 6297AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
 6300  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
 6301    return {Root, 0, false};
 6304  std::optional<ValueAndVReg> MaybeOffset =
 6307    return {Root, 0, false};
 
 6322  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6323  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6324  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
 6325  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
 6327  B.buildInstr(AMDGPU::S_MOV_B32)
 6330  B.buildInstr(AMDGPU::S_MOV_B32)
 6337  B.buildInstr(AMDGPU::REG_SEQUENCE)
 6340    .addImm(AMDGPU::sub0)
 6342    .addImm(AMDGPU::sub1);
 6346    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
 6347    B.buildInstr(AMDGPU::S_MOV_B64)
 6352  B.buildInstr(AMDGPU::REG_SEQUENCE)
 6355    .addImm(AMDGPU::sub0_sub1)
 6357    .addImm(AMDGPU::sub2_sub3);
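// This block appears to be the body of a buffer resource (RSRC) builder: the
// 128-bit SGPR_128 descriptor is assembled from S_MOV_B32/S_MOV_B64 pieces
// and glued together with REG_SEQUENCE, first into a 64-bit high half
// (sub0/sub1) and then into the full value (sub0_sub1/sub2_sub3), with the
// data format taken from TII.getDefaultRsrcDataFormat() just below.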
 
 
 6364  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
 
 
 6373  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
 
 
 6380AMDGPUInstructionSelector::MUBUFAddressData
 6381AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
 6382  MUBUFAddressData Data;
 6388  std::tie(PtrBase, Offset, std::ignore) =
 6389      getPtrBaseWithConstantOffset(Src, *MRI);
 6395  if (MachineInstr *InputAdd
 6397    Data.N2 = InputAdd->getOperand(1).getReg();
 6398    Data.N3 = InputAdd->getOperand(2).getReg();
 6413bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
 6419  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
 6420  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
 
 6426void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
 6428  if (TII.isLegalMUBUFImmOffset(ImmOffset))
 6432  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6433  B.buildInstr(AMDGPU::S_MOV_B32)
 6439bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
 6444  if (!STI.hasAddr64() || STI.useFlatForGlobal())
 6447  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
 6448  if (!shouldUseAddr64(AddrData))
 6454  Offset = AddrData.Offset;
 6460    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
 6462      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
 6475  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
 6486  splitIllegalMUBUFOffset(B, SOffset, Offset);
 6490bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
 6495  if (STI.useFlatForGlobal())
 6498  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
 6499  if (shouldUseAddr64(AddrData))
 6505  Offset = AddrData.Offset;
 6511  splitIllegalMUBUFOffset(B, SOffset, Offset);
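// MUBUF address selection: parseMUBUFAddress splits the root into a base
// (and, for a nested add, the N2/N3 components) plus a constant offset;
// shouldUseAddr64 picks the addr64 form when the base lives in the VGPR
// bank. When the remaining immediate offset is not encodable,
// splitIllegalMUBUFOffset moves it into a freshly created SReg_32 soffset
// via S_MOV_B32.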
 
 6516AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
 6522  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
 6528      [=](MachineInstrBuilder &MIB) {
 6531      [=](MachineInstrBuilder &MIB) {
 6534      [=](MachineInstrBuilder &MIB) {
 6537        else if (STI.hasRestrictedSOffset())
 6538          MIB.addReg(AMDGPU::SGPR_NULL);
 6542      [=](MachineInstrBuilder &MIB) {
 6552AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
 6557  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
 6561      [=](MachineInstrBuilder &MIB) {
 6564      [=](MachineInstrBuilder &MIB) {
 6567        else if (STI.hasRestrictedSOffset())
 6568          MIB.addReg(AMDGPU::SGPR_NULL);
 6580AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
 6585    SOffset = AMDGPU::SGPR_NULL;
 6587  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
 
 6591static std::optional<uint64_t>
 6595  if (!OffsetVal || !isInt<32>(*OffsetVal))
 6596    return std::nullopt;
 6597  return Lo_32(*OffsetVal);
 6601AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
 6602  std::optional<uint64_t> OffsetVal =
 6607  std::optional<int64_t> EncodedImm =
 6612  return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }  }};
 6616AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
 6623  std::optional<int64_t> EncodedImm =
 6628  return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }  }};
 6632AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
 6640    return std::nullopt;
 6642  std::optional<int64_t> EncodedOffset =
 6645    return std::nullopt;
 6648  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
 6649           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
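// SMRD/S_BUFFER_LOAD offsets: the static helper at 6591 first requires the
// constant to fit in 32 bits (returning its low half), and each selector
// then maps that value through the subtarget-dependent SMRD offset encoding;
// selection fails with std::nullopt when the offset cannot be encoded for
// the chosen form (immediate, 32-bit literal, or SGPR plus immediate).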
 
 6652std::pair<Register, unsigned>
 6653AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
 6654                                                     bool &Matched) const {
 6659  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
 6669    const auto CheckAbsNeg = [&]() {
 6674        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
 6705AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
 6710  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
 6715      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 6716      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 6721AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
 6725  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
 6728      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
 6729      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
 
 6733bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
 6737  Register CCReg = I.getOperand(0).getReg();
 6742  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
 6743      .addImm(I.getOperand(2).getImm());
 6747  I.eraseFromParent();
 6748  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
 6752bool AMDGPUInstructionSelector::selectSGetBarrierState(
 6756  MachineOperand BarOp = I.getOperand(2);
 6757  std::optional<int64_t> BarValImm =
 6761    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
 6765  MachineInstrBuilder MIB;
 6766  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
 6767                           : AMDGPU::S_GET_BARRIER_STATE_M0;
 6770  auto DstReg = I.getOperand(0).getReg();
 6771  const TargetRegisterClass *DstRC =
 6772      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
 6773  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
 6779  I.eraseFromParent();
 6784  if (HasInlineConst) {
 6788    case Intrinsic::amdgcn_s_barrier_join:
 6789      return AMDGPU::S_BARRIER_JOIN_IMM;
 6790    case Intrinsic::amdgcn_s_get_named_barrier_state:
 6791      return AMDGPU::S_GET_BARRIER_STATE_IMM;
 6797    case Intrinsic::amdgcn_s_barrier_join:
 6798      return AMDGPU::S_BARRIER_JOIN_M0;
 6799    case Intrinsic::amdgcn_s_get_named_barrier_state:
 6800      return AMDGPU::S_GET_BARRIER_STATE_M0;
 
 
 6805bool AMDGPUInstructionSelector::selectNamedBarrierInit(
 6809  MachineOperand BarOp = I.getOperand(1);
 6810  MachineOperand CntOp = I.getOperand(2);
 6813  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6819  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6826  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6832  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6833  constexpr unsigned ShAmt = 16;
 6839  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6849  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
 6850                     ? AMDGPU::S_BARRIER_INIT_M0
 6851                     : AMDGPU::S_BARRIER_SIGNAL_M0;
 6852  MachineInstrBuilder MIB;
 6855  I.eraseFromParent();
 6859bool AMDGPUInstructionSelector::selectNamedBarrierInst(
 6863  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
 6866  std::optional<int64_t> BarValImm =
 6871    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6877    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
 6883    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
 6888  MachineInstrBuilder MIB;
 6892  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
 6893    auto DstReg = I.getOperand(0).getReg();
 6894    const TargetRegisterClass *DstRC =
 6895        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
 6896    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
 6902    auto BarId = ((*BarValImm) >> 4) & 0x3F;
 6906  I.eraseFromParent();
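// The render* callbacks below are the custom operand renderers referenced
// from the TableGen-generated patterns: each one reads a matched G_CONSTANT,
// G_FCONSTANT, or immediate operand and appends a transformed immediate to
// the instruction being built. The asserts distinguish whole-instruction
// renderers (OpIdx == -1) from renderers bound to a specific source operand
// (OpIdx >= 0).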
 
 6913  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
 6914         "Expected G_CONSTANT");
 6915  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
 6921  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
 6922         "Expected G_CONSTANT");
 6923  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
 6929  const MachineOperand &Op = MI.getOperand(1);
 6930  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
 6931  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
 6937  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
 6938         "Expected G_CONSTANT");
 6939  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
 6947  const MachineOperand &Op = MI.getOperand(OpIdx);
 6964  assert(OpIdx >= 0 && "expected to match an immediate operand");
 
 6968void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
 6970  assert(OpIdx >= 0 && "expected to match an immediate operand");
 6975void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
 6977  assert(OpIdx >= 0 && "expected to match an immediate operand");
 6983void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
 6985  assert(OpIdx >= 0 && "expected to match an immediate operand");
 6990void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
 6992  assert(OpIdx >= 0 && "expected to match an immediate operand");
 6998void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
 7000  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7005void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
 7007  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7012void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
 7014  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7019void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
 7021  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7030  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7039  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7046void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
 7048  assert(OpIdx >= 0 && "expected to match an immediate operand");
 7049  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
 7064  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
 7066  assert(ExpVal != INT_MIN);
 7084  if (MI.getOperand(OpIdx).getImm())
 7086  MIB.addImm((int64_t)Mods);
 7093  if (MI.getOperand(OpIdx).getImm())
 7095  MIB.addImm((int64_t)Mods);
 7101  unsigned Val = MI.getOperand(OpIdx).getImm();
 7109  MIB.addImm((int64_t)Mods);
 7115  uint32_t V = MI.getOperand(2).getImm();
 7118  if (!Subtarget->hasSafeCUPrefetch())
 7124void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
 7126  unsigned Val = MI.getOperand(OpIdx).getImm();
 7135bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
 7136  return TII.isInlineConstant(Imm);
 7139bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
 7140  return TII.isInlineConstant(Imm);
 