//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Dominators.h"
#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

#define DEBUG_TYPE "isel"

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isNullConstantOrUndef(SDValue V) {
  if (V.isUndef())
    return true;

  const ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
  return Const != nullptr && Const->isNullValue();
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}

// TODO: Handle undef as zero
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
                                 bool Negate = false) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = Negate ?
      (-LHSVal & 0xffff) | (-RHSVal << 16) :
      (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}

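// Worked example for the fold above (values chosen purely for illustration):
// BUILD_VECTOR (i16 1, i16 -2) satisfies both getConstantValue() calls, so
// with Negate == false the packed immediate is
//   K = (1 & 0xffff) | (0xfffffffe << 16) = 0xfffe0001
// and the whole vector becomes one "s_mov_b32 dst, 0xfffe0001" instead of
// two moves plus a pack.
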
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  return packConstantV2I16(N, DAG, true);
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE, SDValue &DLC,
                         SDValue &SWZ) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectDIV_FMAS(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
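
// For example, (i16 (trunc (srl (i32 %x), 16))), possibly with bitcasts in
// between, is recognized as "the high half of %x", so callers can use %x
// directly instead of materializing the shifted value.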

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT));
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    return false;

  SDValue Lo = N->getOperand(0);
  SDValue Hi = N->getOperand(1);

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo

  // Need to check for possible indirect dependencies on the other half of the
  // vector to avoid introducing a cycle.
  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);

    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
    SDValue Ops[] = {
      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
    };

    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
    if (LdHi->getMemoryVT() == MVT::i8) {
      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
    } else {
      assert(LdHi->getMemoryVT() == MVT::i16);
    }

    SDValue NewLoadHi =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  Ops, LdHi->getMemoryVT(),
                                  LdHi->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
    return true;
  }

  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {
    SDValue TiedIn = getHi16Elt(Hi);
    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
      return false;

    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    if (LdLo->getMemoryVT() == MVT::i8) {
      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    } else {
      assert(LdLo->getMemoryVT() == MVT::i16);
    }

    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

    SDValue Ops[] = {
      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
    };

    SDValue NewLoadLo =
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                  Ops, LdLo->getMemoryVT(),
                                  LdLo->getMemOperand());

    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->d16PreservesUnusedBits())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    switch (N->getOpcode()) {
    case ISD::BUILD_VECTOR:
      MadeChange |= matchLoadD16FromBuildVector(N);
      break;
    default:
      break;
    }
  }

  if (MadeChange) {
    CurDAG->RemoveDeadNodes();
    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
               CurDAG->dump(););
  }
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  if (N->isUndef())
    return true;

  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  if (Negated) {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(-C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

  } else {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
      return TII->isInlineConstant(C->getAPIntValue());

    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
  }

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Register::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(M0); // Replace the chain.
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
    if (Subtarget->ldsRequiresM0Init())
      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
  } else if (AS == AMDGPUAS::REGION_ADDRESS) {
    MachineFunction &MF = CurDAG->getMachineFunction();
    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
    return
        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  }
  return N;
}

MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 3:
    return AMDGPU::SGPR_96RegClassID;
  case 4:
    return AMDGPU::SGPR_128RegClassID;
  case 5:
    return AMDGPU::SGPR_160RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  case 32:
    return AMDGPU::SReg_1024RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
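  // To illustrate with example values: extracting Width = 4 bits at
  // Offset = 8 becomes S_BFE_U32 (or S_BFE_I32 if signed) whose second
  // source packs both fields as (Width << 16) | Offset = 0x00040008,
  // with bits [5:0] holding the offset and bits [22:16] the width.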
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::DIV_FMAS: {
    SelectDIV_FMAS(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

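// For example, a uniform i64 add selected here decomposes into
//   s_add_u32  lo, lhs.sub0, rhs.sub0  // carry-out written to SCC
//   s_addc_u32 hi, lhs.sub1, rhs.sub1  // carry-in consumed from SCC
// with the two 32-bit halves recombined into an i64 by the REG_SEQUENCE
// built above.
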
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CI = N->getOperand(2);

  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                 : AMDGPU::V_SUBB_U32_e64;
  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(
      N, Opc, N->getVTList(),
      {N->getOperand(0), N->getOperand(1),
       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;

  SDValue CarryIn = N->getOperand(3);
  // V_DIV_FMAS implicitly reads VCC.
  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
                                     TRI->getVCC(), CarryIn, SDValue());

  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);

  Ops[8] = VCC;
  Ops[9] = VCC.getValue(1);

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
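
// For example, a DS access at (%ptr + 100) selects as Base = %ptr with 100
// carried in the instruction's 16-bit offset field, so no address add is
// emitted. An address of the form (sub 65, %x) instead becomes
// Base = (v_sub 0, %x) with offset 65, per the rewrite above.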

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
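
// For example, a 64-bit DS access at (%ptr + 40) gives Base = %ptr,
// Offset0 = 10, Offset1 = 11: ds_read2/ds_write2 offsets count dwords
// (40 / 4 = 10), which is why this path requires 4-byte alignment.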

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE, SDValue &DLC,
                                     SDValue &SWZ) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE,
                                           SDValue &DLC, SDValue &SWZ) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (!Subtarget->hasAddr64())
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC, SWZ))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE, DLC, SWZ;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this will be relative to
    // either the stack or frame pointer SGPR.
    return std::make_pair(
        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE, SDValue &DLC,
                                           SDValue &SWZ) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE, DLC, SWZ))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE, DLC, SWZ;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE, DLC, SWZ;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N, 0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N));
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!");
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      (!Subtarget->hasFlatSegmentOffsetBug() ||
       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
                               IsSigned)) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
                                          SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
                                                SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
1664 
1665 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1666  SDValue &Offset, bool &Imm) const {
1667 
1668  // FIXME: Handle non-constant offsets.
1669  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1670  if (!C)
1671  return false;
1672 
1673  SDLoc SL(ByteOffsetNode);
1674  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1675  int64_t ByteOffset = C->getSExtValue();
1676  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1677 
1678  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1679  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1680  Imm = true;
1681  return true;
1682  }
1683 
1684  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1685  return false;
1686 
1687  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1688  // 32-bit Immediates are supported on Sea Islands.
1689  Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1690  } else {
1691  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1692  Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1693  C32Bit), 0);
1694  }
1695  Imm = false;
1696  return true;
1697 }
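
// Encoding note (behavior assumed from getSMRDEncodedOffset, not spelled
// out here): pre-GCN3 encodings take a dword count while newer ones take
// a byte offset, so a byte offset of 16 typically encodes as 4 on SI/CI
// and as 16 on VI and later.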
1698 
1699 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1700  if (Addr.getValueType() != MVT::i32)
1701  return Addr;
1702 
1703  // Zero-extend a 32-bit address.
1704  SDLoc SL(Addr);
1705 
1706  const MachineFunction &MF = CurDAG->getMachineFunction();
1707  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1708  unsigned AddrHiVal = Info->get32BitAddressHighBits();
1709  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1710 
1711  const SDValue Ops[] = {
1712  CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1713  Addr,
1714  CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1715  SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1716  0),
1717  CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1718  };
1719 
1720  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1721  Ops), 0);
1722 }
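
// Illustrative result: for a 32-bit address %a with high bits H from the
// function info, this builds roughly
//   %hi = S_MOV_B32 H
//   %p:sreg_64_xexec = REG_SEQUENCE %a, sub0, %hi, sub1
// yielding a 64-bit base whose low half is the original address.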
1723 
1724 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1725  SDValue &Offset, bool &Imm) const {
1726  SDLoc SL(Addr);
1727 
1728  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1729  // wraparound, because s_load instructions perform the addition in 64 bits.
1730  if ((Addr.getValueType() != MVT::i32 ||
1731  Addr->getFlags().hasNoUnsignedWrap()) &&
1732  CurDAG->isBaseWithConstantOffset(Addr)) {
1733  SDValue N0 = Addr.getOperand(0);
1734  SDValue N1 = Addr.getOperand(1);
1735 
1736  if (SelectSMRDOffset(N1, Offset, Imm)) {
1737  SBase = Expand32BitAddress(N0);
1738  return true;
1739  }
1740  }
1741  SBase = Expand32BitAddress(Addr);
1742  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1743  Imm = true;
1744  return true;
1745 }
1746 
1747 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1748  SDValue &Offset) const {
1749  bool Imm;
1750  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1751 }
1752 
1753 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1754  SDValue &Offset) const {
1755 
1756  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1757  return false;
1758 
1759  bool Imm;
1760  if (!SelectSMRD(Addr, SBase, Offset, Imm))
1761  return false;
1762 
1763  return !Imm && isa<ConstantSDNode>(Offset);
1764 }
1765 
1766 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1767  SDValue &Offset) const {
1768  bool Imm;
1769  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1770  !isa<ConstantSDNode>(Offset);
1771 }
1772 
1773 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1774  SDValue &Offset) const {
1775  bool Imm;
1776  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1777 }
1778 
1779 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1780  SDValue &Offset) const {
1781  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1782  return false;
1783 
1784  bool Imm;
1785  if (!SelectSMRDOffset(Addr, Offset, Imm))
1786  return false;
1787 
1788  return !Imm && isa<ConstantSDNode>(Offset);
1789 }
1790 
1791 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1792  SDValue &Base,
1793  SDValue &Offset) const {
1794  SDLoc DL(Index);
1795 
1796  if (CurDAG->isBaseWithConstantOffset(Index)) {
1797  SDValue N0 = Index.getOperand(0);
1798  SDValue N1 = Index.getOperand(1);
1799  ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1800 
1801  // (add n0, c0)
1802  // Don't peel off the offset (c0) if doing so could possibly lead
1803  // the base (n0) to be negative.
1804  if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1805  Base = N0;
1806  Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1807  return true;
1808  }
1809  }
1810 
1811  if (isa<ConstantSDNode>(Index))
1812  return false;
1813 
1814  Base = Index;
1815  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1816  return true;
1817 }
1818 
1819 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1820  SDValue Val, uint32_t Offset,
1821  uint32_t Width) {
1822  // Transformation function, pack the offset and width of a BFE into
1823  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1824  // source, bits [5:0] contain the offset and bits [22:16] the width.
1825  uint32_t PackedVal = Offset | (Width << 16);
1826  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1827 
1828  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1829 }
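
// Worked example: offset 16 and width 8 pack to 16 | (8 << 16) == 0x80010,
// so "s_bfe_u32 dst, src, 0x80010" extracts bits [23:16] of src.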
1830 
1831 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1832  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
1833  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
1834  // Predicate: 0 < b <= c < 32
1835 
1836  const SDValue &Shl = N->getOperand(0);
1837  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1838  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1839 
1840  if (B && C) {
1841  uint32_t BVal = B->getZExtValue();
1842  uint32_t CVal = C->getZExtValue();
1843 
1844  if (0 < BVal && BVal <= CVal && CVal < 32) {
1845  bool Signed = N->getOpcode() == ISD::SRA;
1846  unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1847 
1848  ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1849  32 - CVal));
1850  return;
1851  }
1852  }
1853  SelectCode(N);
1854 }
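
// Worked example: "(x << 8) srl 24" satisfies 0 < 8 <= 24 < 32 and becomes
// BFE_U32 x, (24 - 8), (32 - 24), i.e. an 8-bit extract starting at bit 16.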
1855 
1856 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1857  switch (N->getOpcode()) {
1858  case ISD::AND:
1859  if (N->getOperand(0).getOpcode() == ISD::SRL) {
1860  // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1861  // Predicate: isMask(mask)
1862  const SDValue &Srl = N->getOperand(0);
1863  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1864  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1865 
1866  if (Shift && Mask) {
1867  uint32_t ShiftVal = Shift->getZExtValue();
1868  uint32_t MaskVal = Mask->getZExtValue();
1869 
1870  if (isMask_32(MaskVal)) {
1871  uint32_t WidthVal = countPopulation(MaskVal);
1872 
1873  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1874  Srl.getOperand(0), ShiftVal, WidthVal));
1875  return;
1876  }
1877  }
1878  }
1879  break;
1880  case ISD::SRL:
1881  if (N->getOperand(0).getOpcode() == ISD::AND) {
1882  // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1883  // Predicate: isMask(mask >> b)
1884  const SDValue &And = N->getOperand(0);
1885  ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1886  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1887 
1888  if (Shift && Mask) {
1889  uint32_t ShiftVal = Shift->getZExtValue();
1890  uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1891 
1892  if (isMask_32(MaskVal)) {
1893  uint32_t WidthVal = countPopulation(MaskVal);
1894 
1895  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1896  And.getOperand(0), ShiftVal, WidthVal));
1897  return;
1898  }
1899  }
1900  } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1901  SelectS_BFEFromShifts(N);
1902  return;
1903  }
1904  break;
1905  case ISD::SRA:
1906  if (N->getOperand(0).getOpcode() == ISD::SHL) {
1907  SelectS_BFEFromShifts(N);
1908  return;
1909  }
1910  break;
1911 
1912  case ISD::SIGN_EXTEND_INREG: {
1913  // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1914  SDValue Src = N->getOperand(0);
1915  if (Src.getOpcode() != ISD::SRL)
1916  break;
1917 
1918  const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1919  if (!Amt)
1920  break;
1921 
1922  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1923  ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1924  Amt->getZExtValue(), Width));
1925  return;
1926  }
1927  }
1928 
1929  SelectCode(N);
1930 }
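
// Worked example for the AND case: "(x srl 4) & 0xff" has a mask accepted
// by isMask_32 with popcount 8, so it becomes BFE_U32 x, 4, 8.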
1931 
1932 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1933  assert(N->getOpcode() == ISD::BRCOND);
1934  if (!N->hasOneUse())
1935  return false;
1936 
1937  SDValue Cond = N->getOperand(1);
1938  if (Cond.getOpcode() == ISD::CopyToReg)
1939  Cond = Cond.getOperand(2);
1940 
1941  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1942  return false;
1943 
1944  MVT VT = Cond.getOperand(0).getSimpleValueType();
1945  if (VT == MVT::i32)
1946  return true;
1947 
1948  if (VT == MVT::i64) {
1949  auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1950 
1951  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1952  return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1953  }
1954 
1955  return false;
1956 }
1957 
1958 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1959  SDValue Cond = N->getOperand(1);
1960 
1961  if (Cond.isUndef()) {
1962  CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1963  N->getOperand(2), N->getOperand(0));
1964  return;
1965  }
1966 
1967  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
1968  const SIRegisterInfo *TRI = ST->getRegisterInfo();
1969 
1970  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1971  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1972  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
1973  SDLoc SL(N);
1974 
1975  if (!UseSCCBr) {
1976  // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1977  // analyzed what generates the vcc value, so we do not know whether vcc
1978  // bits for disabled lanes are 0. Thus we need to mask out bits for
1979  // disabled lanes.
1980  //
1981  // For the case that we select S_CBRANCH_SCC1 and it gets
1982  // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1983  // SIInstrInfo::moveToVALU, which inserts the S_AND.
1984  //
1985  // We could add an analysis of what generates the vcc value here and omit
1986  // the S_AND when it is unnecessary. But it would be better to add a separate
1987  // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1988  // catches both cases.
1989  Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
1990  : AMDGPU::S_AND_B64,
1991  SL, MVT::i1,
1992  CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
1993  : AMDGPU::EXEC,
1994  MVT::i1),
1995  Cond),
1996  0);
1997  }
1998 
1999  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2000  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2001  N->getOperand(2), // Basic Block
2002  VCC.getValue(0));
2003 }
2004 
2005 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2006  MVT VT = N->getSimpleValueType(0);
2007  bool IsFMA = N->getOpcode() == ISD::FMA;
2008  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
2009  !Subtarget->hasFmaMixInsts()) ||
2010  ((IsFMA && Subtarget->hasMadMixInsts()) ||
2011  (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2012  SelectCode(N);
2013  return;
2014  }
2015 
2016  SDValue Src0 = N->getOperand(0);
2017  SDValue Src1 = N->getOperand(1);
2018  SDValue Src2 = N->getOperand(2);
2019  unsigned Src0Mods, Src1Mods, Src2Mods;
2020 
2021  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
2022  // using the conversion from f16.
2023  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2024  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2025  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2026 
2027  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
2028  "fmad selected with denormals enabled");
2029  // TODO: We can select this with f32 denormals enabled if all the sources are
2030  // converted from f16 (in which case fmad isn't legal).
2031 
2032  if (Sel0 || Sel1 || Sel2) {
2033  // For dummy operands.
2034  SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2035  SDValue Ops[] = {
2036  CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2037  CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2038  CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2039  CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2040  Zero, Zero
2041  };
2042 
2043  CurDAG->SelectNodeTo(N,
2044  IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
2045  MVT::f32, Ops);
2046  } else {
2047  SelectCode(N);
2048  }
2049 }
2050 
2051 // This is here because there isn't a way to use the generated sub0_sub1 as the
2052 // subreg index to EXTRACT_SUBREG in tablegen.
2053 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
2054  MemSDNode *Mem = cast<MemSDNode>(N);
2055  unsigned AS = Mem->getAddressSpace();
2056  if (AS == AMDGPUAS::FLAT_ADDRESS) {
2057  SelectCode(N);
2058  return;
2059  }
2060 
2061  MVT VT = N->getSimpleValueType(0);
2062  bool Is32 = (VT == MVT::i32);
2063  SDLoc SL(N);
2064 
2065  MachineSDNode *CmpSwap = nullptr;
2066  if (Subtarget->hasAddr64()) {
2067  SDValue SRsrc, VAddr, SOffset, Offset, SLC;
2068 
2069  if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
2070  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
2071  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
2072  SDValue CmpVal = Mem->getOperand(2);
2073 
2074  // XXX - Do we care about glue operands?
2075 
2076  SDValue Ops[] = {
2077  CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2078  };
2079 
2080  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2081  }
2082  }
2083 
2084  if (!CmpSwap) {
2085  SDValue SRsrc, SOffset, Offset, SLC;
2086  if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
2087  unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
2088  AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
2089 
2090  SDValue CmpVal = Mem->getOperand(2);
2091  SDValue Ops[] = {
2092  CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
2093  };
2094 
2095  CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
2096  }
2097  }
2098 
2099  if (!CmpSwap) {
2100  SelectCode(N);
2101  return;
2102  }
2103 
2104  MachineMemOperand *MMO = Mem->getMemOperand();
2105  CurDAG->setNodeMemRefs(CmpSwap, {MMO});
2106 
2107  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
2108  SDValue Extract
2109  = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
2110 
2111  ReplaceUses(SDValue(N, 0), Extract);
2112  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
2113  CurDAG->RemoveDeadNode(N);
2114 }
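
// Illustrative shape of the replacement for a 32-bit compare-swap (node
// names are schematic):
//   %pair = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN %data, %rsrc, %soffset, ...
//   %old  = EXTRACT_SUBREG %pair, sub0
// The _RTN instruction writes back the full data pair, so the old memory
// value is read from the low subregister.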
2115 
2116 void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2117  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2118  // be copied to an SGPR with readfirstlane.
2119  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2120  AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2121 
2122  SDValue Chain = N->getOperand(0);
2123  SDValue Ptr = N->getOperand(2);
2124  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2125  MachineMemOperand *MMO = M->getMemOperand();
2126  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2127 
2128  SDValue Offset;
2129  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2130  SDValue PtrBase = Ptr.getOperand(0);
2131  SDValue PtrOffset = Ptr.getOperand(1);
2132 
2133  const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2134  if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
2135  N = glueCopyToM0(N, PtrBase);
2136  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2137  }
2138  }
2139 
2140  if (!Offset) {
2141  N = glueCopyToM0(N, Ptr);
2142  Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2143  }
2144 
2145  SDValue Ops[] = {
2146  Offset,
2147  CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2148  Chain,
2149  N->getOperand(N->getNumOperands() - 1) // New glue
2150  };
2151 
2152  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2153  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2154 }
2155 
2156 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2157  switch (IntrID) {
2158  case Intrinsic::amdgcn_ds_gws_init:
2159  return AMDGPU::DS_GWS_INIT;
2160  case Intrinsic::amdgcn_ds_gws_barrier:
2161  return AMDGPU::DS_GWS_BARRIER;
2162  case Intrinsic::amdgcn_ds_gws_sema_v:
2163  return AMDGPU::DS_GWS_SEMA_V;
2164  case Intrinsic::amdgcn_ds_gws_sema_br:
2165  return AMDGPU::DS_GWS_SEMA_BR;
2166  case Intrinsic::amdgcn_ds_gws_sema_p:
2167  return AMDGPU::DS_GWS_SEMA_P;
2168  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2169  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2170  default:
2171  llvm_unreachable("not a gws intrinsic");
2172  }
2173 }
2174 
2175 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2176  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2177  !Subtarget->hasGWSSemaReleaseAll()) {
2178  // Let this error.
2179  SelectCode(N);
2180  return;
2181  }
2182 
2183  // Chain, intrinsic ID, vsrc, offset
2184  const bool HasVSrc = N->getNumOperands() == 4;
2185  assert(HasVSrc || N->getNumOperands() == 3);
2186 
2187  SDLoc SL(N);
2188  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2189  int ImmOffset = 0;
2190  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2191  MachineMemOperand *MMO = M->getMemOperand();
2192 
2193  // Don't worry if the offset ends up in a VGPR. Only one lane will have
2194  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
2195 
2196  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
2197  // offset field) % 64. Some versions of the programming guide omit the m0
2198  // part, or claim it's from offset 0.
2199  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2200  // If we have a constant offset, try to use the 0 in m0 as the base.
2201  // TODO: Look into changing the default m0 initialization value. If the
2202  // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2203  // the immediate offset.
2204  glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2205  ImmOffset = ConstOffset->getZExtValue();
2206  } else {
2207  if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2208  ImmOffset = BaseOffset.getConstantOperandVal(1);
2209  BaseOffset = BaseOffset.getOperand(0);
2210  }
2211 
2212  // Prefer to do the shift in an SGPR since it should be possible to use m0
2213  // as the result directly. If it's already an SGPR, it will be eliminated
2214  // later.
2215  SDNode *SGPROffset
2216  = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2217  BaseOffset);
2218  // Shift to offset in m0
2219  SDNode *M0Base
2220  = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2221  SDValue(SGPROffset, 0),
2222  CurDAG->getTargetConstant(16, SL, MVT::i32));
2223  glueCopyToM0(N, SDValue(M0Base, 0));
2224  }
2225 
2226  SDValue Chain = N->getOperand(0);
2227  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2228 
2229  // TODO: Can this just be removed from the instruction?
2230  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
2231 
2232  const unsigned Opc = gwsIntrinToOpcode(IntrID);
2233  SmallVector<SDValue, 5> Ops;
2234  if (HasVSrc)
2235  Ops.push_back(N->getOperand(2));
2236  Ops.push_back(OffsetField);
2237  Ops.push_back(GDS);
2238  Ops.push_back(Chain);
2239 
2240  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2241  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2242 }
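
// Sketch of the nodes emitted for a variable offset %v (mirrors the code
// above):
//   %s = V_READFIRSTLANE_B32 %v
//   %m = S_LSHL_B32 %s, 16     ; resource id lives in m0[21:16]
//   m0 = COPY %m               ; via glueCopyToM0
// with the 6-bit immediate offset field left at 0.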
2243 
2244 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2245  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2246  switch (IntrID) {
2247  case Intrinsic::amdgcn_ds_append:
2248  case Intrinsic::amdgcn_ds_consume: {
2249  if (N->getValueType(0) != MVT::i32)
2250  break;
2251  SelectDSAppendConsume(N, IntrID);
2252  return;
2253  }
2254  }
2255 
2256  SelectCode(N);
2257 }
2258 
2259 void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2260  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2261  unsigned Opcode;
2262  switch (IntrID) {
2263  case Intrinsic::amdgcn_wqm:
2264  Opcode = AMDGPU::WQM;
2265  break;
2266  case Intrinsic::amdgcn_softwqm:
2267  Opcode = AMDGPU::SOFT_WQM;
2268  break;
2269  case Intrinsic::amdgcn_wwm:
2270  Opcode = AMDGPU::WWM;
2271  break;
2272  default:
2273  SelectCode(N);
2274  return;
2275  }
2276 
2277  SDValue Src = N->getOperand(1);
2278  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2279 }
2280 
2281 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2282  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2283  switch (IntrID) {
2284  case Intrinsic::amdgcn_ds_gws_init:
2285  case Intrinsic::amdgcn_ds_gws_barrier:
2286  case Intrinsic::amdgcn_ds_gws_sema_v:
2287  case Intrinsic::amdgcn_ds_gws_sema_br:
2288  case Intrinsic::amdgcn_ds_gws_sema_p:
2289  case Intrinsic::amdgcn_ds_gws_sema_release_all:
2290  SelectDS_GWS(N, IntrID);
2291  return;
2292  default:
2293  break;
2294  }
2295 
2296  SelectCode(N);
2297 }
2298 
2299 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2300  unsigned &Mods) const {
2301  Mods = 0;
2302  Src = In;
2303 
2304  if (Src.getOpcode() == ISD::FNEG) {
2305  Mods |= SISrcMods::NEG;
2306  Src = Src.getOperand(0);
2307  }
2308 
2309  if (Src.getOpcode() == ISD::FABS) {
2310  Mods |= SISrcMods::ABS;
2311  Src = Src.getOperand(0);
2312  }
2313 
2314  return true;
2315 }
2316 
2317 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2318  SDValue &SrcMods) const {
2319  unsigned Mods;
2320  if (SelectVOP3ModsImpl(In, Src, Mods)) {
2321  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2322  return true;
2323  }
2324 
2325  return false;
2326 }
2327 
2328 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2329  SDValue &SrcMods) const {
2330  SelectVOP3Mods(In, Src, SrcMods);
2331  return isNoNanSrc(Src);
2332 }
2333 
2334 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2335  SDValue &SrcMods) const {
2336  if (In.getValueType() == MVT::f32)
2337  return SelectVOP3Mods(In, Src, SrcMods);
2338  Src = In;
2339  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2340  return true;
2341 }
2342 
2343 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2344  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2345  return false;
2346 
2347  Src = In;
2348  return true;
2349 }
2350 
2351 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
2352  SDValue &SrcMods, SDValue &Clamp,
2353  SDValue &Omod) const {
2354  SDLoc DL(In);
2355  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2356  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2357 
2358  return SelectVOP3Mods(In, Src, SrcMods);
2359 }
2360 
2361 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
2362  SDValue &SrcMods,
2363  SDValue &Clamp,
2364  SDValue &Omod) const {
2365  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2366  return SelectVOP3Mods(In, Src, SrcMods);
2367 }
2368 
2369 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2370  SDValue &Clamp, SDValue &Omod) const {
2371  Src = In;
2372 
2373  SDLoc DL(In);
2374  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2375  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2376 
2377  return true;
2378 }
2379 
2380 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2381  SDValue &SrcMods) const {
2382  unsigned Mods = 0;
2383  Src = In;
2384 
2385  if (Src.getOpcode() == ISD::FNEG) {
2386  Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2387  Src = Src.getOperand(0);
2388  }
2389 
2390  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
2391  unsigned VecMods = Mods;
2392 
2393  SDValue Lo = stripBitcast(Src.getOperand(0));
2394  SDValue Hi = stripBitcast(Src.getOperand(1));
2395 
2396  if (Lo.getOpcode() == ISD::FNEG) {
2397  Lo = stripBitcast(Lo.getOperand(0));
2398  Mods ^= SISrcMods::NEG;
2399  }
2400 
2401  if (Hi.getOpcode() == ISD::FNEG) {
2402  Hi = stripBitcast(Hi.getOperand(0));
2403  Mods ^= SISrcMods::NEG_HI;
2404  }
2405 
2406  if (isExtractHiElt(Lo, Lo))
2407  Mods |= SISrcMods::OP_SEL_0;
2408 
2409  if (isExtractHiElt(Hi, Hi))
2410  Mods |= SISrcMods::OP_SEL_1;
2411 
2412  Lo = stripExtractLoElt(Lo);
2413  Hi = stripExtractLoElt(Hi);
2414 
2415  if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2416  // Really a scalar input. Just select from the low half of the register to
2417  // avoid packing.
2418 
2419  Src = Lo;
2420  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2421  return true;
2422  }
2423 
2424  Mods = VecMods;
2425  }
2426 
2427  // Packed instructions do not have abs modifiers.
2428  Mods |= SISrcMods::OP_SEL_1;
2429 
2430  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2431  return true;
2432 }
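
// Worked example: "(fneg v2f16:%v)" folds to Src = %v with NEG and NEG_HI
// set (plus the default OP_SEL_1); and when both halves of a build_vector
// are the same non-inline scalar, the low half of that register is used
// directly to avoid emitting a pack.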
2433 
2434 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
2435  SDValue &SrcMods,
2436  SDValue &Clamp) const {
2437  SDLoc SL(In);
2438 
2439  // FIXME: Handle clamp and op_sel
2440  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2441 
2442  return SelectVOP3PMods(In, Src, SrcMods);
2443 }
2444 
2445 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2446  SDValue &SrcMods) const {
2447  Src = In;
2448  // FIXME: Handle op_sel
2449  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2450  return true;
2451 }
2452 
2453 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2454  SDValue &SrcMods,
2455  SDValue &Clamp) const {
2456  SDLoc SL(In);
2457 
2458  // FIXME: Handle clamp
2459  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2460 
2461  return SelectVOP3OpSel(In, Src, SrcMods);
2462 }
2463 
2464 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2465  SDValue &SrcMods) const {
2466  // FIXME: Handle op_sel
2467  return SelectVOP3Mods(In, Src, SrcMods);
2468 }
2469 
2470 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2471  SDValue &SrcMods,
2472  SDValue &Clamp) const {
2473  SDLoc SL(In);
2474 
2475  // FIXME: Handle clamp
2476  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2477 
2478  return SelectVOP3OpSelMods(In, Src, SrcMods);
2479 }
2480 
2481 // The return value is not whether the match is possible (which it always is),
2482 // but whether or not a conversion is really used.
2483 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2484  unsigned &Mods) const {
2485  Mods = 0;
2486  SelectVOP3ModsImpl(In, Src, Mods);
2487 
2488  if (Src.getOpcode() == ISD::FP_EXTEND) {
2489  Src = Src.getOperand(0);
2490  assert(Src.getValueType() == MVT::f16);
2491  Src = stripBitcast(Src);
2492 
2493  // Be careful about folding modifiers if we already have an abs. fneg is
2494  // applied last, so we don't want to apply an earlier fneg.
2495  if ((Mods & SISrcMods::ABS) == 0) {
2496  unsigned ModsTmp;
2497  SelectVOP3ModsImpl(Src, Src, ModsTmp);
2498 
2499  if ((ModsTmp & SISrcMods::NEG) != 0)
2500  Mods ^= SISrcMods::NEG;
2501 
2502  if ((ModsTmp & SISrcMods::ABS) != 0)
2503  Mods |= SISrcMods::ABS;
2504  }
2505 
2506  // op_sel/op_sel_hi decide the source type and source.
2507  // If the source's op_sel_hi is set, it indicates that a conversion from fp16 is performed.
2508  // If the source's op_sel is set, it picks the high half of the source
2509  // register.
2510 
2511  Mods |= SISrcMods::OP_SEL_1;
2512  if (isExtractHiElt(Src, Src)) {
2513  Mods |= SISrcMods::OP_SEL_0;
2514 
2515  // TODO: Should we try to look for neg/abs here?
2516  }
2517 
2518  return true;
2519  }
2520 
2521  return false;
2522 }
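
// Worked example: an f32 operand "(fp_extend f16:%x)" matches with
// OP_SEL_1 set, marking the source as f16; if %x is the high half of a
// 32-bit register, OP_SEL_0 is set as well to select that half.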
2523 
2524 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2525  SDValue &SrcMods) const {
2526  unsigned Mods = 0;
2527  SelectVOP3PMadMixModsImpl(In, Src, Mods);
2528  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2529  return true;
2530 }
2531 
2532 SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2533  if (In.isUndef())
2534  return CurDAG->getUNDEF(MVT::i32);
2535 
2536  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2537  SDLoc SL(In);
2538  return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2539  }
2540 
2541  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2542  SDLoc SL(In);
2543  return CurDAG->getConstant(
2544  C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2545  }
2546 
2547  SDValue Src;
2548  if (isExtractHiElt(In, Src))
2549  return Src;
2550 
2551  return SDValue();
2552 }
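
// Worked example: the f16 constant 1.0 has bits 0x3C00, so it is returned
// as the i32 constant 0x3C000000, already positioned in the high 16 bits.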
2553 
2554 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2555  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2556 
2557  const SIRegisterInfo *SIRI =
2558  static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2559  const SIInstrInfo * SII =
2560  static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2561 
2562  unsigned Limit = 0;
2563  bool AllUsesAcceptSReg = true;
2564  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2565  Limit < 10 && U != E; ++U, ++Limit) {
2566  const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2567 
2568  // If the register class is unknown, it could be an unknown
2569  // register class that needs to be an SGPR, e.g. an inline asm
2570  // constraint
2571  if (!RC || SIRI->isSGPRClass(RC))
2572  return false;
2573 
2574  if (RC != &AMDGPU::VS_32RegClass) {
2575  AllUsesAcceptSReg = false;
2576  SDNode * User = *U;
2577  if (User->isMachineOpcode()) {
2578  unsigned Opc = User->getMachineOpcode();
2579  MCInstrDesc Desc = SII->get(Opc);
2580  if (Desc.isCommutable()) {
2581  unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2582  unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2583  if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2584  unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2585  const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2586  if (CommutedRC == &AMDGPU::VS_32RegClass)
2587  AllUsesAcceptSReg = true;
2588  }
2589  }
2590  }
2591  // If "AllUsesAcceptSReg == false" so far, we haven't succeeded in
2592  // commuting the current user. This means we have at least one use
2593  // that strictly requires a VGPR. Thus, we will not attempt to commute
2594  // other user instructions.
2595  if (!AllUsesAcceptSReg)
2596  break;
2597  }
2598  }
2599  return !AllUsesAcceptSReg && (Limit < 10);
2600 }
2601 
2602 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2603  auto Ld = cast<LoadSDNode>(N);
2604 
2605  return Ld->getAlignment() >= 4 &&
2606  (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2607  Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
2608  !N->isDivergent()) ||
2609  (Subtarget->getScalarizeGlobalBehavior() &&
2610  Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2611  !Ld->isVolatile() && !N->isDivergent() &&
2612  static_cast<const SITargetLowering *>(
2613  getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
2625 }
2626 
2627 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2628  const AMDGPUTargetLowering& Lowering =
2629  *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2630  bool IsModified = false;
2631  do {
2632  IsModified = false;
2633 
2634  // Go over all selected nodes and try to fold them a bit more
2635  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2636  while (Position != CurDAG->allnodes_end()) {
2637  SDNode *Node = &*Position++;
2638  MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2639  if (!MachineNode)
2640  continue;
2641 
2642  SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2643  if (ResNode != Node) {
2644  if (ResNode)
2645  ReplaceUses(Node, ResNode);
2646  IsModified = true;
2647  }
2648  }
2649  CurDAG->RemoveDeadNodes();
2650  } while (IsModified);
2651 }
2652 
2653 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2654  Subtarget = &MF.getSubtarget<R600Subtarget>();
2655  return SelectionDAGISel::runOnMachineFunction(MF);
2656 }
2657 
2658 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2659  if (!N->readMem())
2660  return false;
2661  if (CbId == -1)
2662  return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2663  N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2664 
2665  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2666 }
2667 
2668 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2669  SDValue& IntPtr) {
2670  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2671  IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2672  true);
2673  return true;
2674  }
2675  return false;
2676 }
2677 
2678 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2679  SDValue& BaseReg, SDValue &Offset) {
2680  if (!isa<ConstantSDNode>(Addr)) {
2681  BaseReg = Addr;
2682  Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2683  return true;
2684  }
2685  return false;
2686 }
2687 
2688 void R600DAGToDAGISel::Select(SDNode *N) {
2689  unsigned int Opc = N->getOpcode();
2690  if (N->isMachineOpcode()) {
2691  N->setNodeId(-1);
2692  return; // Already selected.
2693  }
2694 
2695  switch (Opc) {
2696  default: break;
2697  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2698  case ISD::SCALAR_TO_VECTOR:
2699  case ISD::BUILD_VECTOR: {
2700  EVT VT = N->getValueType(0);
2701  unsigned NumVectorElts = VT.getVectorNumElements();
2702  unsigned RegClassID;
2703  // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2704  // that adds a 128 bits reg copy when going through TwoAddressInstructions
2705  // pass. We want to avoid 128 bits copies as much as possible because they
2706  // can't be bundled by our scheduler.
2707  switch(NumVectorElts) {
2708  case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2709  case 4:
2710  if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2711  RegClassID = R600::R600_Reg128VerticalRegClassID;
2712  else
2713  RegClassID = R600::R600_Reg128RegClassID;
2714  break;
2715  default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2716  }
2717  SelectBuildVector(N, RegClassID);
2718  return;
2719  }
2720  }
2721 
2722  SelectCode(N);
2723 }
2724 
2725 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2726  SDValue &Offset) {
2727  ConstantSDNode *C;
2728  SDLoc DL(Addr);
2729 
2730  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2731  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2732  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2733  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2734  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2735  Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2736  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2737  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2738  (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2739  Base = Addr.getOperand(0);
2740  Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2741  } else {
2742  Base = Addr;
2743  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2744  }
2745 
2746  return true;
2747 }
2748 
2749 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2750  SDValue &Offset) {
2751  ConstantSDNode *IMMOffset;
2752 
2753  if (Addr.getOpcode() == ISD::ADD
2754  && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2755  && isInt<16>(IMMOffset->getZExtValue())) {
2756 
2757  Base = Addr.getOperand(0);
2758  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2759  MVT::i32);
2760  return true;
2761  // If the pointer address is constant, we can move it to the offset field.
2762  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2763  && isInt<16>(IMMOffset->getZExtValue())) {
2764  Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2765  SDLoc(CurDAG->getEntryNode()),
2766  R600::ZERO, MVT::i32);
2767  Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2768  MVT::i32);
2769  return true;
2770  }
2771 
2772  // Default case, no offset
2773  Base = Addr;
2774  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2775  return true;
2776 }