LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUISelDAGToDAG.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 819 852 96.1 %
Date: 2018-02-21 06:32:55 Functions: 75 81 92.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief Defines an instruction selector for the AMDGPU target.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "AMDGPU.h"
      16             : #include "AMDGPUArgumentUsageInfo.h"
      17             : #include "AMDGPUISelLowering.h" // For AMDGPUISD
      18             : #include "AMDGPUInstrInfo.h"
      19             : #include "AMDGPURegisterInfo.h"
      20             : #include "AMDGPUSubtarget.h"
      21             : #include "AMDGPUTargetMachine.h"
      22             : #include "SIDefines.h"
      23             : #include "SIISelLowering.h"
      24             : #include "SIInstrInfo.h"
      25             : #include "SIMachineFunctionInfo.h"
      26             : #include "SIRegisterInfo.h"
      27             : #include "llvm/ADT/APInt.h"
      28             : #include "llvm/ADT/SmallVector.h"
      29             : #include "llvm/ADT/StringRef.h"
      30             : #include "llvm/Analysis/ValueTracking.h"
      31             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      32             : #include "llvm/CodeGen/ISDOpcodes.h"
      33             : #include "llvm/CodeGen/MachineFunction.h"
      34             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      35             : #include "llvm/CodeGen/MachineValueType.h"
      36             : #include "llvm/CodeGen/SelectionDAG.h"
      37             : #include "llvm/CodeGen/SelectionDAGISel.h"
      38             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      39             : #include "llvm/CodeGen/ValueTypes.h"
      40             : #include "llvm/IR/BasicBlock.h"
      41             : #include "llvm/IR/Instruction.h"
      42             : #include "llvm/MC/MCInstrDesc.h"
      43             : #include "llvm/Support/Casting.h"
      44             : #include "llvm/Support/CodeGen.h"
      45             : #include "llvm/Support/ErrorHandling.h"
      46             : #include "llvm/Support/MathExtras.h"
      47             : #include <cassert>
      48             : #include <cstdint>
      49             : #include <new>
      50             : #include <vector>
      51             : 
      52             : using namespace llvm;
      53             : 
      54             : namespace llvm {
      55             : 
      56             : class R600InstrInfo;
      57             : 
      58             : } // end namespace llvm
      59             : 
      60             : //===----------------------------------------------------------------------===//
      61             : // Instruction Selector Implementation
      62             : //===----------------------------------------------------------------------===//
      63             : 
      64             : namespace {
      65             : 
      66             : /// AMDGPU specific code to select AMDGPU machine instructions for
      67             : /// SelectionDAG operations.
      68             : class AMDGPUDAGToDAGISel : public SelectionDAGISel {
      69             :   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
      70             :   // make the right decision when generating code for different targets.
      71             :   const AMDGPUSubtarget *Subtarget;
      72             :   AMDGPUAS AMDGPUASI;
      73             :   bool EnableLateStructurizeCFG;
      74             : 
      75             : public:
      76        1957 :   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
      77             :                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      78        1957 :     : SelectionDAGISel(*TM, OptLevel) {
      79        1957 :     AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
      80        1957 :     EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
      81        1957 :   }
      82        1948 :   ~AMDGPUDAGToDAGISel() override = default;
      83             : 
      84        1949 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      85             :     AU.addRequired<AMDGPUArgumentUsageInfo>();
      86        1949 :     SelectionDAGISel::getAnalysisUsage(AU);
      87        1949 :   }
      88             : 
      89             :   bool runOnMachineFunction(MachineFunction &MF) override;
      90             :   void Select(SDNode *N) override;
      91             :   StringRef getPassName() const override;
      92             :   void PostprocessISelDAG() override;
      93             : 
      94             : protected:
      95             :   void SelectBuildVector(SDNode *N, unsigned RegClassID);
      96             : 
      97             : private:
      98             :   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
      99             :   bool isNoNanSrc(SDValue N) const;
     100             :   bool isInlineImmediate(const SDNode *N) const;
     101             :   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
     102             :                    const R600InstrInfo *TII);
     103             :   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
     104             :   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
     105             : 
     106             :   bool isConstantLoad(const MemSDNode *N, int cbID) const;
     107             :   bool isUniformBr(const SDNode *N) const;
     108             : 
     109             :   SDNode *glueCopyToM0(SDNode *N) const;
     110             : 
     111             :   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
     112             :   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
     113             :   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
     114             :                                        SDValue& Offset);
     115             :   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
     116             :   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
     117             :   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     118             :                        unsigned OffsetBits) const;
     119             :   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
     120             :   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
     121             :                                  SDValue &Offset1) const;
     122             :   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     123             :                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
     124             :                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
     125             :                    SDValue &TFE) const;
     126             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     127             :                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
     128             :                          SDValue &SLC, SDValue &TFE) const;
     129             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     130             :                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
     131             :                          SDValue &SLC) const;
     132             :   bool SelectMUBUFScratchOffen(SDNode *Parent,
     133             :                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
     134             :                                SDValue &SOffset, SDValue &ImmOffset) const;
     135             :   bool SelectMUBUFScratchOffset(SDNode *Parent,
     136             :                                 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     137             :                                 SDValue &Offset) const;
     138             : 
     139             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
     140             :                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
     141             :                          SDValue &TFE) const;
     142             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     143             :                          SDValue &Offset, SDValue &SLC) const;
     144             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     145             :                          SDValue &Offset) const;
     146             :   bool SelectMUBUFConstant(SDValue Constant,
     147             :                            SDValue &SOffset,
     148             :                            SDValue &ImmOffset) const;
     149             :   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
     150             :                                   SDValue &ImmOffset) const;
     151             :   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
     152             :                                    SDValue &ImmOffset, SDValue &VOffset) const;
     153             : 
     154             :   bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
     155             :                         SDValue &Offset, SDValue &SLC) const;
     156             :   bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
     157             :                               SDValue &Offset, SDValue &SLC) const;
     158             : 
     159             :   template <bool IsSigned>
     160             :   bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
     161             :                         SDValue &Offset, SDValue &SLC) const;
     162             : 
     163             :   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
     164             :                         bool &Imm) const;
     165             :   SDValue Expand32BitAddress(SDValue Addr) const;
     166             :   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
     167             :                   bool &Imm) const;
     168             :   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     169             :   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     170             :   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     171             :   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
     172             :   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
     173             :   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
     174             : 
     175             :   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     176             :   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
     177             :   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     178             :   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
     179             :   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     180             :                        SDValue &Clamp, SDValue &Omod) const;
     181             :   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     182             :                          SDValue &Clamp, SDValue &Omod) const;
     183             : 
     184             :   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
     185             :                                  SDValue &Clamp,
     186             :                                  SDValue &Omod) const;
     187             : 
     188             :   bool SelectVOP3OMods(SDValue In, SDValue &Src,
     189             :                        SDValue &Clamp, SDValue &Omod) const;
     190             : 
     191             :   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     192             :   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     193             :                         SDValue &Clamp) const;
     194             : 
     195             :   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     196             :   bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
     197             :                         SDValue &Clamp) const;
     198             : 
     199             :   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     200             :   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     201             :                             SDValue &Clamp) const;
     202             :   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
     203             :   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     204             : 
     205             :   bool SelectHi16Elt(SDValue In, SDValue &Src) const;
     206             : 
     207             :   void SelectADD_SUB_I64(SDNode *N);
     208             :   void SelectUADDO_USUBO(SDNode *N);
     209             :   void SelectDIV_SCALE(SDNode *N);
     210             :   void SelectMAD_64_32(SDNode *N);
     211             :   void SelectFMA_W_CHAIN(SDNode *N);
     212             :   void SelectFMUL_W_CHAIN(SDNode *N);
     213             : 
     214             :   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
     215             :                    uint32_t Offset, uint32_t Width);
     216             :   void SelectS_BFEFromShifts(SDNode *N);
     217             :   void SelectS_BFE(SDNode *N);
     218             :   bool isCBranchSCC(const SDNode *N) const;
     219             :   void SelectBRCOND(SDNode *N);
     220             :   void SelectFMAD(SDNode *N);
     221             :   void SelectATOMIC_CMP_SWAP(SDNode *N);
     222             : 
     223             : protected:
     224             :   // Include the pieces autogenerated from the target description.
     225             : #include "AMDGPUGenDAGISel.inc"
     226             : };
     227             : 
     228         548 : class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
     229             : public:
     230         275 :   explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
     231         275 :       AMDGPUDAGToDAGISel(TM, OptLevel) {}
     232             : 
     233             :   void Select(SDNode *N) override;
     234             : 
     235             :   bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
     236             :                           SDValue &Offset) override;
     237             :   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     238             :                           SDValue &Offset) override;
     239             : };
     240             : 
     241             : }  // end anonymous namespace
     242             : 
     243       75276 : INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
     244             :                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
     245       75276 : INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
     246      354826 : INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
     247             :                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
     248             : 
     249             : /// \brief This pass converts a legalized DAG into an AMDGPU-specific
     250             : /// DAG, ready for instruction scheduling.
     251        1682 : FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
     252             :                                         CodeGenOpt::Level OptLevel) {
     253        1682 :   return new AMDGPUDAGToDAGISel(TM, OptLevel);
     254             : }
     255             : 
     256             : /// \brief This pass converts a legalized DAG into an R600-specific
     257             : /// DAG, ready for instruction scheduling.
     258         275 : FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
     259             :                                       CodeGenOpt::Level OptLevel) {
     260         550 :   return new R600DAGToDAGISel(TM, OptLevel);
     261             : }
     262             : 
     263       18756 : bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
     264       18756 :   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
     265       18756 :   return SelectionDAGISel::runOnMachineFunction(MF);
     266             : }
     267             : 
     268         492 : bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
     269         492 :   if (TM.Options.NoNaNsFPMath)
     270             :     return true;
     271             : 
     272             :   // TODO: Move into isKnownNeverNaN
     273         114 :   if (N->getFlags().isDefined())
     274         102 :     return N->getFlags().hasNoNaNs();
     275             : 
     276          12 :   return CurDAG->isKnownNeverNaN(N);
     277             : }
     278             : 
     279        5437 : bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
     280             :   const SIInstrInfo *TII
     281        5437 :     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
     282             : 
     283             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
     284        9868 :     return TII->isInlineConstant(C->getAPIntValue());
     285             : 
     286             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
     287        1431 :     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
     288             : 
     289             :   return false;
     290             : }
     291             : 
     292             : /// \brief Determine the register class for \p OpNo
     293             : /// \returns The register class of the virtual register that will be used for
     294             : /// the given operand number \p OpNo, or nullptr if the register class cannot
     295             : /// be determined.
     296       22498 : const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
     297             :                                                           unsigned OpNo) const {
     298       22498 :   if (!N->isMachineOpcode()) {
     299         636 :     if (N->getOpcode() == ISD::CopyToReg) {
     300        1272 :       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
     301         636 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     302         319 :         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
     303             :         return MRI.getRegClass(Reg);
     304             :       }
     305             : 
     306             :       const SIRegisterInfo *TRI
     307         317 :         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
     308         317 :       return TRI->getPhysRegClass(Reg);
     309             :     }
     310             : 
     311             :     return nullptr;
     312             :   }
     313             : 
     314       21862 :   switch (N->getMachineOpcode()) {
     315       20177 :   default: {
     316             :     const MCInstrDesc &Desc =
     317       40354 :         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
     318       40354 :     unsigned OpIdx = Desc.getNumDefs() + OpNo;
     319       40354 :     if (OpIdx >= Desc.getNumOperands())
     320             :       return nullptr;
     321       20177 :     int RegClass = Desc.OpInfo[OpIdx].RegClass;
     322       20177 :     if (RegClass == -1)
     323             :       return nullptr;
     324             : 
     325       20162 :     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
     326             :   }
     327        1685 :   case AMDGPU::REG_SEQUENCE: {
     328        3370 :     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
     329             :     const TargetRegisterClass *SuperRC =
     330        1685 :         Subtarget->getRegisterInfo()->getRegClass(RCID);
     331             : 
     332        3370 :     SDValue SubRegOp = N->getOperand(OpNo + 1);
     333        3370 :     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
     334        1685 :     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
     335        1685 :                                                               SubRegIdx);
     336             :   }
     337             :   }
     338             : }
     339             : 
     340       79565 : SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
     341       88877 :   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
     342        9312 :       !Subtarget->ldsRequiresM0Init())
     343             :     return N;
     344             : 
     345             :   const SITargetLowering& Lowering =
     346        7120 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     347             : 
     348             :   // Write max value to m0 before each load operation
     349             : 
     350       14240 :   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
     351       21360 :                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
     352             : 
     353        7120 :   SDValue Glue = M0.getValue(1);
     354             : 
     355             :   SmallVector <SDValue, 8> Ops;
     356       64688 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     357       50448 :      Ops.push_back(N->getOperand(i));
     358             :   }
     359        7120 :   Ops.push_back(Glue);
     360       28480 :   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
     361             : }
     362             : 
     363             : static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
     364       15246 :   switch (NumVectorElts) {
     365             :   case 1:
     366             :     return AMDGPU::SReg_32_XM0RegClassID;
     367        8228 :   case 2:
     368             :     return AMDGPU::SReg_64RegClassID;
     369        6618 :   case 4:
     370             :     return AMDGPU::SReg_128RegClassID;
     371         366 :   case 8:
     372             :     return AMDGPU::SReg_256RegClassID;
     373          34 :   case 16:
     374             :     return AMDGPU::SReg_512RegClassID;
     375             :   }
     376             : 
     377           0 :   llvm_unreachable("invalid vector size");
     378             : }
     379             : 
     380         547 : static bool getConstantValue(SDValue N, uint32_t &Out) {
     381             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
     382         244 :     Out = C->getAPIntValue().getZExtValue();
     383             :     return true;
     384             :   }
     385             : 
     386             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
     387         764 :     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
     388             :     return true;
     389             :   }
     390             : 
     391             :   return false;
     392             : }
     393             : 
     394       17661 : void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
     395       35322 :   EVT VT = N->getValueType(0);
     396       17661 :   unsigned NumVectorElts = VT.getVectorNumElements();
     397       17661 :   EVT EltVT = VT.getVectorElementType();
     398       17661 :   const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
     399             :   SDLoc DL(N);
     400       35322 :   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     401             : 
     402       17661 :   if (NumVectorElts == 1) {
     403           0 :     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
     404             :                          RegClass);
     405             :     return;
     406             :   }
     407             : 
     408             :   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
     409             :                                   "supported yet");
     410             :   // 16 = Max Num Vector Elements
     411             :   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
     412             :   // 1 = Vector Register Class
     413       35322 :   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
     414             : 
     415       35322 :   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     416             :   bool IsRegSeq = true;
     417       17661 :   unsigned NOps = N->getNumOperands();
     418      128217 :   for (unsigned i = 0; i < NOps; i++) {
     419             :     // XXX: Why is this here?
     420      110556 :     if (isa<RegisterSDNode>(N->getOperand(i))) {
     421             :       IsRegSeq = false;
     422             :       break;
     423             :     }
     424      110556 :     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
     425      110556 :     RegSeqArgs[1 + (2 * i) + 1] =
     426       55278 :             CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
     427       55278 :                                       MVT::i32);
     428             :   }
     429       17661 :   if (NOps != NumVectorElts) {
     430             :     // Fill in the missing undef elements if this was a scalar_to_vector.
     431             :     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
     432           4 :     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
     433           4 :                                                    DL, EltVT);
     434          12 :     for (unsigned i = NOps; i < NumVectorElts; ++i) {
     435           8 :       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
     436           8 :       RegSeqArgs[1 + (2 * i) + 1] =
     437           4 :         CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
     438             :     }
     439             :   }
     440             : 
     441       17661 :   if (!IsRegSeq)
     442             :     SelectCode(N);
     443       52983 :   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
     444             : }
     445             : 
     446      446560 : void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     447      446560 :   unsigned int Opc = N->getOpcode();
     448      446560 :   if (N->isMachineOpcode()) {
     449             :     N->setNodeId(-1);
     450             :     return;   // Already selected.
     451             :   }
     452             : 
     453      890409 :   if (isa<AtomicSDNode>(N) ||
     454             :       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
     455             :        Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
     456      444385 :        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
     457             :        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
     458        1884 :     N = glueCopyToM0(N);
     459             : 
     460      446024 :   switch (Opc) {
     461             :   default:
     462             :     break;
     463             :   // We are selecting i64 ADD here instead of custom lower it during
     464             :   // DAG legalization, so we can fold some i64 ADDs used for address
     465             :   // calculation into the LOAD and STORE instructions.
     466             :   case ISD::ADDC:
     467             :   case ISD::ADDE:
     468             :   case ISD::SUBC:
     469             :   case ISD::SUBE: {
     470         240 :     if (N->getValueType(0) != MVT::i64)
     471             :       break;
     472             : 
     473         126 :     SelectADD_SUB_I64(N);
     474         126 :     return;
     475             :   }
     476         203 :   case ISD::UADDO:
     477             :   case ISD::USUBO: {
     478         203 :     SelectUADDO_USUBO(N);
     479         203 :     return;
     480             :   }
     481          45 :   case AMDGPUISD::FMUL_W_CHAIN: {
     482          45 :     SelectFMUL_W_CHAIN(N);
     483          45 :     return;
     484             :   }
     485         225 :   case AMDGPUISD::FMA_W_CHAIN: {
     486         225 :     SelectFMA_W_CHAIN(N);
     487         225 :     return;
     488             :   }
     489             : 
     490       15623 :   case ISD::SCALAR_TO_VECTOR:
     491             :   case ISD::BUILD_VECTOR: {
     492       31246 :     EVT VT = N->getValueType(0);
     493       15623 :     unsigned NumVectorElts = VT.getVectorNumElements();
     494             : 
     495             :     if (VT == MVT::v2i16 || VT == MVT::v2f16) {
     496         377 :       if (Opc == ISD::BUILD_VECTOR) {
     497             :         uint32_t LHSVal, RHSVal;
     498         547 :         if (getConstantValue(N->getOperand(0), LHSVal) &&
     499         170 :             getConstantValue(N->getOperand(1), RHSVal)) {
     500         143 :           uint32_t K = LHSVal | (RHSVal << 16);
     501         429 :           CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
     502         143 :                                CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
     503         143 :           return;
     504             :         }
     505             :       }
     506             : 
     507         234 :       break;
     508             :     }
     509             : 
     510             :     assert(VT.getVectorElementType().bitsEq(MVT::i32));
     511             :     unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
     512       15246 :     SelectBuildVector(N, RegClassID);
     513       15246 :     return;
     514             :   }
     515        1794 :   case ISD::BUILD_PAIR: {
     516             :     SDValue RC, SubReg0, SubReg1;
     517             :     SDLoc DL(N);
     518        1794 :     if (N->getValueType(0) == MVT::i128) {
     519           0 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
     520           0 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
     521           0 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     522        1794 :     } else if (N->getValueType(0) == MVT::i64) {
     523        1794 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
     524        1794 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     525        1794 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     526             :     } else {
     527           0 :       llvm_unreachable("Unhandled value type for BUILD_PAIR");
     528             :     }
     529        1794 :     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
     530        3588 :                             N->getOperand(1), SubReg1 };
     531        7176 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     532             :                                           N->getValueType(0), Ops));
     533             :     return;
     534             :   }
     535             : 
     536       23874 :   case ISD::Constant:
     537             :   case ISD::ConstantFP: {
     538       70429 :     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
     539             :       break;
     540             : 
     541             :     uint64_t Imm;
     542             :     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
     543         219 :       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
     544             :     else {
     545             :       ConstantSDNode *C = cast<ConstantSDNode>(N);
     546        1120 :       Imm = C->getZExtValue();
     547             :     }
     548             : 
     549             :     SDLoc DL(N);
     550        3579 :     SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     551             :                                 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
     552        1193 :                                                     MVT::i32));
     553        3579 :     SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     554        1193 :                                 CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
     555             :     const SDValue Ops[] = {
     556        1193 :       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     557        1193 :       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
     558        1193 :       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
     559        3579 :     };
     560             : 
     561        4772 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     562             :                                           N->getValueType(0), Ops));
     563             :     return;
     564             :   }
     565       77681 :   case ISD::LOAD:
     566             :   case ISD::STORE: {
     567       77681 :     N = glueCopyToM0(N);
     568       77681 :     break;
     569             :   }
     570             : 
     571         148 :   case AMDGPUISD::BFE_I32:
     572             :   case AMDGPUISD::BFE_U32: {
     573             :     // There is a scalar version available, but unlike the vector version which
     574             :     // has a separate operand for the offset and width, the scalar version packs
     575             :     // the width and offset into a single operand. Try to move to the scalar
     576             :     // version if the offsets are constant, so that we can try to keep extended
     577             :     // loads of kernel arguments in SGPRs.
     578             : 
     579             :     // TODO: Technically we could try to pattern match scalar bitshifts of
     580             :     // dynamic values, but it's probably not useful.
     581         148 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     582             :     if (!Offset)
     583             :       break;
     584             : 
     585             :     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
     586             :     if (!Width)
     587             :       break;
     588             : 
     589             :     bool Signed = Opc == AMDGPUISD::BFE_I32;
     590             : 
     591         264 :     uint32_t OffsetVal = Offset->getZExtValue();
     592         264 :     uint32_t WidthVal = Width->getZExtValue();
     593             : 
     594         132 :     ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
     595         132 :                             SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
     596         132 :     return;
     597             :   }
     598         253 :   case AMDGPUISD::DIV_SCALE: {
     599         253 :     SelectDIV_SCALE(N);
     600         253 :     return;
     601             :   }
     602          20 :   case AMDGPUISD::MAD_I64_I32:
     603             :   case AMDGPUISD::MAD_U64_U32: {
     604          20 :     SelectMAD_64_32(N);
     605          20 :     return;
     606             :   }
     607       11032 :   case ISD::CopyToReg: {
     608             :     const SITargetLowering& Lowering =
     609       11032 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     610       11032 :     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
     611       11032 :     break;
     612             :   }
     613             :   case ISD::AND:
     614             :   case ISD::SRL:
     615             :   case ISD::SRA:
     616             :   case ISD::SIGN_EXTEND_INREG:
     617       22613 :     if (N->getValueType(0) != MVT::i32)
     618             :       break;
     619             : 
     620       16347 :     SelectS_BFE(N);
     621       16347 :     return;
     622         546 :   case ISD::BRCOND:
     623         546 :     SelectBRCOND(N);
     624         546 :     return;
     625        1549 :   case ISD::FMAD:
     626        1549 :     SelectFMAD(N);
     627        1549 :     return;
     628         195 :   case AMDGPUISD::ATOMIC_CMP_SWAP:
     629         195 :     SelectATOMIC_CMP_SWAP(N);
     630         195 :     return;
     631             :   }
     632             : 
     633             :   SelectCode(N);
     634             : }
     635             : 
     636        6797 : bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
     637       13594 :   if (!N->readMem())
     638             :     return false;
     639        6797 :   if (CbId == -1)
     640           0 :     return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
     641             :            N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
     642             : 
     643        6797 :   return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
     644             : }
     645             : 
     646         363 : bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
     647         363 :   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
     648         363 :   const Instruction *Term = BB->getTerminator();
     649         362 :   return Term->getMetadata("amdgpu.uniform") ||
     650         363 :          Term->getMetadata("structurizecfg.uniform");
     651             : }
     652             : 
     653           0 : StringRef AMDGPUDAGToDAGISel::getPassName() const {
     654           0 :   return "AMDGPU DAG->DAG Pattern Instruction Selection";
     655             : }
     656             : 
     657             : //===----------------------------------------------------------------------===//
     658             : // Complex Patterns
     659             : //===----------------------------------------------------------------------===//
     660             : 
     661        6298 : bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
     662             :                                                          SDValue& IntPtr) {
     663             :   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
     664       25192 :     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
     665        6298 :                                        true);
     666             :     return true;
     667             :   }
     668             :   return false;
     669             : }
     670             : 
     671           0 : bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
     672             :     SDValue& BaseReg, SDValue &Offset) {
     673             :   if (!isa<ConstantSDNode>(Addr)) {
     674           0 :     BaseReg = Addr;
     675           0 :     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
     676             :     return true;
     677             :   }
     678             :   return false;
     679             : }
     680             : 
     681           0 : bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     682             :                                             SDValue &Offset) {
     683           0 :   return false;
     684             : }
     685             : 
     686           0 : bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
     687             :                                             SDValue &Offset) {
     688             :   ConstantSDNode *C;
     689             :   SDLoc DL(Addr);
     690             : 
     691             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
     692           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
     693           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     694           0 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
     695             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
     696           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
     697           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     698           0 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
     699             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
     700           0 :     Base = Addr.getOperand(0);
     701           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     702             :   } else {
     703           0 :     Base = Addr;
     704           0 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
     705             :   }
     706             : 
     707           0 :   return true;
     708             : }
     709             : 
     710             : // FIXME: Should only handle addcarry/subcarry
     711         126 : void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
     712             :   SDLoc DL(N);
     713         126 :   SDValue LHS = N->getOperand(0);
     714         126 :   SDValue RHS = N->getOperand(1);
     715             : 
     716         126 :   unsigned Opcode = N->getOpcode();
     717         126 :   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
     718             :   bool ProduceCarry =
     719         126 :       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
     720         126 :   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
     721             : 
     722         252 :   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     723         252 :   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     724             : 
     725         252 :   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     726         126 :                                        DL, MVT::i32, LHS, Sub0);
     727         252 :   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     728         126 :                                        DL, MVT::i32, LHS, Sub1);
     729             : 
     730         252 :   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     731         126 :                                        DL, MVT::i32, RHS, Sub0);
     732         252 :   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     733         126 :                                        DL, MVT::i32, RHS, Sub1);
     734             : 
     735         252 :   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
     736             : 
     737         126 :   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
     738         126 :   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
     739             : 
     740             :   SDNode *AddLo;
     741         126 :   if (!ConsumeCarry) {
     742             :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
     743         240 :     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
     744             :   } else {
     745           6 :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
     746          12 :     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
     747             :   }
     748             :   SDValue AddHiArgs[] = {
     749             :     SDValue(Hi0, 0),
     750             :     SDValue(Hi1, 0),
     751             :     SDValue(AddLo, 1)
     752             :   };
     753         252 :   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
     754             : 
     755             :   SDValue RegSequenceArgs[] = {
     756         126 :     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     757             :     SDValue(AddLo,0),
     758             :     Sub0,
     759             :     SDValue(AddHi,0),
     760             :     Sub1,
     761         378 :   };
     762         252 :   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
     763         126 :                                                MVT::i64, RegSequenceArgs);
     764             : 
     765         126 :   if (ProduceCarry) {
     766             :     // Replace the carry-use
     767         252 :     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
     768             :   }
     769             : 
     770             :   // Replace the remaining uses.
     771         126 :   CurDAG->ReplaceAllUsesWith(N, RegSequence);
     772         126 :   CurDAG->RemoveDeadNode(N);
     773         126 : }
     774             : 
     775         203 : void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
     776             :   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
     777             :   // carry out despite the _i32 name. These were renamed in VI to _U32.
     778             :   // FIXME: We should probably rename the opcodes here.
     779         203 :   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     780             :     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
     781             : 
     782         812 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
     783         203 :                        { N->getOperand(0), N->getOperand(1) });
     784         203 : }
     785             : 
     786         225 : void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
     787             :   SDLoc SL(N);
     788             :   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
     789         225 :   SDValue Ops[10];
     790             : 
     791         225 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
     792         225 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     793         225 :   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
     794         225 :   Ops[8] = N->getOperand(0);
     795         225 :   Ops[9] = N->getOperand(4);
     796             : 
     797         675 :   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
     798         225 : }
     799             : 
     800          45 : void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
     801             :   SDLoc SL(N);
     802             :   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
     803          45 :   SDValue Ops[8];
     804             : 
     805          45 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
     806          45 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     807          45 :   Ops[6] = N->getOperand(0);
     808          45 :   Ops[7] = N->getOperand(3);
     809             : 
     810         135 :   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
     811          45 : }
     812             : 
     813             : // We need to handle this here because tablegen doesn't support matching
     814             : // instructions with multiple outputs.
     815         253 : void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
     816             :   SDLoc SL(N);
     817         253 :   EVT VT = N->getValueType(0);
     818             : 
     819             :   assert(VT == MVT::f32 || VT == MVT::f64);
     820             : 
     821             :   unsigned Opc
     822             :     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
     823             : 
     824         253 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
     825         759 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     826         253 : }
     827             : 
     828             : // We need to handle this here because tablegen doesn't support matching
     829             : // instructions with multiple outputs.
     830          20 : void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
     831             :   SDLoc SL(N);
     832          20 :   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
     833          20 :   unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
     834             : 
     835          20 :   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
     836          20 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
     837          20 :                     Clamp };
     838          60 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     839          20 : }
     840             : 
     841        6582 : bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     842             :                                          unsigned OffsetBits) const {
     843        6582 :   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
     844         340 :       (OffsetBits == 8 && !isUInt<8>(Offset)))
     845             :     return false;
     846             : 
     847        7915 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
     848        1750 :       Subtarget->unsafeDSOffsetFoldingEnabled())
     849             :     return true;
     850             : 
     851             :   // On Southern Islands instruction with a negative base value and an offset
     852             :   // don't seem to work.
     853        1746 :   return CurDAG->SignBitIsZero(Base);
     854             : }
     855             : 
     856        8873 : bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
     857             :                                               SDValue &Offset) const {
     858             :   SDLoc DL(Addr);
     859        8873 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     860        6226 :     SDValue N0 = Addr.getOperand(0);
     861        6226 :     SDValue N1 = Addr.getOperand(1);
     862             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     863       12452 :     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
     864             :       // (add n0, c0)
     865        5781 :       Base = N0;
     866       11562 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
     867        5781 :       return true;
     868             :     }
     869        2647 :   } else if (Addr.getOpcode() == ISD::SUB) {
     870             :     // sub C, x -> add (sub 0, x), C
     871             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     872          18 :       int64_t ByteOffset = C->getSExtValue();
     873          18 :       if (isUInt<16>(ByteOffset)) {
     874          32 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     875             : 
     876             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     877             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     878             :         // here, so this is thrown away.
     879          16 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     880          16 :                                       Zero, Addr.getOperand(1));
     881             : 
     882          16 :         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
     883             :           // FIXME: Select to VOP3 version for with-carry.
     884          14 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     885             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     886             : 
     887             :           MachineSDNode *MachineSub
     888          28 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     889          14 :                                      Zero, Addr.getOperand(1));
     890             : 
     891          14 :           Base = SDValue(MachineSub, 0);
     892          28 :           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
     893          14 :           return true;
     894             :         }
     895             :       }
     896             :     }
     897             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     898             :     // If we have a constant address, prefer to put the constant into the
     899             :     // offset. This can save moves to load the constant address since multiple
     900             :     // operations can share the zero base address register, and enables merging
     901             :     // into read2 / write2 instructions.
     902             : 
     903             :     SDLoc DL(Addr);
     904             : 
     905        1460 :     if (isUInt<16>(CAddr->getZExtValue())) {
     906        1452 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     907        1452 :       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     908         726 :                                  DL, MVT::i32, Zero);
     909         726 :       Base = SDValue(MovZero, 0);
     910        1452 :       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
     911             :       return true;
     912             :     }
     913             :   }
     914             : 
     915             :   // default case
     916        2352 :   Base = Addr;
     917        7056 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
     918        2352 :   return true;
     919             : }
     920             : 
     921             : // TODO: If offset is too big, put low 16-bit into offset.
     922         469 : bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     923             :                                                    SDValue &Offset0,
     924             :                                                    SDValue &Offset1) const {
     925             :   SDLoc DL(Addr);
     926             : 
     927         469 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     928         336 :     SDValue N0 = Addr.getOperand(0);
     929         336 :     SDValue N1 = Addr.getOperand(1);
     930             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     931         672 :     unsigned DWordOffset0 = C1->getZExtValue() / 4;
     932         336 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     933             :     // (add n0, c0)
     934         336 :     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
     935         318 :       Base = N0;
     936         636 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     937         636 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     938         318 :       return true;
     939             :     }
     940         133 :   } else if (Addr.getOpcode() == ISD::SUB) {
     941             :     // sub C, x -> add (sub 0, x), C
     942             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     943           8 :       unsigned DWordOffset0 = C->getZExtValue() / 4;
     944           4 :       unsigned DWordOffset1 = DWordOffset0 + 1;
     945             : 
     946           4 :       if (isUInt<8>(DWordOffset0)) {
     947             :         SDLoc DL(Addr);
     948           8 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     949             : 
     950             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     951             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     952             :         // here, so this is thrown away.
     953           4 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     954           4 :                                       Zero, Addr.getOperand(1));
     955             : 
     956           4 :         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
     957           2 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     958             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     959             : 
     960             :           MachineSDNode *MachineSub
     961           4 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     962           2 :                                      Zero, Addr.getOperand(1));
     963             : 
     964           2 :           Base = SDValue(MachineSub, 0);
     965           4 :           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     966           4 :           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     967             :           return true;
     968             :         }
     969             :       }
     970             :     }
     971             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     972          48 :     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
     973          24 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     974             :     assert(4 * DWordOffset0 == CAddr->getZExtValue());
     975             : 
     976          24 :     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
     977          32 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     978             :       MachineSDNode *MovZero
     979          32 :         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     980          16 :                                  DL, MVT::i32, Zero);
     981          16 :       Base = SDValue(MovZero, 0);
     982          32 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     983          32 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     984             :       return true;
     985             :     }
     986             :   }
     987             : 
     988             :   // default case
     989             : 
     990             :   // FIXME: This is broken on SI where we still need to check if the base
     991             :   // pointer is positive here.
     992         133 :   Base = Addr;
     993         266 :   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
     994         266 :   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
     995         133 :   return true;
     996             : }
     997             : 
     998       44334 : bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
     999             :                                      SDValue &VAddr, SDValue &SOffset,
    1000             :                                      SDValue &Offset, SDValue &Offen,
    1001             :                                      SDValue &Idxen, SDValue &Addr64,
    1002             :                                      SDValue &GLC, SDValue &SLC,
    1003             :                                      SDValue &TFE) const {
    1004             :   // Subtarget prefers to use flat instruction
    1005       44334 :   if (Subtarget->useFlatForGlobal())
    1006             :     return false;
    1007             : 
    1008             :   SDLoc DL(Addr);
    1009             : 
    1010       33971 :   if (!GLC.getNode())
    1011       67942 :     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1012       33971 :   if (!SLC.getNode())
    1013       67528 :     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1014       67942 :   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1015             : 
    1016       67942 :   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1017       67942 :   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1018       67942 :   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1019       67942 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1020             : 
    1021       33971 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1022       11417 :     SDValue N0 = Addr.getOperand(0);
    1023       11417 :     SDValue N1 = Addr.getOperand(1);
    1024             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1025             : 
    1026       11417 :     if (N0.getOpcode() == ISD::ADD) {
    1027             :       // (add (add N2, N3), C1) -> addr64
    1028        1605 :       SDValue N2 = N0.getOperand(0);
    1029        1605 :       SDValue N3 = N0.getOperand(1);
    1030        3210 :       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1031        1605 :       Ptr = N2;
    1032        1605 :       VAddr = N3;
    1033             :     } else {
    1034             :       // (add N0, C1) -> offset
    1035       19624 :       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1036        9812 :       Ptr = N0;
    1037             :     }
    1038             : 
    1039       22834 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    1040       22244 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1041             :       return true;
    1042             :     }
    1043             : 
    1044         295 :     if (isUInt<32>(C1->getZExtValue())) {
    1045             :       // Illegal offset, store it in soffset.
    1046         570 :       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1047         570 :       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1048             :                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
    1049             :                         0);
    1050             :       return true;
    1051             :     }
    1052             :   }
    1053             : 
    1054       22564 :   if (Addr.getOpcode() == ISD::ADD) {
    1055             :     // (add N0, N1) -> addr64
    1056        3412 :     SDValue N0 = Addr.getOperand(0);
    1057        3412 :     SDValue N1 = Addr.getOperand(1);
    1058        6824 :     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1059        3412 :     Ptr = N0;
    1060        3412 :     VAddr = N1;
    1061        6824 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1062             :     return true;
    1063             :   }
    1064             : 
    1065             :   // default case -> offset
    1066       38304 :   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1067       19152 :   Ptr = Addr;
    1068       38304 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1069             : 
    1070             :   return true;
    1071             : }
    1072             : 
    1073       29917 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1074             :                                            SDValue &VAddr, SDValue &SOffset,
    1075             :                                            SDValue &Offset, SDValue &GLC,
    1076             :                                            SDValue &SLC, SDValue &TFE) const {
    1077       29917 :   SDValue Ptr, Offen, Idxen, Addr64;
    1078             : 
    1079             :   // addr64 bit was removed for volcanic islands.
    1080       29917 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    1081             :     return false;
    1082             : 
    1083       16975 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1084             :               GLC, SLC, TFE))
    1085             :     return false;
    1086             : 
    1087             :   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
    1088       29522 :   if (C->getSExtValue()) {
    1089             :     SDLoc DL(Addr);
    1090             : 
    1091             :     const SITargetLowering& Lowering =
    1092        4097 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1093             : 
    1094        4097 :     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    1095             :     return true;
    1096             :   }
    1097             : 
    1098             :   return false;
    1099             : }
    1100             : 
    1101         581 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1102             :                                            SDValue &VAddr, SDValue &SOffset,
    1103             :                                            SDValue &Offset,
    1104             :                                            SDValue &SLC) const {
    1105        1743 :   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
    1106         581 :   SDValue GLC, TFE;
    1107             : 
    1108         581 :   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
    1109             : }
    1110             : 
    1111             : static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
    1112             :   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
    1113          89 :   return PSV && PSV->isStack();
    1114             : }
    1115             : 
    1116        6162 : std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
    1117        6162 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1118             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1119             : 
    1120             :   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    1121        4980 :     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
    1122        9960 :                                               FI->getValueType(0));
    1123             : 
    1124             :     // If we can resolve this to a frame index access, this is relative to the
    1125             :     // frame pointer SGPR.
    1126       14940 :     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
    1127        9960 :                                                    MVT::i32));
    1128             :   }
    1129             : 
    1130             :   // If we don't know this private access is a local stack object, it needs to
    1131             :   // be relative to the entry point's scratch wave offset register.
    1132        3546 :   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
    1133        2364 :                                                MVT::i32));
    1134             : }
    1135             : 
    1136        6168 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    1137             :                                                  SDValue Addr, SDValue &Rsrc,
    1138             :                                                  SDValue &VAddr, SDValue &SOffset,
    1139             :                                                  SDValue &ImmOffset) const {
    1140             : 
    1141             :   SDLoc DL(Addr);
    1142        6168 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1143        6168 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1144             : 
    1145       12336 :   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1146             : 
    1147             :   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    1148           6 :     unsigned Imm = CAddr->getZExtValue();
    1149             : 
    1150          12 :     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    1151          12 :     MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    1152           6 :                                                         DL, MVT::i32, HighBits);
    1153           6 :     VAddr = SDValue(MovHighBits, 0);
    1154             : 
    1155             :     // In a call sequence, stores to the argument stack area are relative to the
    1156             :     // stack pointer.
    1157           6 :     const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1158           0 :     unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1159             :       Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1160             : 
    1161          12 :     SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1162          12 :     ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    1163             :     return true;
    1164             :   }
    1165             : 
    1166        6162 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1167             :     // (add n0, c1)
    1168             : 
    1169        5137 :     SDValue N0 = Addr.getOperand(0);
    1170        5137 :     SDValue N1 = Addr.getOperand(1);
    1171             : 
    1172             :     // Offsets in vaddr must be positive if range checking is enabled.
    1173             :     //
    1174             :     // The total computation of vaddr + soffset + offset must not overflow.  If
    1175             :     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    1176             :     // overflowing.
    1177             :     //
    1178             :     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    1179             :     // always perform a range check. If a negative vaddr base index was used,
    1180             :     // this would fail the range check. The overall address computation would
    1181             :     // compute a valid address, but this doesn't happen due to the range
    1182             :     // check. For out-of-bounds MUBUF loads, a 0 is returned.
    1183             :     //
    1184             :     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    1185             :     // MUBUF vaddr, but not on older subtargets which can only do this if the
    1186             :     // sign bit is known 0.
    1187             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1188       15397 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
    1189        9040 :         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
    1190        3917 :          CurDAG->SignBitIsZero(N0))) {
    1191        9082 :       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
    1192        9082 :       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1193        4541 :       return true;
    1194             :     }
    1195             :   }
    1196             : 
    1197             :   // (node)
    1198        3242 :   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
    1199        3242 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1200        1621 :   return true;
    1201             : }
    1202             : 
    1203        6343 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    1204             :                                                   SDValue Addr,
    1205             :                                                   SDValue &SRsrc,
    1206             :                                                   SDValue &SOffset,
    1207             :                                                   SDValue &Offset) const {
    1208             :   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
    1209         362 :   if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    1210             :     return false;
    1211             : 
    1212             :   SDLoc DL(Addr);
    1213         175 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1214         175 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1215             : 
    1216         350 :   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1217             : 
    1218         175 :   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1219          89 :   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1220             :     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1221             : 
    1222             :   // FIXME: Get from MachinePointerInfo? We should only be using the frame
    1223             :   // offset if we know this is in a call sequence.
    1224         350 :   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1225             : 
    1226         350 :   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
    1227             :   return true;
    1228             : }
    1229             : 
    1230       27359 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1231             :                                            SDValue &SOffset, SDValue &Offset,
    1232             :                                            SDValue &GLC, SDValue &SLC,
    1233             :                                            SDValue &TFE) const {
    1234       27359 :   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
    1235             :   const SIInstrInfo *TII =
    1236       27359 :     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
    1237             : 
    1238       27359 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1239             :               GLC, SLC, TFE))
    1240             :     return false;
    1241             : 
    1242       38420 :   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    1243       57630 :       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    1244       19210 :       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    1245       18290 :     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
    1246       36580 :                     APInt::getAllOnesValue(32).getZExtValue(); // Size
    1247             :     SDLoc DL(Addr);
    1248             : 
    1249             :     const SITargetLowering& Lowering =
    1250       18290 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1251             : 
    1252       18290 :     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    1253             :     return true;
    1254             :   }
    1255             :   return false;
    1256             : }
    1257             : 
    1258           8 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1259             :                                            SDValue &Soffset, SDValue &Offset
    1260             :                                            ) const {
    1261           8 :   SDValue GLC, SLC, TFE;
    1262             : 
    1263           8 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1264             : }
    1265             : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1266             :                                            SDValue &Soffset, SDValue &Offset,
    1267             :                                            SDValue &SLC) const {
    1268         517 :   SDValue GLC, TFE;
    1269             : 
    1270         517 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1271             : }
    1272             : 
    1273         434 : bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
    1274             :                                              SDValue &SOffset,
    1275             :                                              SDValue &ImmOffset) const {
    1276             :   SDLoc DL(Constant);
    1277             :   const uint32_t Align = 4;
    1278             :   const uint32_t MaxImm = alignDown(4095, Align);
    1279         868 :   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
    1280             :   uint32_t Overflow = 0;
    1281             : 
    1282         434 :   if (Imm > MaxImm) {
    1283          16 :     if (Imm <= MaxImm + 64) {
    1284             :       // Use an SOffset inline constant for 4..64
    1285           2 :       Overflow = Imm - MaxImm;
    1286             :       Imm = MaxImm;
    1287             :     } else {
    1288             :       // Try to keep the same value in SOffset for adjacent loads, so that
    1289             :       // the corresponding register contents can be re-used.
    1290             :       //
    1291             :       // Load values with all low-bits (except for alignment bits) set into
    1292             :       // SOffset, so that a larger range of values can be covered using
    1293             :       // s_movk_i32.
    1294             :       //
    1295             :       // Atomic operations fail to work correctly when individual address
    1296             :       // components are unaligned, even if their sum is aligned.
    1297          14 :       uint32_t High = (Imm + Align) & ~4095;
    1298          14 :       uint32_t Low = (Imm + Align) & 4095;
    1299             :       Imm = Low;
    1300          14 :       Overflow = High - Align;
    1301             :     }
    1302             :   }
    1303             : 
    1304             :   // There is a hardware bug in SI and CI which prevents address clamping in
    1305             :   // MUBUF instructions from working correctly with SOffsets. The immediate
    1306             :   // offset is unaffected.
    1307          32 :   if (Overflow > 0 &&
    1308          16 :       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    1309             :     return false;
    1310             : 
    1311         852 :   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
    1312             : 
    1313         426 :   if (Overflow <= 64)
    1314         838 :     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
    1315             :   else
    1316          14 :     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1317             :                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
    1318             :                       0);
    1319             : 
    1320             :   return true;
    1321             : }
    1322             : 
    1323         251 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
    1324             :                                                     SDValue &SOffset,
    1325             :                                                     SDValue &ImmOffset) const {
    1326             :   SDLoc DL(Offset);
    1327             : 
    1328             :   if (!isa<ConstantSDNode>(Offset))
    1329             :     return false;
    1330             : 
    1331         251 :   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
    1332             : }
    1333             : 
    1334         383 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
    1335             :                                                      SDValue &SOffset,
    1336             :                                                      SDValue &ImmOffset,
    1337             :                                                      SDValue &VOffset) const {
    1338             :   SDLoc DL(Offset);
    1339             : 
    1340             :   // Don't generate an unnecessary voffset for constant offsets.
    1341             :   if (isa<ConstantSDNode>(Offset)) {
    1342         259 :     SDValue Tmp1, Tmp2;
    1343             : 
    1344             :     // When necessary, use a voffset in <= CI anyway to work around a hardware
    1345             :     // bug.
    1346         378 :     if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
    1347         119 :         SelectMUBUFConstant(Offset, Tmp1, Tmp2))
    1348         251 :       return false;
    1349             :   }
    1350             : 
    1351         132 :   if (CurDAG->isBaseWithConstantOffset(Offset)) {
    1352          66 :     SDValue N0 = Offset.getOperand(0);
    1353          66 :     SDValue N1 = Offset.getOperand(1);
    1354         196 :     if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
    1355          64 :         SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
    1356          64 :       VOffset = N0;
    1357             :       return true;
    1358             :     }
    1359             :   }
    1360             : 
    1361         136 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1362         136 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1363          68 :   VOffset = Offset;
    1364             : 
    1365          68 :   return true;
    1366             : }
    1367             : 
    1368             : template <bool IsSigned>
    1369       10637 : bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
    1370             :                                           SDValue &VAddr,
    1371             :                                           SDValue &Offset,
    1372             :                                           SDValue &SLC) const {
    1373             :   int64_t OffsetVal = 0;
    1374             : 
    1375       13201 :   if (Subtarget->hasFlatInstOffsets() &&
    1376        2564 :       CurDAG->isBaseWithConstantOffset(Addr)) {
    1377         380 :     SDValue N0 = Addr.getOperand(0);
    1378         380 :     SDValue N1 = Addr.getOperand(1);
    1379         380 :     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    1380             : 
    1381         380 :     if ((IsSigned && isInt<13>(COffsetVal)) ||
    1382          97 :         (!IsSigned && isUInt<12>(COffsetVal))) {
    1383             :       Addr = N0;
    1384             :       OffsetVal = COffsetVal;
    1385             :     }
    1386             :   }
    1387             : 
    1388       10637 :   VAddr = Addr;
    1389       31911 :   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
    1390       31911 :   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
    1391             : 
    1392       10637 :   return true;
    1393             : }
    1394             : 
    1395             : bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
    1396             :                                           SDValue &VAddr,
    1397             :                                           SDValue &Offset,
    1398             :                                           SDValue &SLC) const {
    1399        1003 :   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
    1400             : }
    1401             : 
    1402             : bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
    1403             :                                           SDValue &VAddr,
    1404             :                                           SDValue &Offset,
    1405             :                                           SDValue &SLC) const {
    1406         212 :   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
    1407             : }
    1408             : 
    1409       29413 : bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
    1410             :                                           SDValue &Offset, bool &Imm) const {
    1411             : 
    1412             :   // FIXME: Handle non-constant offsets.
    1413             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
    1414             :   if (!C)
    1415             :     return false;
    1416             : 
    1417             :   SDLoc SL(ByteOffsetNode);
    1418       29358 :   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
    1419       29358 :   int64_t ByteOffset = C->getSExtValue();
    1420       29358 :   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
    1421             : 
    1422       29358 :   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    1423       58556 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1424       29278 :     Imm = true;
    1425             :     return true;
    1426             :   }
    1427             : 
    1428          80 :   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    1429             :     return false;
    1430             : 
    1431          68 :   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    1432             :     // 32-bit Immediates are supported on Sea Islands.
    1433          66 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1434             :   } else {
    1435          70 :     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    1436          70 :     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
    1437             :                                             C32Bit), 0);
    1438             :   }
    1439          68 :   Imm = false;
    1440             :   return true;
    1441             : }
    1442             : 
    1443       31195 : SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
    1444             :   if (Addr.getValueType() != MVT::i32)
    1445       31083 :     return Addr;
    1446             : 
    1447             :   // Zero-extend a 32-bit address.
    1448             :   SDLoc SL(Addr);
    1449             : 
    1450         112 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1451             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1452         112 :   unsigned AddrHiVal = Info->get32BitAddressHighBits();
    1453         224 :   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
    1454             : 
    1455             :   const SDValue Ops[] = {
    1456         112 :     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    1457             :     Addr,
    1458         112 :     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    1459         336 :     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
    1460             :             0),
    1461         112 :     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
    1462         336 :   };
    1463             : 
    1464         336 :   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
    1465         112 :                                         Ops), 0);
    1466             : }
    1467             : 
    1468       31195 : bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
    1469             :                                      SDValue &Offset, bool &Imm) const {
    1470             :   SDLoc SL(Addr);
    1471             : 
    1472       31195 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1473       28833 :     SDValue N0 = Addr.getOperand(0);
    1474       28833 :     SDValue N1 = Addr.getOperand(1);
    1475             : 
    1476       28833 :     if (SelectSMRDOffset(N1, Offset, Imm)) {
    1477       28821 :       SBase = Expand32BitAddress(N0);
    1478       28821 :       return true;
    1479             :     }
    1480             :   }
    1481        2374 :   SBase = Expand32BitAddress(Addr);
    1482        4748 :   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1483        2374 :   Imm = true;
    1484        2374 :   return true;
    1485             : }
    1486             : 
    1487             : bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
    1488             :                                        SDValue &Offset) const {
    1489             :   bool Imm;
    1490       31162 :   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
    1491             : }
    1492             : 
    1493           9 : bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
    1494             :                                          SDValue &Offset) const {
    1495             : 
    1496           9 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1497             :     return false;
    1498             : 
    1499             :   bool Imm;
    1500           9 :   if (!SelectSMRD(Addr, SBase, Offset, Imm))
    1501             :     return false;
    1502             : 
    1503           9 :   return !Imm && isa<ConstantSDNode>(Offset);
    1504             : }
    1505             : 
    1506          24 : bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
    1507             :                                         SDValue &Offset) const {
    1508             :   bool Imm;
    1509          24 :   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
    1510          24 :          !isa<ConstantSDNode>(Offset);
    1511             : }
    1512             : 
    1513             : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
    1514             :                                              SDValue &Offset) const {
    1515             :   bool Imm;
    1516         566 :   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
    1517             : }
    1518             : 
    1519          38 : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
    1520             :                                                SDValue &Offset) const {
    1521          38 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1522             :     return false;
    1523             : 
    1524             :   bool Imm;
    1525          14 :   if (!SelectSMRDOffset(Addr, Offset, Imm))
    1526             :     return false;
    1527             : 
    1528           3 :   return !Imm && isa<ConstantSDNode>(Offset);
    1529             : }
    1530             : 
    1531       30537 : bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
    1532             :                                             SDValue &Base,
    1533             :                                             SDValue &Offset) const {
    1534             :   SDLoc DL(Index);
    1535             : 
    1536       30537 :   if (CurDAG->isBaseWithConstantOffset(Index)) {
    1537          80 :     SDValue N0 = Index.getOperand(0);
    1538          80 :     SDValue N1 = Index.getOperand(1);
    1539             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1540             : 
    1541             :     // (add n0, c0)
    1542          80 :     Base = N0;
    1543         160 :     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    1544             :     return true;
    1545             :   }
    1546             : 
    1547             :   if (isa<ConstantSDNode>(Index))
    1548             :     return false;
    1549             : 
    1550          77 :   Base = Index;
    1551         154 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1552             :   return true;
    1553             : }
    1554             : 
    1555        3772 : SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
    1556             :                                      SDValue Val, uint32_t Offset,
    1557             :                                      uint32_t Width) {
    1558             :   // Transformation function, pack the offset and width of a BFE into
    1559             :   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
    1560             :   // source, bits [5:0] contain the offset and bits [22:16] the width.
    1561        3772 :   uint32_t PackedVal = Offset | (Width << 16);
    1562        7544 :   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
    1563             : 
    1564        7544 :   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
    1565             : }
    1566             : 
    1567         179 : void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
    1568             :   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
    1569             :   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
    1570             :   // Predicate: 0 < b <= c < 32
    1571             : 
    1572         179 :   const SDValue &Shl = N->getOperand(0);
    1573         179 :   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
    1574             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1575             : 
    1576         179 :   if (B && C) {
    1577         326 :     uint32_t BVal = B->getZExtValue();
    1578         326 :     uint32_t CVal = C->getZExtValue();
    1579             : 
    1580         163 :     if (0 < BVal && BVal <= CVal && CVal < 32) {
    1581         159 :       bool Signed = N->getOpcode() == ISD::SRA;
    1582         159 :       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    1583             : 
    1584         477 :       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
    1585             :                               32 - CVal));
    1586         159 :       return;
    1587             :     }
    1588             :   }
    1589             :   SelectCode(N);
    1590             : }
    1591             : 
    1592       16347 : void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
                         // Try to select N as a scalar bitfield extract (S_BFE_U32 / S_BFE_I32) by
                         // matching shift/mask patterns whose shift amount and mask are constants.
                         // Every pattern that fails to match breaks out of the switch and falls
                         // back to the generated matcher through SelectCode(N) at the bottom.
    1593       32694 :   switch (N->getOpcode()) {
    1594        5532 :   case ISD::AND:
    1595       11064 :     if (N->getOperand(0).getOpcode() == ISD::SRL) {
    1596             :       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
    1597             :       // Predicate: isMask(mask)
    1598             :       const SDValue &Srl = N->getOperand(0);
    1599             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
    1600             :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1601             : 
    1602        1583 :       if (Shift && Mask) {
    1603        3144 :         uint32_t ShiftVal = Shift->getZExtValue();
    1604        3144 :         uint32_t MaskVal = Mask->getZExtValue();
    1605             : 
    1606             :         if (isMask_32(MaskVal)) {
                               // The extracted field starts at bit ShiftVal and is as wide as
                               // the number of set bits in the mask.
    1607             :           uint32_t WidthVal = countPopulation(MaskVal);
    1608             : 
    1609        2794 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1610             :                                   Srl.getOperand(0), ShiftVal, WidthVal));
    1611        1397 :           return;
    1612             :         }
    1613             :       }
    1614             :     }
    1615             :     break;
    1616        4916 :   case ISD::SRL:
    1617        9832 :     if (N->getOperand(0).getOpcode() == ISD::AND) {
    1618             :       // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
    1619             :       // Predicate: isMask(mask >> b)
    1620             :       const SDValue &And = N->getOperand(0);
    1621             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1622         809 :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
    1623             : 
    1624         809 :       if (Shift && Mask) {
    1625        1618 :         uint32_t ShiftVal = Shift->getZExtValue();
                             // Only the mask bits that survive the right shift matter, so the
                             // mask is shifted first and the predicate is tested on the result.
    1626        1618 :         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
    1627             : 
    1628             :         if (isMask_32(MaskVal)) {
    1629             :           uint32_t WidthVal = countPopulation(MaskVal);
    1630             : 
    1631        1614 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1632             :                                   And.getOperand(0), ShiftVal, WidthVal));
    1633         807 :           return;
    1634             :         }
    1635             :       }
    1636        4107 :     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
                           // srl of shl: delegated to SelectS_BFEFromShifts, which either
                           // replaces N or calls SelectCode(N) itself, hence the return.
    1637          12 :       SelectS_BFEFromShifts(N);
    1638          12 :       return;
    1639             :     }
    1640             :     break;
    1641        1875 :   case ISD::SRA:
    1642        3750 :     if (N->getOperand(0).getOpcode() == ISD::SHL) {
                           // sra of shl: same delegation as the srl-of-shl case above.
    1643         167 :       SelectS_BFEFromShifts(N);
    1644         167 :       return;
    1645             :     }
    1646             :     break;
    1647             : 
    1648        4024 :   case ISD::SIGN_EXTEND_INREG: {
    1649             :     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    1650        4024 :     SDValue Src = N->getOperand(0);
    1651        4024 :     if (Src.getOpcode() != ISD::SRL)
    1652             :       break;
    1653             : 
    1654             :     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    1655             :     if (!Amt)
    1656             :       break;
    1657             : 
                         // The field width is the bit size of the type named by the VT operand;
                         // the field offset is the constant shift amount.
    1658        1277 :     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    1659        3831 :     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
    1660        1277 :                             Amt->getZExtValue(), Width));
    1661             :     return;
    1662             :   }
    1663             :   }
    1664             : 
    1665             :   SelectCode(N);
    1666             : }
    1667             : 
    1668         468 : bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
                         // Returns true when the BRCOND node N has exactly one use and its
                         // condition is a single-use SETCC whose compared operands are i32, or
                         // i64 with a SETEQ/SETNE condition code on a subtarget that reports
                         // hasScalarCompareEq64(). The caller uses this to decide whether the
                         // branch may be selected as an SCC branch.
    1669             :   assert(N->getOpcode() == ISD::BRCOND);
    1670             :   if (!N->hasOneUse())
    1671             :     return false;
    1672             : 
    1673         468 :   SDValue Cond = N->getOperand(1);
                         // Look through a CopyToReg to the value being copied (its operand 2).
    1674         468 :   if (Cond.getOpcode() == ISD::CopyToReg)
    1675           0 :     Cond = Cond.getOperand(2);
    1676             : 
    1677         910 :   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    1678             :     return false;
    1679             : 
                         // The decision is based on the type of the values being compared, not
                         // on the type of the comparison result.
    1680             :   MVT VT = Cond.getOperand(0).getSimpleValueType();
    1681         438 :   if (VT == MVT::i32)
    1682             :     return true;
    1683             : 
    1684          87 :   if (VT == MVT::i64) {
    1685          29 :     auto ST = static_cast<const SISubtarget *>(Subtarget);
    1686             : 
                           // For 64-bit operands only equality / inequality qualify, and only
                           // when the subtarget reports support for it.
    1687          29 :     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    1688          56 :     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
    1689             :   }
    1690             : 
    1691             :   return false;
    1692             : }
    1693             : 
    1694         546 : void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
                         // Selects a BRCOND node. An undef condition becomes SI_BR_UNDEF;
                         // otherwise the condition is copied into SCC or VCC and N is morphed
                         // into S_CBRANCH_SCC1 or S_CBRANCH_VCCNZ respectively.
    1695         546 :   SDValue Cond = N->getOperand(1);
    1696             : 
    1697         546 :   if (Cond.isUndef()) {
    1698         156 :     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
    1699             :                          N->getOperand(2), N->getOperand(0));
    1700          78 :     return;
    1701             :   }
    1702             : 
                         // The SCC form is used only when the condition has a suitable shape
                         // (isCBranchSCC) and the branch is uniform (isUniformBr).
    1703         468 :   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
    1704             :   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
    1705         468 :   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
    1706             :   SDLoc SL(N);
    1707             : 
    1708         468 :   if (!UseSCCBr) {
    1709             :     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    1710             :     // analyzed what generates the vcc value, so we do not know whether vcc
    1711             :     // bits for disabled lanes are 0.  Thus we need to mask out bits for
    1712             :     // disabled lanes.
    1713             :     //
    1714             :     // (For the case that we select S_CBRANCH_SCC1 and it gets
    1715             :     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    1716             :     // SIInstrInfo::moveToVALU which inserts the S_AND.)
    1717             :     //
    1718             :     // We could add an analysis of what generates the vcc value here and omit
    1719             :     // the S_AND when it is unnecessary. But it would be better to add a separate
    1720             :     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    1721             :     // catches both cases.
    1722         318 :     Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
    1723             :                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
    1724             :                                Cond),
    1725             :                    0);
    1726             :   }
    1727             : 
                         // Copy the (possibly masked) condition into the chosen physical
                         // register, chained on N's incoming chain; the branch then takes the
                         // copy's result 0 as its second operand after the target block.
    1728         936 :   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
    1729         936 :   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
    1730         468 :                        N->getOperand(2), // Basic Block
    1731             :                        VCC.getValue(0));
    1732             : }
    1733             : 
    1734        1549 : void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
                         // Selects an f32 node with three operands as V_MAD_MIX_F32 when the
                         // subtarget has mad-mix instructions and at least one operand is an
                         // f16-to-f32 conversion that can be folded into the instruction. All
                         // other cases go to the generated matcher via SelectCode(N).
    1735             :   MVT VT = N->getSimpleValueType(0);
    1736        1549 :   if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    1737             :     SelectCode(N);
    1738        1472 :     return;
    1739             :   }
    1740             : 
    1741          77 :   SDValue Src0 = N->getOperand(0);
    1742          77 :   SDValue Src1 = N->getOperand(1);
    1743          77 :   SDValue Src2 = N->getOperand(2);
                         // Left uninitialized here; SelectVOP3PMadMixModsImpl assigns its Mods
                         // output unconditionally, so all three are set by the calls below.
    1744             :   unsigned Src0Mods, Src1Mods, Src2Mods;
    1745             : 
    1746             :   // Avoid using v_mad_mix_f32 unless there is actually an operand using the
    1747             :   // conversion from f16.
                         // Each call rewrites SrcN in place to the stripped source and reports
                         // whether a conversion was folded.
    1748          77 :   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
    1749          77 :   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
    1750          77 :   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
    1751             : 
    1752             :   assert(!Subtarget->hasFP32Denormals() &&
    1753             :          "fmad selected with denormals enabled");
    1754             :   // TODO: We can select this with f32 denormals enabled if all the sources are
    1755             :   // converted from f16 (in which case fmad isn't legal).
    1756             : 
    1757          77 :   if (Sel0 || Sel1 || Sel2) {
    1758             :     // For dummy operands.
    1759         110 :     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
                           // Operand list: (modifiers, source) for each of the three inputs,
                           // followed by one i1 zero and two i32 zeros.
    1760             :     SDValue Ops[] = {
    1761         110 :       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
    1762         110 :       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
    1763         110 :       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
    1764         110 :       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
    1765             :       Zero, Zero
    1766         440 :     };
    1767             : 
    1768         110 :     CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
    1769             :   } else {
    1770             :     SelectCode(N);
    1771             :   }
    1772             : }
    1773             : 
    1774             : // This is here because there isn't a way to use the generated sub0_sub1 as the
    1775             : // subreg index to EXTRACT_SUBREG in tablegen.
    1776         195 : void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
                         // Selects a compare-and-swap memory node as a returning buffer atomic
                         // (addr64 form first, then offset form) and extracts the sub0 or
                         // sub0_sub1 part of the machine node's result as the value of N.
                         // Flat-address-space nodes, and nodes for which neither MUBUF
                         // addressing form matches, are handed to SelectCode(N).
    1777             :   MemSDNode *Mem = cast<MemSDNode>(N);
    1778             :   unsigned AS = Mem->getAddressSpace();
    1779         195 :   if (AS == AMDGPUASI.FLAT_ADDRESS) {
    1780             :     SelectCode(N);
    1781         169 :     return;
    1782             :   }
    1783             : 
    1784             :   MVT VT = N->getSimpleValueType(0);
                         // Anything other than i32 takes the X2 opcodes and the sub0_sub1
                         // sub-register below.
    1785             :   bool Is32 = (VT == MVT::i32);
    1786             :   SDLoc SL(N);
    1787             : 
    1788             :   MachineSDNode *CmpSwap = nullptr;
                         // First choice: the ADDR64 addressing form, when the subtarget has it.
    1789         102 :   if (Subtarget->hasAddr64()) {
    1790          17 :     SDValue SRsrc, VAddr, SOffset, Offset, SLC;
    1791             : 
    1792          17 :     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
    1793           8 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
    1794             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
    1795           8 :       SDValue CmpVal = Mem->getOperand(2);
    1796             : 
    1797             :       // XXX - Do we care about glue operands?
    1798             : 
    1799             :       SDValue Ops[] = {
    1800             :         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1801           8 :       };
    1802             : 
    1803          24 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1804             :     }
    1805             :   }
    1806             : 
                         // Second choice: the OFFSET addressing form.
    1807           8 :   if (!CmpSwap) {
    1808          43 :     SDValue SRsrc, SOffset, Offset, SLC;
    1809          43 :     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
    1810          18 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
    1811             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
    1812             : 
    1813          18 :       SDValue CmpVal = Mem->getOperand(2);
    1814             :       SDValue Ops[] = {
    1815             :         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1816          18 :       };
    1817             : 
    1818          54 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1819             :     }
    1820             :   }
    1821             : 
                         // Neither addressing form matched: let the generated matcher handle N.
    1822          51 :   if (!CmpSwap) {
    1823             :     SelectCode(N);
    1824             :     return;
    1825             :   }
    1826             : 
                         // Carry the original node's single memory operand over to the new
                         // machine node.
    1827          26 :   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
    1828          26 :   *MMOs = Mem->getMemOperand();
    1829          26 :   CmpSwap->setMemRefs(MMOs, MMOs + 1);
    1830             : 
                         // NOTE(review): result 0 of the machine node is presumably wider than
                         // VT, with the returned value in its low sub-register - confirm against
                         // the instruction definitions.
    1831          26 :   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
    1832             :   SDValue Extract
    1833          52 :     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
    1834             : 
                         // Value users are redirected to the extracted sub-register, users of
                         // N's result 1 to result 1 of the new node; N is then dead.
    1835          26 :   ReplaceUses(SDValue(N, 0), Extract);
    1836          26 :   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
    1837          26 :   CurDAG->RemoveDeadNode(N);
    1838             : }
    1839             : 
    1840         568 : bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
    1841             :                                             unsigned &Mods) const {
                         // Peels an optional FNEG and then an optional FABS off In. Mods receives
                         // the matching SISrcMods::NEG / SISrcMods::ABS bits (0 if neither was
                         // found) and Src the remaining operand. Always returns true.
    1842         568 :   Mods = 0;
    1843       20654 :   Src = In;
    1844             : 
                         // The checks run in this fixed order, so fneg(fabs(x)) folds both
                         // modifiers, while for fabs(fneg(x)) only the fabs is folded and the
                         // fneg stays part of Src.
    1845       21222 :   if (Src.getOpcode() == ISD::FNEG) {
    1846          24 :     Mods |= SISrcMods::NEG;
    1847        1381 :     Src = Src.getOperand(0);
    1848             :   }
    1849             : 
    1850       41308 :   if (Src.getOpcode() == ISD::FABS) {
    1851         593 :     Mods |= SISrcMods::ABS;
    1852         593 :     Src = Src.getOperand(0);
    1853             :   }
    1854             : 
    1855         568 :   return true;
    1856             : }
    1857             : 
    1858       20086 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
    1859             :                                         SDValue &SrcMods) const {
                         // Wrapper around SelectVOP3ModsImpl that returns the modifier bits as an
                         // i32 target constant in SrcMods. Returns whatever the Impl returns;
                         // SrcMods is only written when that is true.
    1860             :   unsigned Mods;
    1861             :   if (SelectVOP3ModsImpl(In, Src, Mods)) {
    1862       80344 :     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1863             :     return true;
    1864             :   }
    1865             : 
    1866             :   return false;
    1867             : }
    1868             : 
    1869         492 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
    1870             :                                              SDValue &SrcMods) const {
                         // Folds source modifiers exactly like SelectVOP3Mods, but the match only
                         // succeeds when isNoNanSrc() accepts the stripped source. The result of
                         // SelectVOP3Mods itself is not consulted.
    1871         492 :   SelectVOP3Mods(In, Src, SrcMods);
    1872         492 :   return isNoNanSrc(Src);
    1873             : }
    1874             : 
    1875             : bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
                         // Matches only operands that are not themselves an FABS or FNEG node;
                         // Src is set to In unchanged on success and left untouched on failure.
    1876        4027 :   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    1877             :     return false;
    1878             : 
    1879        3693 :   Src = In;
    1880             :   return true;
    1881             : }
    1882             : 
    1883        9575 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
    1884             :                                          SDValue &SrcMods, SDValue &Clamp,
    1885             :                                          SDValue &Omod) const {
                         // Same as SelectVOP3Mods, and additionally sets Clamp and Omod to zero
                         // target constants of type i1.
    1886             :   SDLoc DL(In);
    1887       19150 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1888       19150 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1889             : 
    1890       19150 :   return SelectVOP3Mods(In, Src, SrcMods);
    1891             : }
    1892             : 
    1893          46 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
    1894             :                                                    SDValue &SrcMods,
    1895             :                                                    SDValue &Clamp,
    1896             :                                                    SDValue &Omod) const {
                         // Same as SelectVOP3Mods, with Clamp and Omod both set to one shared
                         // zero target constant of type i32.
    1897         138 :   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    1898          46 :   return SelectVOP3Mods(In, Src, SrcMods);
    1899             : }
    1900             : 
    1901         385 : bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
    1902             :                                          SDValue &Clamp, SDValue &Omod) const {
                         // Passes In through as Src without looking for source modifiers and
                         // sets Clamp and Omod to zero i1 target constants. Always returns true.
    1903         385 :   Src = In;
    1904             : 
    1905             :   SDLoc DL(In);
    1906         770 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1907         770 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1908             : 
    1909         385 :   return true;
    1910             : }
    1911             : 
    1912             : static SDValue stripBitcast(SDValue Val) {
                         // Looks through a single BITCAST: returns its operand, or Val itself
                         // when Val is not a bitcast. Nested bitcasts are not unwrapped.
    1913        3084 :   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
    1914             : }
    1915             : 
    1916             : // Figure out if this is really an extract of the high 16-bits of a dword.
    1917         564 : static bool isExtractHiElt(SDValue In, SDValue &Out) {
                         // Matches "truncate (srl x, 16)", optionally wrapped in one bitcast. On
                         // a match Out is set to x with one bitcast stripped and true is
                         // returned; otherwise Out is left unmodified and false is returned.
    1918             :   In = stripBitcast(In);
    1919         564 :   if (In.getOpcode() != ISD::TRUNCATE)
    1920             :     return false;
    1921             : 
    1922         176 :   SDValue Srl = In.getOperand(0);
    1923         176 :   if (Srl.getOpcode() == ISD::SRL) {
                           // Only a constant shift amount of exactly 16 qualifies.
    1924             :     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
    1925         218 :       if (ShiftAmt->getZExtValue() == 16) {
    1926         109 :         Out = stripBitcast(Srl.getOperand(0));
    1927         109 :         return true;
    1928             :       }
    1929             :     }
    1930             :   }
    1931             : 
    1932             :   return false;
    1933             : }
    1934             : 
    1935             : // Look through operations that obscure just looking at the low 16-bits of the
    1936             : // same register.
    1937         276 : static SDValue stripExtractLoElt(SDValue In) {
                         // A TRUNCATE whose source is exactly 32 bits wide is replaced by that
                         // source (with one bitcast stripped); anything else is returned as is.
    1938         276 :   if (In.getOpcode() == ISD::TRUNCATE) {
    1939          18 :     SDValue Src = In.getOperand(0);
    1940          18 :     if (Src.getValueType().getSizeInBits() == 32)
    1941             :       return stripBitcast(Src);
    1942             :   }
    1943             : 
    1944         261 :   return In;
    1945             : }
    1946             : 
    1947         584 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
    1948             :                                          SDValue &SrcMods) const {
                         // Computes the source operand and modifier bits for a packed (VOP3P)
                         // operand. An outer FNEG is folded into NEG|NEG_HI. A two-element
                         // BUILD_VECTOR whose halves both come from the same non-inline-immediate
                         // value is replaced by that value, with per-half negation and op_sel
                         // bits recorded in the modifiers. Always returns true.
    1949             :   unsigned Mods = 0;
    1950         584 :   Src = In;
    1951             : 
                         // Negating the whole vector toggles the negate bit of both halves.
    1952        1168 :   if (Src.getOpcode() == ISD::FNEG) {
    1953             :     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    1954          13 :     Src = Src.getOperand(0);
    1955             :   }
    1956             : 
    1957        1168 :   if (Src.getOpcode() == ISD::BUILD_VECTOR) {
                           // Remember the modifiers gathered so far; they are restored below if
                           // the vector turns out not to be foldable to a single source.
    1958             :     unsigned VecMods = Mods;
    1959             : 
    1960         138 :     SDValue Lo = stripBitcast(Src.getOperand(0));
    1961         138 :     SDValue Hi = stripBitcast(Src.getOperand(1));
    1962             : 
                           // Per-element negation toggles (XOR) the corresponding bit, so it
                           // cancels against a negation of the whole vector.
    1963         138 :     if (Lo.getOpcode() == ISD::FNEG) {
    1964          11 :       Lo = stripBitcast(Lo.getOperand(0));
    1965          11 :       Mods ^= SISrcMods::NEG;
    1966             :     }
    1967             : 
    1968         138 :     if (Hi.getOpcode() == ISD::FNEG) {
    1969          11 :       Hi = stripBitcast(Hi.getOperand(0));
    1970          11 :       Mods ^= SISrcMods::NEG_HI;
    1971             :     }
    1972             : 
                           // An element that is the high 16 bits of a dword sets the matching
                           // op_sel bit; isExtractHiElt then rewrites the element to that dword.
    1973         138 :     if (isExtractHiElt(Lo, Lo))
    1974          13 :       Mods |= SISrcMods::OP_SEL_0;
    1975             : 
    1976         138 :     if (isExtractHiElt(Hi, Hi))
    1977          11 :       Mods |= SISrcMods::OP_SEL_1;
    1978             : 
    1979         138 :     Lo = stripExtractLoElt(Lo);
    1980         138 :     Hi = stripExtractLoElt(Hi);
    1981             : 
    1982         112 :     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
    1983             :       // Really a scalar input. Just select from the low half of the register to
    1984             :       // avoid packing.
    1985             : 
    1986          27 :       Src = Lo;
    1987         108 :       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1988          27 :       return true;
    1989             :     }
    1990             : 
                           // Not foldable: discard the per-element modifiers and keep the
                           // BUILD_VECTOR itself as the source.
    1991             :     Mods = VecMods;
    1992             :   }
    1993             : 
    1994             :   // Packed instructions do not have abs modifiers.
    1995         557 :   Mods |= SISrcMods::OP_SEL_1;
    1996             : 
    1997        2228 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1998         557 :   return true;
    1999             : }
    2000             : 
    2001         264 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
    2002             :                                           SDValue &SrcMods,
    2003             :                                           SDValue &Clamp) const {
                         // Same as SelectVOP3PMods, and additionally sets Clamp to a zero i32
                         // target constant.
    2004             :   SDLoc SL(In);
    2005             : 
    2006             :   // FIXME: Handle clamp and op_sel
    2007         528 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2008             : 
    2009         528 :   return SelectVOP3PMods(In, Src, SrcMods);
    2010             : }
    2011             : 
    2012          18 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
    2013             :                                          SDValue &SrcMods) const {
                         // Placeholder matcher: passes In through unchanged with an all-zero i32
                         // modifier constant. Always returns true.
    2014          18 :   Src = In;
    2015             :   // FIXME: Handle op_sel
    2016          54 :   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    2017          18 :   return true;
    2018             : }
    2019             : 
    2020           6 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
    2021             :                                           SDValue &SrcMods,
    2022             :                                           SDValue &Clamp) const {
                         // Same as SelectVOP3OpSel, and additionally sets Clamp to a zero i32
                         // target constant.
    2023             :   SDLoc SL(In);
    2024             : 
    2025             :   // FIXME: Handle clamp
    2026          12 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2027             : 
    2028          12 :   return SelectVOP3OpSel(In, Src, SrcMods);
    2029             : }
    2030             : 
    2031             : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
    2032             :                                              SDValue &SrcMods) const {
                         // Currently just forwards to SelectVOP3Mods; no op_sel bits are produced.
    2033             :   // FIXME: Handle op_sel
    2034          24 :   return SelectVOP3Mods(In, Src, SrcMods);
    2035             : }
    2036             : 
    2037           8 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
    2038             :                                               SDValue &SrcMods,
    2039             :                                               SDValue &Clamp) const {
                         // Same as SelectVOP3OpSelMods, and additionally sets Clamp to a zero
                         // i32 target constant.
    2040             :   SDLoc SL(In);
    2041             : 
    2042             :   // FIXME: Handle clamp
    2043          16 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2044             : 
    2045           8 :   return SelectVOP3OpSelMods(In, Src, SrcMods);
    2046             : }
    2047             : 
    2048             : // The return value is not whether the match is possible (which it always is),
    2049             : // but whether or not a conversion is really used.
    2050         336 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
    2051             :                                                    unsigned &Mods) const {
                         // Mods and Src are always written: first with the plain fneg/fabs
                         // folding of SelectVOP3ModsImpl, then, if the remaining source is an
                         // FP_EXTEND, with the modifiers and op_sel bits describing the f16 value
                         // underneath it.
    2052         336 :   Mods = 0;
    2053         336 :   SelectVOP3ModsImpl(In, Src, Mods);
    2054             : 
    2055         672 :   if (Src.getOpcode() == ISD::FP_EXTEND) {
    2056         235 :     Src = Src.getOperand(0);
    2057             :     assert(Src.getValueType() == MVT::f16);
    2058         235 :     Src = stripBitcast(Src);
    2059             : 
    2060             :     // Be careful about folding modifiers if we already have an abs. fneg is
    2061             :     // applied last, so we don't want to apply an earlier fneg.
    2062         235 :     if ((Mods & SISrcMods::ABS) == 0) {
                             // Fold modifiers found below the extend. An inner fneg toggles NEG
                             // so that it cancels against an outer fneg; an inner fabs sets ABS.
    2063             :       unsigned ModsTmp;
    2064         232 :       SelectVOP3ModsImpl(Src, Src, ModsTmp);
    2065             : 
    2066         232 :       if ((ModsTmp & SISrcMods::NEG) != 0)
    2067           2 :         Mods ^= SISrcMods::NEG;
    2068             : 
    2069         232 :       if ((ModsTmp & SISrcMods::ABS) != 0)
    2070           1 :         Mods |= SISrcMods::ABS;
    2071             :     }
    2072             : 
    2073             :     // op_sel/op_sel_hi decide the source type and source.
    2074             :     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    2075             :     // If the source's op_sel is set, it picks the high half of the source
    2076             :     // register.
    2077             : 
    2078         235 :     Mods |= SISrcMods::OP_SEL_1;
    2079         235 :     if (isExtractHiElt(Src, Src)) {
    2080          59 :       Mods |= SISrcMods::OP_SEL_0;
    2081             : 
    2082             :       // TODO: Should we try to look for neg/abs here?
    2083             :     }
    2084             : 
    2085             :     return true;
    2086             :   }
    2087             : 
    2088             :   return false;
    2089             : }
    2090             : 
    2091         105 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
    2092             :                                                SDValue &SrcMods) const {
                         // Wrapper around SelectVOP3PMadMixModsImpl that always matches: the
                         // Impl's "conversion used" result is ignored and the modifier bits are
                         // returned as an i32 target constant.
    2093         105 :   unsigned Mods = 0;
    2094         105 :   SelectVOP3PMadMixModsImpl(In, Src, Mods);
    2095         420 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    2096         105 :   return true;
    2097             : }
    2098             : 
    2099             : // TODO: Can we identify things like v_mad_mixhi_f16?
    2100          63 : bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
                         // Produces in Src a value whose upper 16 bits hold In. Undef is passed
                         // through; integer and FP constants are shifted left by 16 and
                         // materialized with V_MOV_B32_e32; any other input matches only if it is
                         // already an extract of the high half of a dword (isExtractHiElt).
    2101          63 :   if (In.isUndef()) {
    2102           8 :     Src = In;
    2103             :     return true;
    2104             :   }
    2105             : 
    2106             :   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    2107             :     SDLoc SL(In);
    2108           3 :     SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    2109           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2110           1 :                                                  SL, MVT::i32, K);
    2111           1 :     Src = SDValue(MovK, 0);
    2112             :     return true;
    2113             :   }
    2114             : 
    2115             :   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    2116             :     SDLoc SL(In);
                           // Same as the integer case, using the raw bit pattern of the FP value.
    2117           1 :     SDValue K = CurDAG->getTargetConstant(
    2118           5 :       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    2119           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2120           1 :                                                  SL, MVT::i32, K);
    2121           1 :     Src = SDValue(MovK, 0);
    2122             :     return true;
    2123             :   }
    2124             : 
    2125          53 :   return isExtractHiElt(In, Src);
    2126             : }
    2127             : 
    2128       21009 : void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
                         // Runs the target lowering's PostISelFolding hook over every machine
                         // node in the DAG, and repeats whole passes until one completes without
                         // any node being changed. Dead nodes are removed after each pass.
    2129             :   const AMDGPUTargetLowering& Lowering =
    2130       21009 :     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
    2131             :   bool IsModified = false;
    2132       23207 :   do {
    2133             :     IsModified = false;
    2134             : 
    2135             :     // Go over all selected nodes and try to fold them a bit more
    2136       23207 :     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    2137     1854438 :     while (Position != CurDAG->allnodes_end()) {
                             // The iterator is advanced before the node is processed.
                             // NOTE(review): presumably so that replacing Node below cannot
                             // disturb the traversal - confirm.
    2138             :       SDNode *Node = &*Position++;
    2139             :       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
    2140      403789 :       if (!MachineNode)
    2141      403789 :         continue;
    2142             : 
                             // A result different from Node counts as a change; a null result
                             // is recorded as a change but no uses are replaced.
    2143      500223 :       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
    2144      500223 :       if (ResNode != Node) {
    2145       34237 :         if (ResNode)
    2146       34160 :           ReplaceUses(Node, ResNode);
    2147             :         IsModified = true;
    2148             :       }
    2149             :     }
    2150       23207 :     CurDAG->RemoveDeadNodes();
    2151             :   } while (IsModified);
    2152       21009 : }
    2153             : 
    2154       82238 : void R600DAGToDAGISel::Select(SDNode *N) {
                         // Selection entry point for R600. Vector-building nodes are handled by
                         // SelectBuildVector with a register class chosen from the element count;
                         // everything else goes to the generated matcher via SelectCode(N).
    2155       82238 :   unsigned int Opc = N->getOpcode();
    2156       82238 :   if (N->isMachineOpcode()) {
    2157             :     N->setNodeId(-1);
    2158             :     return;   // Already selected.
    2159             :   }
    2160             : 
    2161       82238 :   switch (Opc) {
    2162             :   default: break;
    2163        2415 :   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
    2164             :   case ISD::SCALAR_TO_VECTOR:
    2165             :   case ISD::BUILD_VECTOR: {
    2166        4830 :     EVT VT = N->getValueType(0);
    2167        2415 :     unsigned NumVectorElts = VT.getVectorNumElements();
    2168             :     unsigned RegClassID;
    2169             :     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    2170             :     // that adds a 128 bits reg copy when going through TwoAddressInstructions
    2171             :     // pass. We want to avoid 128 bits copies as much as possible because they
    2172             :     // can't be bundled by our scheduler.
                         // Only 2- and 4-element vectors are supported; a 4-element
                         // BUILD_VERTICAL_VECTOR uses the dedicated vertical register class.
    2173        2415 :     switch(NumVectorElts) {
    2174             :     case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    2175        2026 :     case 4:
    2176        2026 :       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    2177             :         RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
    2178             :       else
    2179             :         RegClassID = AMDGPU::R600_Reg128RegClassID;
    2180             :       break;
    2181           0 :     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    2182             :     }
    2183        2415 :     SelectBuildVector(N, RegClassID);
    2184             :     return;
    2185             :   }
    2186             :   }
    2187             : 
    2188             :   SelectCode(N);
    2189             : }
    2190             : 
    2191        1998 : bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
    2192             :                                           SDValue &Offset) {
                         // Splits Addr into a Base value and an i32 target-constant Offset.
                         // Always returns true; the final else branch accepts any address with a
                         // zero offset.
    2193             :   ConstantSDNode *C;
    2194             :   SDLoc DL(Addr);
    2195             : 
                         // Case 1: the address is a plain constant, so the base is the
                         // INDIRECT_BASE_ADDR register and the constant becomes the offset.
    2196             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    2197           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    2198           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // Case 2: DWORDADDR wrapping a constant is treated like case 1, using
                         // the wrapped constant as the offset.
    2199        1998 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
    2200             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    2201        3454 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    2202        3454 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // Case 3: ADD or OR with a constant right-hand side; the left operand
                         // is the base and the constant is the offset.
    2203         542 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
    2204             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    2205           0 :     Base = Addr.getOperand(0);
    2206           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2207             :   } else {
                           // Fallback: use the whole address as the base with a zero offset.
    2208         271 :     Base = Addr;
    2209         542 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    2210             :   }
    2211             : 
    2212        1998 :   return true;
    2213             : }
    2214             : 
    2215        1522 : bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, // Splits Addr into Base plus a constant i32 Offset.
    2216             :                                           SDValue &Offset) { // Base and Offset are outputs; every path returns true.
    2217             :   ConstantSDNode *IMMOffset; // Assigned inside the conditions below when the dyn_cast is evaluated.
    2218             : 
    2219             :   if (Addr.getOpcode() == ISD::ADD // Case 1: an ADD
    2220             :       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) // whose operand 1 is a constant
    2221        2629 :       && isInt<16>(IMMOffset->getZExtValue())) { // that passes isInt<16>. NOTE(review): the value is zero-extended before this signed-range test, so a negative constant presumably fails it and falls through to the default case -- confirm intended.
    2222             : 
    2223         368 :       Base = Addr.getOperand(0); // Operand 0 is the base; the constant goes into Offset.
    2224        1104 :       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2225         368 :                                          MVT::i32);
    2226         368 :       return true;
    2227             :   // If the pointer address is constant, we can move it to the offset field.
    2228             :   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) // Case 2: Addr is itself a constant
    2229         711 :              && isInt<16>(IMMOffset->getZExtValue())) { // that passes the same isInt<16> test.
    2230         711 :     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), // Base becomes an i32 copy from the AMDGPU::ZERO register,
    2231         237 :                                   SDLoc(CurDAG->getEntryNode()), // chained to the DAG entry node.
    2232         237 :                                   AMDGPU::ZERO, MVT::i32);
    2233         711 :     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2234         237 :                                        MVT::i32);
    2235         237 :     return true;
    2236             :   }
    2237             : 
    2238             :   // Default case, no offset
    2239         917 :   Base = Addr; // The whole address is the base.
    2240        2751 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    2241         917 :   return true;
    2242             : }

Generated by: LCOV version 1.13