LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUISelDAGToDAG.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 611 922 66.3 %
Date: 2018-10-16 05:50:02 Functions: 46 81 56.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Defines an instruction selector for the AMDGPU target.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "AMDGPU.h"
      16             : #include "AMDGPUArgumentUsageInfo.h"
      17             : #include "AMDGPUISelLowering.h" // For AMDGPUISD
      18             : #include "AMDGPUInstrInfo.h"
      19             : #include "AMDGPUPerfHintAnalysis.h"
      20             : #include "AMDGPURegisterInfo.h"
      21             : #include "AMDGPUSubtarget.h"
      22             : #include "AMDGPUTargetMachine.h"
      23             : #include "SIDefines.h"
      24             : #include "SIISelLowering.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "SIMachineFunctionInfo.h"
      27             : #include "SIRegisterInfo.h"
      28             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      29             : #include "llvm/ADT/APInt.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/StringRef.h"
      32             : #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
      33             : #include "llvm/Analysis/ValueTracking.h"
      34             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      35             : #include "llvm/CodeGen/ISDOpcodes.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/SelectionDAG.h"
      39             : #include "llvm/CodeGen/SelectionDAGISel.h"
      40             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      41             : #include "llvm/CodeGen/ValueTypes.h"
      42             : #include "llvm/IR/BasicBlock.h"
      43             : #include "llvm/IR/Instruction.h"
      44             : #include "llvm/MC/MCInstrDesc.h"
      45             : #include "llvm/Support/Casting.h"
      46             : #include "llvm/Support/CodeGen.h"
      47             : #include "llvm/Support/ErrorHandling.h"
      48             : #include "llvm/Support/MachineValueType.h"
      49             : #include "llvm/Support/MathExtras.h"
      50             : #include <cassert>
      51             : #include <cstdint>
      52             : #include <new>
      53             : #include <vector>
      54             : 
      55             : using namespace llvm;
      56             : 
      57             : namespace llvm {
      58             : // Forward declaration only; the class is not otherwise referenced in this part of the file.
      59             : class R600InstrInfo;
      60             : 
      61             : } // end namespace llvm
      62             : 
      63             : //===----------------------------------------------------------------------===//
      64             : // Instruction Selector Implementation
      65             : //===----------------------------------------------------------------------===//
      66             : 
      67             : namespace {
      68             : 
      69             : /// AMDGPU-specific code to select AMDGPU machine instructions for
      70             : /// SelectionDAG operations.
      71             : class AMDGPUDAGToDAGISel : public SelectionDAGISel {
      72             :   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
      73             :   // make the right decision when generating code for different targets.
      74             :   const GCNSubtarget *Subtarget; // Assigned in runOnMachineFunction(); not set by the constructor.
      75             :   bool EnableLateStructurizeCFG; // Copied from AMDGPUTargetMachine::EnableLateStructurizeCFG in the constructor.
      76             : 
      77             : public:
      78             :   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
      79             :                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      80        1921 :     : SelectionDAGISel(*TM, OptLevel) { // NOTE(review): TM defaults to nullptr but is dereferenced here unconditionally.
      81        2203 :     EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
      82             :   }
      83        1911 :   ~AMDGPUDAGToDAGISel() override = default;
      84             : 
      85        2193 :   void getAnalysisUsage(AnalysisUsage &AU) const override { // Registers the three analyses this pass requires, then the base class's.
      86             :     AU.addRequired<AMDGPUArgumentUsageInfo>();
      87             :     AU.addRequired<AMDGPUPerfHintAnalysis>();
      88             :     AU.addRequired<LegacyDivergenceAnalysis>();
      89        2193 :     SelectionDAGISel::getAnalysisUsage(AU);
      90        2193 :   }
      91             : 
      92             :   bool runOnMachineFunction(MachineFunction &MF) override;
      93             :   void Select(SDNode *N) override;
      94             :   StringRef getPassName() const override;
      95             :   void PostprocessISelDAG() override;
      96             : 
      97             : protected:
      98             :   void SelectBuildVector(SDNode *N, unsigned RegClassID);
      99             : 
     100             : private:
     101             :   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
     102             :   bool isNoNanSrc(SDValue N) const;
     103             :   bool isInlineImmediate(const SDNode *N) const;
     104             :   bool isVGPRImm(const SDNode *N) const;
     105             :   bool isUniformLoad(const SDNode *N) const;
     106             :   bool isUniformBr(const SDNode *N) const;
     107             : 
     108             :   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
     109             : 
     110             :   SDNode *glueCopyToM0(SDNode *N) const;
     111             : 
     112             :   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
     113             :   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); // Overridden in R600DAGToDAGISel.
     114             :   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); // Overridden in R600DAGToDAGISel.
     115             :   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     116             :                        unsigned OffsetBits) const;
     117             :   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
     118             :   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
     119             :                                  SDValue &Offset1) const;
     120             :   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     121             :                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
     122             :                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
     123             :                    SDValue &TFE) const;
     124             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     125             :                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
     126             :                          SDValue &SLC, SDValue &TFE) const;
     127             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     128             :                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
     129             :                          SDValue &SLC) const;
     130             :   bool SelectMUBUFScratchOffen(SDNode *Parent,
     131             :                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
     132             :                                SDValue &SOffset, SDValue &ImmOffset) const;
     133             :   bool SelectMUBUFScratchOffset(SDNode *Parent,
     134             :                                 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     135             :                                 SDValue &Offset) const;
     136             : 
     137             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
     138             :                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
     139             :                          SDValue &TFE) const;
     140             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     141             :                          SDValue &Offset, SDValue &SLC) const;
     142             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     143             :                          SDValue &Offset) const;
     144             : 
     145             :   bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
     146             :                         SDValue &Offset, SDValue &SLC) const;
     147             :   bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
     148             :                               SDValue &Offset, SDValue &SLC) const;
     149             : 
     150             :   template <bool IsSigned>
     151             :   bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
     152             :                         SDValue &Offset, SDValue &SLC) const;
     153             : 
     154             :   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
     155             :                         bool &Imm) const;
     156             :   SDValue Expand32BitAddress(SDValue Addr) const;
     157             :   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
     158             :                   bool &Imm) const;
     159             :   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     160             :   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     161             :   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     162             :   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
     163             :   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
     164             :   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
     165             : 
     166             :   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     167             :   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
     168             :   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     169             :   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
     170             :   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     171             :                        SDValue &Clamp, SDValue &Omod) const;
     172             :   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     173             :                          SDValue &Clamp, SDValue &Omod) const;
     174             : 
     175             :   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
     176             :                                  SDValue &Clamp,
     177             :                                  SDValue &Omod) const;
     178             : 
     179             :   bool SelectVOP3OMods(SDValue In, SDValue &Src,
     180             :                        SDValue &Clamp, SDValue &Omod) const;
     181             : 
     182             :   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     183             :   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     184             :                         SDValue &Clamp) const;
     185             : 
     186             :   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     187             :   bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
     188             :                         SDValue &Clamp) const;
     189             : 
     190             :   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     191             :   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     192             :                             SDValue &Clamp) const;
     193             :   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
     194             :   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     195             : 
     196             :   bool SelectHi16Elt(SDValue In, SDValue &Src) const;
     197             : 
     198             :   void SelectADD_SUB_I64(SDNode *N);
     199             :   void SelectUADDO_USUBO(SDNode *N);
     200             :   void SelectDIV_SCALE(SDNode *N);
     201             :   void SelectMAD_64_32(SDNode *N);
     202             :   void SelectFMA_W_CHAIN(SDNode *N);
     203             :   void SelectFMUL_W_CHAIN(SDNode *N);
     204             : 
     205             :   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
     206             :                    uint32_t Offset, uint32_t Width);
     207             :   void SelectS_BFEFromShifts(SDNode *N);
     208             :   void SelectS_BFE(SDNode *N);
     209             :   bool isCBranchSCC(const SDNode *N) const;
     210             :   void SelectBRCOND(SDNode *N);
     211             :   void SelectFMAD_FMA(SDNode *N);
     212             :   void SelectATOMIC_CMP_SWAP(SDNode *N);
     213             : 
     214             : protected:
     215             :   // Include the pieces autogenerated from the target description.
     216             : #include "AMDGPUGenDAGISel.inc"
     217             : };
     218             : 
     219             : class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
     220             :   const R600Subtarget *Subtarget; // Separate member from the base class's private GCNSubtarget pointer of the same name.
     221             : 
     222             :   bool isConstantLoad(const MemSDNode *N, int cbID) const;
     223             :   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
     224             :   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
     225             :                                        SDValue& Offset);
     226             : public:
     227         282 :   explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
     228         282 :       AMDGPUDAGToDAGISel(TM, OptLevel) {} // Forwards to the base constructor, which dereferences TM.
     229             : 
     230             :   void Select(SDNode *N) override;
     231             : 
     232             :   bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
     233             :                           SDValue &Offset) override;
     234             :   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     235             :                           SDValue &Offset) override;
     236             : 
     237             :   bool runOnMachineFunction(MachineFunction &MF) override;
     238             : protected:
     239             :   // Include the pieces autogenerated from the target description.
     240             : #include "R600GenDAGISel.inc"
     241             : };
     242             : 
     243             : }  // end anonymous namespace
     244             : 
     245       84407 : INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
     246             :                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
     247       84407 : INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) // The dependencies listed here are the same three analyses
     248       84407 : INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) // that AMDGPUDAGToDAGISel::getAnalysisUsage() marks as required.
     249       84407 : INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
     250      197401 : INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
     251             :                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
     252             : 
     253             : /// Creates the pass that converts a legalized DAG into an AMDGPU-specific
     254             : /// DAG, ready for instruction scheduling. \p TM must be non-null: the pass constructor dereferences it.
     255        1921 : FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
     256             :                                         CodeGenOpt::Level OptLevel) {
     257        1921 :   return new AMDGPUDAGToDAGISel(TM, OptLevel);
     258             : }
     259             : 
     260             : /// Creates the pass that converts a legalized DAG into an R600-specific
     261             : /// DAG, ready for instruction scheduling. \p TM must be non-null: the base-class constructor dereferences it.
     262         282 : FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
     263             :                                       CodeGenOpt::Level OptLevel) {
     264         282 :   return new R600DAGToDAGISel(TM, OptLevel);
     265             : }
     266             : /// Caches \p MF's GCNSubtarget in Subtarget, then runs the generic SelectionDAGISel driver and returns its result.
     267       19642 : bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
     268       19642 :   Subtarget = &MF.getSubtarget<GCNSubtarget>();
     269       19642 :   return SelectionDAGISel::runOnMachineFunction(MF);
     270             : }
     271             : /// Returns whether \p N may be treated as never being NaN, consulting (in order) the target options, the node flags, and the DAG.
     272           0 : bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
     273           0 :   if (TM.Options.NoNaNsFPMath)
     274           0 :     return true;
     275             : 
     276             :   // TODO: Move into isKnownNeverNaN. Defined flags are final here: the DAG query below is skipped even when hasNoNaNs() is false.
     277           0 :   if (N->getFlags().isDefined())
     278           0 :     return N->getFlags().hasNoNaNs();
     279             : 
     280           0 :   return CurDAG->isKnownNeverNaN(N);
     281             : }
     282             : /// Returns whether \p N is an integer or FP constant node that SIInstrInfo::isInlineConstant() accepts; false for any other node.
     283           0 : bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
     284           0 :   const SIInstrInfo *TII = Subtarget->getInstrInfo();
     285             : 
     286             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
     287           0 :     return TII->isInlineConstant(C->getAPIntValue());
     288             : // FP constants are tested through their raw bit pattern (bitcastToAPInt).
     289             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
     290           0 :     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
     291             : 
     292             :   return false;
     293             : }
     294             : 
     295             : /// Determine the register class for \p OpNo
     296             : /// \returns The register class of the virtual register that will be used for
     297             : /// the given operand number \p OpNo or NULL if the register class cannot be
     298             : /// determined.
     299           0 : const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
     300             :                                                           unsigned OpNo) const {
     301           0 :   if (!N->isMachineOpcode()) { // Not yet selected: only CopyToReg is handled, keyed on its register operand (operand 1).
     302           0 :     if (N->getOpcode() == ISD::CopyToReg) {
     303           0 :       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
     304           0 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     305           0 :         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
     306           0 :         return MRI.getRegClass(Reg);
     307             :       }
     308             : // Physical register: ask the register info for its class.
     309             :       const SIRegisterInfo *TRI
     310           0 :         = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); // NOTE(review): Subtarget is already a const GCNSubtarget *, so this cast is a no-op.
     311           0 :       return TRI->getPhysRegClass(Reg);
     312             :     }
     313             : 
     314           0 :     return nullptr;
     315             :   }
     316             : 
     317           0 :   switch (N->getMachineOpcode()) {
     318           0 :   default: { // Generic machine node: look the operand up in the instruction description, skipping the defs.
     319             :     const MCInstrDesc &Desc =
     320           0 :         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
     321           0 :     unsigned OpIdx = Desc.getNumDefs() + OpNo;
     322           0 :     if (OpIdx >= Desc.getNumOperands())
     323           0 :       return nullptr;
     324           0 :     int RegClass = Desc.OpInfo[OpIdx].RegClass;
     325           0 :     if (RegClass == -1)
     326           0 :       return nullptr;
     327             : 
     328           0 :     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
     329             :   }
     330           0 :   case AMDGPU::REG_SEQUENCE: { // Operand 0 is read as the super register class ID.
     331           0 :     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
     332             :     const TargetRegisterClass *SuperRC =
     333           0 :         Subtarget->getRegisterInfo()->getRegClass(RCID);
     334             : // The operand right after OpNo is read as its sub-register index.
     335           0 :     SDValue SubRegOp = N->getOperand(OpNo + 1);
     336           0 :     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
     337           0 :     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
     338           0 :                                                               SubRegIdx);
     339             :   }
     340             :   }
     341             : }
     342             : /// For a LOCAL-address-space MemSDNode on subtargets where ldsRequiresM0Init() holds, re-morphs \p N with an extra glue operand from a copy of -1 into m0; otherwise returns \p N unchanged.
     343       78903 : SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
     344       78903 :   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || // cast<> means N must be a MemSDNode.
     345       11556 :       !Subtarget->ldsRequiresM0Init())
     346             :     return N;
     347             : 
     348             :   const SITargetLowering& Lowering =
     349        8553 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     350             : 
     351             :   // Write the max value (-1) to m0 ahead of the memory operation (Select() also calls this for atomics, not just loads).
     352             : // copyToM0 is handed the DAG entry node and the constant -1; value 1 of its result is used as glue below.
     353       17106 :   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
     354       17110 :                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
     355             : 
     356        8553 :   SDValue Glue = M0.getValue(1);
     357             : // Rebuild the operand list: all original operands followed by the glue.
     358             :   SmallVector <SDValue, 8> Ops;
     359       38797 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     360       60488 :      Ops.push_back(N->getOperand(i));
     361             :   }
     362        8553 :   Ops.push_back(Glue);
     363       34212 :   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); // Same opcode and value types; only the operands change.
     364             : }
     365             : /// Builds the 64-bit immediate \p Imm from two S_MOV_B32 nodes (low and high 32 bits) joined by a REG_SEQUENCE of type \p VT in the SReg_64 class.
     366           0 : MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, // The in-class declaration names this parameter Val.
     367             :                                                   EVT VT) const {
     368           0 :   SDNode *Lo = CurDAG->getMachineNode(
     369             :       AMDGPU::S_MOV_B32, DL, MVT::i32,
     370             :       CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); // NOTE(review): getConstant here vs. getTargetConstant for the REG_SEQUENCE operands below; confirm intended.
     371             :   SDNode *Hi =
     372           0 :       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     373             :                              CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
     374             :   const SDValue Ops[] = { // Layout: register class ID, then (value, sub-register index) pairs for sub0 and sub1.
     375           0 :       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     376           0 :       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
     377           0 :       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
     378             : 
     379           0 :   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
     380             : }
     381             : /// Maps a vector element count of 1, 2, 4, 8 or 16 to the ID of the SReg_* register class chosen for it.
     382             : static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
     383       25915 :   switch (NumVectorElts) {
     384             :   case 1:
     385             :     return AMDGPU::SReg_32_XM0RegClassID;
     386       16527 :   case 2:
     387             :     return AMDGPU::SReg_64RegClassID;
     388        8634 :   case 4:
     389             :     return AMDGPU::SReg_128RegClassID;
     390         742 :   case 8:
     391             :     return AMDGPU::SReg_256RegClassID;
     392          12 :   case 16:
     393             :     return AMDGPU::SReg_512RegClassID;
     394             :   }
     395             : // Any other element count falls out of the switch and is treated as a programming error.
     396           0 :   llvm_unreachable("invalid vector size");
     397             : }
     398             : /// If \p N is an integer or FP constant node, stores its getZExtValue() bits (narrowed to uint32_t) in \p Out and returns true; otherwise returns false without writing \p Out.
     399           0 : static bool getConstantValue(SDValue N, uint32_t &Out) {
     400             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
     401           0 :     Out = C->getAPIntValue().getZExtValue();
     402           0 :     return true;
     403             :   }
     404             : // FP constants contribute their raw bit pattern rather than a numeric conversion.
     405             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
     406           0 :     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
     407           0 :     return true;
     408             :   }
     409             : 
     410             :   return false;
     411             : }
     412             : /// Selects vector-building node \p N as a REG_SEQUENCE in register class \p RegClassID (or a COPY_TO_REGCLASS when the vector has one element).
     413       28370 : void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
     414       56740 :   EVT VT = N->getValueType(0);
     415             :   unsigned NumVectorElts = VT.getVectorNumElements();
     416       28370 :   EVT EltVT = VT.getVectorElementType();
     417             :   SDLoc DL(N);
     418       28370 :   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     419             : // A one-element vector is just its single operand copied into the requested register class.
     420       28370 :   if (NumVectorElts == 1) {
     421           0 :     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
     422             :                          RegClass);
     423             :     return;
     424             :   }
     425             : 
     426             :   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
     427             :                                   "supported yet");
     428             :   // 16 = Max Num Vector Elements
     429             :   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
     430             :   // 1 = Vector Register Class
     431       28370 :   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
     432             : 
     433       28370 :   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     434             :   bool IsRegSeq = true;
     435       28370 :   unsigned NOps = N->getNumOperands();
     436      111118 :   for (unsigned i = 0; i < NOps; i++) {
     437             :     // XXX: Why is this here? A RegisterSDNode operand stops the loop early and routes N through SelectCode() below.
     438      165496 :     if (isa<RegisterSDNode>(N->getOperand(i))) {
     439             :       IsRegSeq = false;
     440             :       break;
     441             :     }
     442       82748 :     unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     443      165496 :     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
     444       82748 :     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     445             :   }
     446       28370 :   if (NOps != NumVectorElts) {
     447             :     // Fill in the missing undef elements if this was a scalar_to_vector; all of them share one IMPLICIT_DEF node.
     448             :     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
     449           4 :     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
     450             :                                                    DL, EltVT);
     451           8 :     for (unsigned i = NOps; i < NumVectorElts; ++i) {
     452           4 :       unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     453           4 :       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
     454           8 :       RegSeqArgs[1 + (2 * i) + 1] =
     455           4 :           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     456             :     }
     457             :   }
     458             : // NOTE(review): there is no return after SelectCode(N), so SelectNodeTo below also runs on the !IsRegSeq path; confirm this is intended.
     459       28370 :   if (!IsRegSeq)
     460             :     SelectCode(N);
     461       85110 :   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
     462             : }
     463             : 
     464      578538 : void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     465      578538 :   unsigned int Opc = N->getOpcode();
     466      578538 :   if (N->isMachineOpcode()) {
     467             :     N->setNodeId(-1);
     468        1569 :     return;   // Already selected.
     469             :   }
     470             : 
     471      576969 :   if (isa<AtomicSDNode>(N) ||
     472             :       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
     473             :        Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
     474      575241 :        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
     475             :        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
     476        1973 :     N = glueCopyToM0(N);
     477             : 
     478      576969 :   switch (Opc) {
     479             :   default:
     480             :     break;
     481             :   // We are selecting i64 ADD here instead of custom lower it during
     482             :   // DAG legalization, so we can fold some i64 ADDs used for address
     483             :   // calculation into the LOAD and STORE instructions.
     484             :   case ISD::ADDC:
     485             :   case ISD::ADDE:
     486             :   case ISD::SUBC:
     487             :   case ISD::SUBE: {
     488         240 :     if (N->getValueType(0) != MVT::i64)
     489             :       break;
     490             : 
     491         126 :     SelectADD_SUB_I64(N);
     492         126 :     return;
     493             :   }
     494         203 :   case ISD::UADDO:
     495             :   case ISD::USUBO: {
     496         203 :     SelectUADDO_USUBO(N);
     497         203 :     return;
     498             :   }
     499          48 :   case AMDGPUISD::FMUL_W_CHAIN: {
     500          48 :     SelectFMUL_W_CHAIN(N);
     501          48 :     return;
     502             :   }
     503         240 :   case AMDGPUISD::FMA_W_CHAIN: {
     504         240 :     SelectFMA_W_CHAIN(N);
     505         240 :     return;
     506             :   }
     507             : 
     508       26575 :   case ISD::SCALAR_TO_VECTOR:
     509             :   case ISD::BUILD_VECTOR: {
     510       53150 :     EVT VT = N->getValueType(0);
     511             :     unsigned NumVectorElts = VT.getVectorNumElements();
     512       26575 :     if (VT.getScalarSizeInBits() == 16) {
     513         660 :       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
     514             :         uint32_t LHSVal, RHSVal;
     515         904 :         if (getConstantValue(N->getOperand(0), LHSVal) &&
     516         246 :             getConstantValue(N->getOperand(1), RHSVal)) {
     517         207 :           uint32_t K = LHSVal | (RHSVal << 16);
     518         207 :           CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
     519         207 :                                CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
     520         207 :           return;
     521             :         }
     522             :       }
     523             : 
     524         453 :       break;
     525             :     }
     526             : 
     527             :     assert(VT.getVectorElementType().bitsEq(MVT::i32));
     528             :     unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
     529       25915 :     SelectBuildVector(N, RegClassID);
     530       25915 :     return;
     531             :   }
     532        9491 :   case ISD::BUILD_PAIR: {
     533             :     SDValue RC, SubReg0, SubReg1;
     534             :     SDLoc DL(N);
     535        9491 :     if (N->getValueType(0) == MVT::i128) {
     536           0 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
     537           0 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
     538           0 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     539             :     } else if (N->getValueType(0) == MVT::i64) {
     540        9491 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
     541        9491 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     542        9491 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     543             :     } else {
     544           0 :       llvm_unreachable("Unhandled value type for BUILD_PAIR");
     545             :     }
     546        9491 :     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
     547       18982 :                             N->getOperand(1), SubReg1 };
     548       28473 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     549             :                                           N->getValueType(0), Ops));
     550             :     return;
     551             :   }
     552             : 
     553       27612 :   case ISD::Constant:
     554             :   case ISD::ConstantFP: {
     555       55224 :     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
     556             :       break;
     557             : 
     558             :     uint64_t Imm;
     559             :     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
     560         231 :       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
     561             :     else {
     562             :       ConstantSDNode *C = cast<ConstantSDNode>(N);
     563        1140 :       Imm = C->getZExtValue();
     564             :     }
     565             : 
     566             :     SDLoc DL(N);
     567        2434 :     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
     568             :     return;
     569             :   }
     570       76930 :   case ISD::LOAD:
     571             :   case ISD::STORE:
     572             :   case ISD::ATOMIC_LOAD:
     573             :   case ISD::ATOMIC_STORE: {
     574       76930 :     N = glueCopyToM0(N);
     575       76930 :     break;
     576             :   }
     577             : 
     578         148 :   case AMDGPUISD::BFE_I32:
     579             :   case AMDGPUISD::BFE_U32: {
     580             :     // There is a scalar version available, but unlike the vector version which
     581             :     // has a separate operand for the offset and width, the scalar version packs
     582             :     // the width and offset into a single operand. Try to move to the scalar
     583             :     // version if the offsets are constant, so that we can try to keep extended
     584             :     // loads of kernel arguments in SGPRs.
     585             : 
     586             :     // TODO: Technically we could try to pattern match scalar bitshifts of
     587             :     // dynamic values, but it's probably not useful.
     588         148 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     589             :     if (!Offset)
     590             :       break;
     591             : 
     592             :     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
     593             :     if (!Width)
     594             :       break;
     595             : 
     596             :     bool Signed = Opc == AMDGPUISD::BFE_I32;
     597             : 
     598         132 :     uint32_t OffsetVal = Offset->getZExtValue();
     599         264 :     uint32_t WidthVal = Width->getZExtValue();
     600             : 
     601         256 :     ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
     602         132 :                             SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
     603         132 :     return;
     604             :   }
     605         277 :   case AMDGPUISD::DIV_SCALE: {
     606         277 :     SelectDIV_SCALE(N);
     607         277 :     return;
     608             :   }
     609          24 :   case AMDGPUISD::MAD_I64_I32:
     610             :   case AMDGPUISD::MAD_U64_U32: {
     611          24 :     SelectMAD_64_32(N);
     612          24 :     return;
     613             :   }
     614       17057 :   case ISD::CopyToReg: {
     615             :     const SITargetLowering& Lowering =
     616       17057 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     617       17057 :     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
     618       17057 :     break;
     619             :   }
     620             :   case ISD::AND:
     621             :   case ISD::SRL:
     622             :   case ISD::SRA:
     623             :   case ISD::SIGN_EXTEND_INREG:
     624       28276 :     if (N->getValueType(0) != MVT::i32)
     625             :       break;
     626             : 
     627       21466 :     SelectS_BFE(N);
     628       21466 :     return;
     629         576 :   case ISD::BRCOND:
     630         576 :     SelectBRCOND(N);
     631         576 :     return;
     632        2666 :   case ISD::FMAD:
     633             :   case ISD::FMA:
     634        2666 :     SelectFMAD_FMA(N);
     635        2666 :     return;
     636         197 :   case AMDGPUISD::ATOMIC_CMP_SWAP:
     637         197 :     SelectATOMIC_CMP_SWAP(N);
     638         197 :     return;
     639             :   case AMDGPUISD::CVT_PKRTZ_F16_F32:
     640             :   case AMDGPUISD::CVT_PKNORM_I16_F32:
     641             :   case AMDGPUISD::CVT_PKNORM_U16_F32:
     642             :   case AMDGPUISD::CVT_PK_U16_U32:
     643             :   case AMDGPUISD::CVT_PK_I16_I32: {
     644             :     // Hack around using a legal type if f16 is illegal.
     645         185 :     if (N->getValueType(0) == MVT::i32) {
     646          84 :       MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
     647          84 :       N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
     648          84 :                               { N->getOperand(0), N->getOperand(1) });
     649             :       SelectCode(N);
     650             :       return;
     651             :     }
     652             :   }
     653             :   }
     654             : 
     655             :   SelectCode(N);
     656             : }
     657             : 
     658           0 : bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { // N itself is not inspected in this body.
     659           0 :   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); // IR block associated with FuncInfo->MBB.
     660           0 :   const Instruction *Term = BB->getTerminator();
     661           0 :   return Term->getMetadata("amdgpu.uniform") || // True when either metadata kind is attached to the terminator.
     662           0 :          Term->getMetadata("structurizecfg.uniform");
     663             : }
     664             : 
     665           2 : StringRef AMDGPUDAGToDAGISel::getPassName() const { // Returns a fixed descriptive string for this selector.
     666           2 :   return "AMDGPU DAG->DAG Pattern Instruction Selection";
     667             : }
     668             : 
     669             : //===----------------------------------------------------------------------===//
     670             : // Complex Patterns
     671             : //===----------------------------------------------------------------------===//
     672             : 
     673           0 : bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     674             :                                             SDValue &Offset) { // Unconditionally rejects; Base and Offset are never written.
     675           0 :   return false;
     676             : }
     677             : 
     678           0 : bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
     679             :                                             SDValue &Offset) { // Splits Addr into Base plus an i32 target-constant Offset; every path returns true.
     680             :   ConstantSDNode *C;
     681             :   SDLoc DL(Addr);
     682             : 
     683             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) { // Bare constant: base is the INDIRECT_BASE_ADDR register, offset is the constant.
     684           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
     685           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     686           0 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && // DWORDADDR wrapping a constant: handled exactly like a bare constant.
     687             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
     688           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
     689           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     690           0 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && // NOTE(review): OR is treated like ADD with no visible disjoint-bits check - confirm.
     691             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
     692           0 :     Base = Addr.getOperand(0); // Operand 0 is the base; constant operand 1 becomes the offset.
     693           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     694             :   } else { // Fallback: the whole address is the base, with offset 0.
     695           0 :     Base = Addr;
     696           0 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
     697             :   }
     698             : 
     699           0 :   return true;
     700             : }
     701             : 
     702             : // FIXME: Should only handle addcarry/subcarry
     703         126 : void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { // Splits N into 32-bit low/high scalar ops and rebuilds the i64 result with REG_SEQUENCE.
     704             :   SDLoc DL(N);
     705         126 :   SDValue LHS = N->getOperand(0);
     706         126 :   SDValue RHS = N->getOperand(1);
     707             : 
     708         126 :   unsigned Opcode = N->getOpcode();
     709         126 :   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); // For these opcodes operand 2 is read below as the incoming carry.
     710             :   bool ProduceCarry =
     711         126 :       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
     712         126 :   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; // Any other opcode is selected as a subtract.
     713             : 
     714         126 :   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     715         126 :   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     716             : 
     717         252 :   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, // Lo*/Hi* are the sub0/sub1 halves of LHS (0) and RHS (1).
     718             :                                        DL, MVT::i32, LHS, Sub0);
     719         252 :   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     720             :                                        DL, MVT::i32, LHS, Sub1);
     721             : 
     722         252 :   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     723             :                                        DL, MVT::i32, RHS, Sub0);
     724         252 :   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     725             :                                        DL, MVT::i32, RHS, Sub1);
     726             : 
     727         252 :   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); // Each half-op produces an i32 value plus a glue result.
     728             : 
     729         126 :   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
     730         126 :   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
     731             : 
     732             :   SDNode *AddLo;
     733         126 :   if (!ConsumeCarry) { // No carry-in: the low half uses the plain add/sub opcode.
     734             :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
     735         240 :     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
     736             :   } else { // Carry-in: the low half uses the carry opcode with N's operand 2 appended.
     737           6 :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
     738          12 :     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
     739             :   }
     740             :   SDValue AddHiArgs[] = { // The high half always takes AddLo's result #1 (the glue) as its third operand.
     741             :     SDValue(Hi0, 0),
     742             :     SDValue(Hi1, 0),
     743             :     SDValue(AddLo, 1)
     744             :   };
     745         252 :   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
     746             : 
     747             :   SDValue RegSequenceArgs[] = { // Low result goes to sub0 and high result to sub1 of an SReg_64.
     748         126 :     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     749             :     SDValue(AddLo,0),
     750             :     Sub0,
     751             :     SDValue(AddHi,0),
     752             :     Sub1,
     753         126 :   };
     754         252 :   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
     755             :                                                MVT::i64, RegSequenceArgs);
     756             : 
     757         126 :   if (ProduceCarry) {
     758             :     // Replace the carry-use: N's result #1 is redirected to AddHi's result #1.
     759         252 :     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
     760             :   }
     761             : 
     762             :   // Replace the remaining uses.
     763         126 :   ReplaceNode(N, RegSequence);
     764         126 : }
     765             : 
     766           0 : void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { // UADDO becomes V_ADD_I32_e64; any other opcode routed here becomes V_SUB_I32_e64.
     767             :   // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
     768             :   // carry out despite the _i32 name. These were renamed in VI to _U32.
     769             :   // FIXME: We should probably rename the opcodes here.
     770           0 :   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     771             :     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
     772             : 
     773           0 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), // N keeps its value-type list and its first two operands.
     774           0 :                        { N->getOperand(0), N->getOperand(1) });
     775           0 : }
     776             : 
     777         240 : void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { // Retargets N to V_FMA_F32 with explicit VOP3 modifier operands.
     778             :   SDLoc SL(N);
     779             :   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
     780         240 :   SDValue Ops[10]; // Slots 0-7 follow the layout above; slots 8 and 9 carry N's operands 0 and 4.
     781             : 
     782         480 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); // Fills slots 1, 0, 6 and 7 from N's operand 1.
     783         480 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     784         480 :   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
     785         240 :   Ops[8] = N->getOperand(0); // presumably the chain, going by the _W_CHAIN name - confirm.
     786         240 :   Ops[9] = N->getOperand(4);
     787             : 
     788         720 :   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
     789         240 : }
     790             : 
     791          48 : void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { // Retargets N to V_MUL_F32_e64 with explicit VOP3 modifier operands.
     792             :   SDLoc SL(N);
     793             :   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
     794          48 :   SDValue Ops[8]; // Slots 0-5 follow the layout above; slots 6 and 7 carry N's operands 0 and 3.
     795             : 
     796          96 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); // Fills slots 1, 0, 4 and 5 from N's operand 1.
     797          96 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     798          48 :   Ops[6] = N->getOperand(0); // presumably the chain, going by the _W_CHAIN name - confirm.
     799          48 :   Ops[7] = N->getOperand(3);
     800             : 
     801         144 :   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
     802          48 : }
     803             : 
     804             : // We need to handle this here because tablegen doesn't support matching
     805             : // instructions with multiple outputs.
     806           0 : void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { // Retargets N to the f32 or f64 V_DIV_SCALE opcode.
     807             :   SDLoc SL(N);
     808           0 :   EVT VT = N->getValueType(0); // The opcode choice is driven by N's first result type.
     809             : 
     810             :   assert(VT == MVT::f32 || VT == MVT::f64);
     811             : 
     812             :   unsigned Opc
     813           0 :     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
     814             : 
     815           0 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; // The three operands are forwarded unchanged.
     816           0 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     817           0 : }
     818             : 
     819             : // We need to handle this here because tablegen doesn't support matching
     820             : // instructions with multiple outputs.
     821           0 : void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { // Retargets N to V_MAD_I64_I32 or V_MAD_U64_U32.
     822             :   SDLoc SL(N);
     823           0 :   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; // Any other opcode routed here takes the unsigned form.
     824           0 :   unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
     825             : 
     826           0 :   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); // A zero i1 clamp operand is appended after N's three operands.
     827           0 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
     828           0 :                     Clamp };
     829           0 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     830           0 : }
     831             : 
     832           0 : bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     833             :                                          unsigned OffsetBits) const { // Decides whether Offset may be used as an OffsetBits-wide immediate alongside Base.
     834           0 :   if ((OffsetBits == 16 && !isUInt<16>(Offset)) || // Range check exists only for widths 8 and 16; other widths are not range-checked here.
     835           0 :       (OffsetBits == 8 && !isUInt<8>(Offset)))
     836           0 :     return false;
     837             : 
     838           0 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || // From SEA_ISLANDS on, or with unsafe folding enabled, Base is not examined.
     839           0 :       Subtarget->unsafeDSOffsetFoldingEnabled())
     840           0 :     return true;
     841             : 
     842             :   // On Southern Islands, instructions with a negative base value and an offset
     843             :   // don't seem to work.
     844           0 :   return CurDAG->SignBitIsZero(Base);
     845             : }
     846             : 
     847       10974 : bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
     848             :                                               SDValue &Offset) const { // Produces Base plus an i16 target-constant Offset for Addr; every path returns true.
     849             :   SDLoc DL(Addr);
     850       10974 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     851        7592 :     SDValue N0 = Addr.getOperand(0);
     852        7592 :     SDValue N1 = Addr.getOperand(1);
     853             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     854       15184 :     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { // The sign-extended value is converted to isDSOffsetLegal's unsigned parameter.
     855             :       // (add n0, c0)
     856        7145 :       Base = N0;
     857        7145 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
     858        7145 :       return true;
     859             :     }
     860        3382 :   } else if (Addr.getOpcode() == ISD::SUB) {
     861             :     // sub C, x -> add (sub 0, x), C
     862             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     863          18 :       int64_t ByteOffset = C->getSExtValue();
     864          18 :       if (isUInt<16>(ByteOffset)) {
     865          16 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     866             : 
     867             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     868             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     869             :         // here, so this is thrown away.
     870          16 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     871          16 :                                       Zero, Addr.getOperand(1));
     872             : 
     873          16 :         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
     874             :           // FIXME: Select to VOP3 version for with-carry.
     875          14 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     876             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     877             : 
     878             :           MachineSDNode *MachineSub
     879          28 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     880             :                                      Zero, Addr.getOperand(1));
     881             : 
     882          14 :           Base = SDValue(MachineSub, 0); // Base is the selected (0 - x); the constant C becomes the offset.
     883          14 :           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
     884          14 :           return true;
     885             :         }
     886             :       }
     887             :     }
     888             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     889             :     // If we have a constant address, prefer to put the constant into the
     890             :     // offset. This can save moves to load the constant address since multiple
     891             :     // operations can share the zero base address register, and enables merging
     892             :     // into read2 / write2 instructions.
     893             : 
     894             :     SDLoc DL(Addr); // Shadows the identical DL declared at the top of the function.
     895             : 
     896        1492 :     if (isUInt<16>(CAddr->getZExtValue())) { // Only constants that fit in 16 unsigned bits are folded.
     897         742 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     898        1484 :       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     899             :                                  DL, MVT::i32, Zero);
     900         742 :       Base = SDValue(MovZero, 0);
     901        1484 :       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
     902             :       return true;
     903             :     }
     904             :   }
     905             : 
     906             :   // default case: nothing was folded, so use Addr as the base with a zero offset.
     907        3073 :   Base = Addr;
     908        6146 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
     909        3073 :   return true;
     910             : }
     911             : 
     912             : // TODO: If offset is too big, put low 16-bit into offset.
     913         594 : bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     914             :                                                    SDValue &Offset0,
     915             :                                                    SDValue &Offset1) const { // Produces Base and two i8 offsets with Offset1 = Offset0 + 1; every path returns true.
     916             :   SDLoc DL(Addr);
     917             : 
     918         594 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     919         461 :     SDValue N0 = Addr.getOperand(0);
     920         461 :     SDValue N1 = Addr.getOperand(1);
     921             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     922         461 :     unsigned DWordOffset0 = C1->getZExtValue() / 4; // Byte offset to dword units; NOTE(review): a non-multiple-of-4 remainder is dropped without a check here - confirm.
     923         461 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     924             :     // (add n0, c0)
     925         461 :     if (isDSOffsetLegal(N0, DWordOffset1, 8)) { // Only the larger of the two offsets is passed to the legality check.
     926         446 :       Base = N0;
     927         446 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     928         446 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     929         446 :       return true;
     930             :     }
     931         133 :   } else if (Addr.getOpcode() == ISD::SUB) {
     932             :     // sub C, x -> add (sub 0, x), C
     933             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     934           4 :       unsigned DWordOffset0 = C->getZExtValue() / 4;
     935           4 :       unsigned DWordOffset1 = DWordOffset0 + 1;
     936             : 
     937           4 :       if (isUInt<8>(DWordOffset0)) {
     938             :         SDLoc DL(Addr); // Shadows the identical DL declared at the top of the function.
     939           4 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     940             : 
     941             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     942             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     943             :         // here, so this is thrown away.
     944           4 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     945           4 :                                       Zero, Addr.getOperand(1));
     946             : 
     947           4 :         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
     948           2 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     949             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     950             : 
     951             :           MachineSDNode *MachineSub
     952           4 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     953             :                                      Zero, Addr.getOperand(1));
     954             : 
     955           2 :           Base = SDValue(MachineSub, 0); // Base is the selected (0 - x); C supplies the two dword offsets.
     956           2 :           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     957           2 :           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     958             :           return true;
     959             :         }
     960             :       }
     961             :     }
     962             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     963          24 :     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
     964          24 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     965             :     assert(4 * DWordOffset0 == CAddr->getZExtValue()); // Only this branch asserts that the constant is a multiple of 4.
     966             : 
     967          24 :     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
     968          16 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     969             :       MachineSDNode *MovZero
     970          32 :         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     971             :                                  DL, MVT::i32, Zero);
     972          16 :       Base = SDValue(MovZero, 0); // Zero base register; the constant address lives entirely in the offsets.
     973          16 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     974          16 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     975             :       return true;
     976             :     }
     977             :   }
     978             : 
     979             :   // default case: nothing was folded, so use Addr as the base with offsets 0 and 1.
     980             : 
     981             :   // FIXME: This is broken on SI where we still need to check if the base
     982             :   // pointer is positive here.
     983         130 :   Base = Addr;
     984         130 :   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
     985         130 :   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
     986         130 :   return true;
     987             : }
     988             : 
     989       45247 : bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
     990             :                                      SDValue &VAddr, SDValue &SOffset,
     991             :                                      SDValue &Offset, SDValue &Offen,
     992             :                                      SDValue &Idxen, SDValue &Addr64,
     993             :                                      SDValue &GLC, SDValue &SLC,
     994             :                                      SDValue &TFE) const { // Decomposes Addr into the MUBUF operand set; the only false return is the flat-for-global case below.
     995             :   // Subtarget prefers to use flat instruction
     996       45247 :   if (Subtarget->useFlatForGlobal())
     997             :     return false;
     998             : 
     999             :   SDLoc DL(Addr);
    1000             : 
    1001       31369 :   if (!GLC.getNode()) // GLC and SLC values already set by the caller are kept; unset ones default to 0.
    1002       31369 :     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1003       31369 :   if (!SLC.getNode())
    1004       31150 :     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1005       31369 :   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); // TFE, Idxen and Offen are always 0; Addr64 and SOffset start at 0 and may be replaced below.
    1006             : 
    1007       31369 :   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1008       31369 :   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1009       31369 :   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1010       31369 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1011             : 
    1012             :   ConstantSDNode *C1 = nullptr;
    1013       31369 :   SDValue N0 = Addr;
    1014       31369 :   if (CurDAG->isBaseWithConstantOffset(Addr)) { // Peel off a constant offset, but only when it fits in 32 unsigned bits.
    1015             :     C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    1016       17800 :     if (isUInt<32>(C1->getZExtValue()))
    1017        8890 :       N0 = Addr.getOperand(0);
    1018             :     else
    1019             :       C1 = nullptr;
    1020             :   }
    1021             : 
    1022       31369 :   if (N0.getOpcode() == ISD::ADD) {
    1023             :     // (add N2, N3) -> addr64, or
    1024             :     // (add (add N2, N3), C1) -> addr64
    1025        4534 :     SDValue N2 = N0.getOperand(0);
    1026        4534 :     SDValue N3 = N0.getOperand(1);
    1027        4534 :     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1028             : 
    1029        4534 :     if (N2->isDivergent()) {
    1030          14 :       if (N3->isDivergent()) {
    1031             :         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
    1032             :         // addr64, and construct the resource from a 0 address.
    1033           5 :         Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    1034           5 :         VAddr = N0;
    1035             :       } else {
    1036             :         // N2 is divergent, N3 is not.
    1037           9 :         Ptr = N3;
    1038           9 :         VAddr = N2;
    1039             :       }
    1040             :     } else {
    1041             :       // N2 is not divergent.
    1042        4520 :       Ptr = N2;
    1043        4520 :       VAddr = N3;
    1044             :     }
    1045        4534 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); // Offset is assigned again on every return path at the end of the function.
    1046       26835 :   } else if (N0->isDivergent()) {
    1047             :     // N0 is divergent. Use it as the addr64, and construct the resource from a
    1048             :     // 0 address.
    1049          48 :     Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    1050          48 :     VAddr = N0;
    1051          48 :     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1052             :   } else {
    1053             :     // N0 -> offset, or
    1054             :     // (N0 + C1) -> offset
    1055       26787 :     VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1056       26787 :     Ptr = N0;
    1057             :   }
    1058             : 
    1059       31369 :   if (!C1) {
    1060             :     // No offset.
    1061       22479 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1062       22479 :     return true;
    1063             :   }
    1064             : 
    1065       17780 :   if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    1066             :     // Legal offset for instruction.
    1067        8605 :     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1068        8605 :     return true;
    1069             :   }
    1070             : 
    1071             :   // Illegal offset, store it in soffset.
    1072         285 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1073         285 :   SOffset = // The constant is materialized with S_MOV_B32 and passed through SOffset instead.
    1074         855 :       SDValue(CurDAG->getMachineNode(
    1075             :                   AMDGPU::S_MOV_B32, DL, MVT::i32,
    1076             :                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
    1077             :               0);
    1078         285 :   return true;
    1079             : }
    1080             : 
    1081       30954 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1082             :                                            SDValue &VAddr, SDValue &SOffset,
    1083             :                                            SDValue &Offset, SDValue &GLC,
    1084             :                                            SDValue &SLC, SDValue &TFE) const {
    1085       30954 :   SDValue Ptr, Offen, Idxen, Addr64;
    1086             : 
    1087             :   // addr64 bit was removed for volcanic islands.
    1088       30954 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    1089             :     return false;
    1090             : 
    1091       16184 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1092             :               GLC, SLC, TFE))
    1093             :     return false;
    1094             : 
    1095             :   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
    1096       26290 :   if (C->getSExtValue()) {
    1097             :     SDLoc DL(Addr);
    1098             : 
    1099             :     const SITargetLowering& Lowering =
    1100        3633 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1101             : 
    1102        3633 :     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    1103             :     return true;
    1104             :   }
    1105             : 
    1106             :   return false;
    1107             : }
    1108             : 
    1109         617 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1110             :                                            SDValue &VAddr, SDValue &SOffset,
    1111             :                                            SDValue &Offset,
    1112             :                                            SDValue &SLC) const {
    1113        1234 :   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
    1114         617 :   SDValue GLC, TFE;
    1115             : 
    1116         617 :   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
    1117             : }
    1118             : 
    1119             : static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
    1120             :   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
    1121           0 :   return PSV && PSV->isStack();
    1122             : }
    1123             : 
    1124           0 : std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
    1125           0 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1126             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1127             : 
    1128             :   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    1129           0 :     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
    1130           0 :                                               FI->getValueType(0));
    1131             : 
    1132             :     // If we can resolve this to a frame index access, this is relative to the
    1133             :     // frame pointer SGPR.
    1134           0 :     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
    1135           0 :                                                    MVT::i32));
    1136             :   }
    1137             : 
    1138             :   // If we don't know this private access is a local stack object, it needs to
    1139             :   // be relative to the entry point's scratch wave offset register.
    1140           0 :   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
    1141           0 :                                                MVT::i32));
    1142             : }
    1143             : 
    1144        6105 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    1145             :                                                  SDValue Addr, SDValue &Rsrc,
    1146             :                                                  SDValue &VAddr, SDValue &SOffset,
    1147             :                                                  SDValue &ImmOffset) const {
    1148             : 
    1149             :   SDLoc DL(Addr);
    1150        6105 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1151        6105 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1152             : 
    1153       12210 :   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1154             : 
    1155             :   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    1156           6 :     unsigned Imm = CAddr->getZExtValue();
    1157             : 
    1158           6 :     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    1159          12 :     MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    1160             :                                                         DL, MVT::i32, HighBits);
    1161           6 :     VAddr = SDValue(MovHighBits, 0);
    1162             : 
    1163             :     // In a call sequence, stores to the argument stack area are relative to the
    1164             :     // stack pointer.
    1165           6 :     const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1166             :     unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1167           6 :       Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1168             : 
    1169          12 :     SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1170           6 :     ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    1171             :     return true;
    1172             :   }
    1173             : 
    1174        6099 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1175             :     // (add n0, c1)
    1176             : 
    1177        4888 :     SDValue N0 = Addr.getOperand(0);
    1178        4888 :     SDValue N1 = Addr.getOperand(1);
    1179             : 
    1180             :     // Offsets in vaddr must be positive if range checking is enabled.
    1181             :     //
    1182             :     // The total computation of vaddr + soffset + offset must not overflow.  If
    1183             :     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    1184             :     // overflowing.
    1185             :     //
    1186             :     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    1187             :     // always perform a range check. If a negative vaddr base index was used,
    1188             :     // this would fail the range check. The overall address computation would
    1189             :     // compute a valid address, but this doesn't happen due to the range
    1190             :     // check. For out-of-bounds MUBUF loads, a 0 is returned.
    1191             :     //
    1192             :     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    1193             :     // MUBUF vaddr, but not on older subtargets which can only do this if the
    1194             :     // sign bit is known 0.
    1195             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1196        9776 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
    1197        8542 :         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
    1198        3668 :          CurDAG->SignBitIsZero(N0))) {
    1199        4271 :       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
    1200        4271 :       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1201        4271 :       return true;
    1202             :     }
    1203             :   }
    1204             : 
    1205             :   // (node)
    1206        1828 :   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
    1207        1828 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1208        1828 :   return true;
    1209             : }
    1210             : 
    1211           0 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    1212             :                                                   SDValue Addr,
    1213             :                                                   SDValue &SRsrc,
    1214             :                                                   SDValue &SOffset,
    1215             :                                                   SDValue &Offset) const {
    1216             :   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
    1217           0 :   if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    1218           0 :     return false;
    1219             : 
    1220           0 :   SDLoc DL(Addr);
    1221           0 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1222           0 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1223             : 
    1224           0 :   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1225             : 
    1226           0 :   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1227             :   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1228           0 :     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1229             : 
    1230             :   // FIXME: Get from MachinePointerInfo? We should only be using the frame
    1231             :   // offset if we know this is in a call sequence.
    1232           0 :   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1233             : 
    1234           0 :   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
    1235             :   return true;
    1236             : }
    1237             : 
    1238       29063 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1239             :                                            SDValue &SOffset, SDValue &Offset,
    1240             :                                            SDValue &GLC, SDValue &SLC,
    1241             :                                            SDValue &TFE) const {
    1242       29063 :   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
    1243             :   const SIInstrInfo *TII =
    1244       29063 :     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
    1245             : 
    1246       29063 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1247             :               GLC, SLC, TFE))
    1248             :     return false;
    1249             : 
    1250       36448 :   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    1251       36448 :       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    1252       18224 :       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    1253       17275 :     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
    1254       17275 :                     APInt::getAllOnesValue(32).getZExtValue(); // Size
    1255             :     SDLoc DL(Addr);
    1256             : 
    1257             :     const SITargetLowering& Lowering =
    1258       17275 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1259             : 
    1260       17275 :     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    1261             :     return true;
    1262             :   }
    1263             :   return false;
    1264             : }
    1265             : 
    1266           8 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1267             :                                            SDValue &Soffset, SDValue &Offset
    1268             :                                            ) const {
    1269           8 :   SDValue GLC, SLC, TFE;
    1270             : 
    1271           8 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1272             : }
    1273             : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1274             :                                            SDValue &Soffset, SDValue &Offset,
    1275             :                                            SDValue &SLC) const {
    1276         552 :   SDValue GLC, TFE;
    1277             : 
    1278         552 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1279             : }
    1280             : 
    1281             : template <bool IsSigned>
    1282           0 : bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
    1283             :                                           SDValue &VAddr,
    1284             :                                           SDValue &Offset,
    1285             :                                           SDValue &SLC) const {
    1286             :   int64_t OffsetVal = 0;
    1287             : 
    1288           0 :   if (Subtarget->hasFlatInstOffsets() &&
    1289           0 :       CurDAG->isBaseWithConstantOffset(Addr)) {
    1290           0 :     SDValue N0 = Addr.getOperand(0);
    1291           0 :     SDValue N1 = Addr.getOperand(1);
    1292           0 :     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    1293             : 
    1294           0 :     if ((IsSigned && isInt<13>(COffsetVal)) ||
    1295           0 :         (!IsSigned && isUInt<12>(COffsetVal))) {
    1296           0 :       Addr = N0;
    1297             :       OffsetVal = COffsetVal;
    1298             :     }
    1299             :   }
    1300             : 
    1301           0 :   VAddr = Addr;
    1302           0 :   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
    1303           0 :   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
    1304             : 
    1305           0 :   return true;
    1306             : }
    1307           0 : 
    1308             : bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
    1309             :                                           SDValue &VAddr,
    1310             :                                           SDValue &Offset,
    1311             :                                           SDValue &SLC) const {
    1312             :   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
    1313           0 : }
    1314           0 : 
    1315           0 : bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
    1316           0 :                                           SDValue &VAddr,
    1317           0 :                                           SDValue &Offset,
    1318             :                                           SDValue &SLC) const {
    1319           0 :   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
    1320           0 : }
    1321           0 : 
    1322             : bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
    1323             :                                           SDValue &Offset, bool &Imm) const {
    1324             : 
    1325             :   // FIXME: Handle non-constant offsets.
    1326           0 :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
    1327           0 :   if (!C)
    1328           0 :     return false;
    1329             : 
    1330           0 :   SDLoc SL(ByteOffsetNode);
    1331             :   GCNSubtarget::Generation Gen = Subtarget->getGeneration();
    1332           0 :   int64_t ByteOffset = C->getSExtValue();
    1333             :   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
    1334             : 
    1335             :   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    1336             :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1337             :     Imm = true;
    1338           0 :     return true;
    1339           0 :   }
    1340           0 : 
    1341           0 :   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    1342           0 :     return false;
    1343             : 
    1344           0 :   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    1345             :     // 32-bit Immediates are supported on Sea Islands.
    1346           0 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1347             :   } else {
    1348             :     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    1349             :     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
    1350             :                                             C32Bit), 0);
    1351           0 :   }
    1352           0 :   Imm = false;
    1353           0 :   return true;
    1354             : }
    1355           0 : 
    1356             : SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
    1357             :   if (Addr.getValueType() != MVT::i32)
    1358             :     return Addr;
    1359             : 
    1360             :   // Zero-extend a 32-bit address.
    1361             :   SDLoc SL(Addr);
    1362        1004 : 
    1363             :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1364             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1365             :   unsigned AddrHiVal = Info->get32BitAddressHighBits();
    1366             :   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
    1367             : 
    1368             :   const SDValue Ops[] = {
    1369         213 :     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    1370             :     Addr,
    1371             :     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    1372           0 :     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
    1373             :             0),
    1374             :     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
    1375             :   };
    1376             : 
    1377             :   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
    1378           0 :                                         Ops), 0);
    1379             : }
    1380           0 : 
    1381           0 : bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
    1382           0 :                                      SDValue &Offset, bool &Imm) const {
    1383           0 :   SDLoc SL(Addr);
    1384             : 
    1385           0 :   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
    1386           0 :   // wraparound, because s_load instructions perform the addition in 64 bits.
    1387           0 :   if ((Addr.getValueType() != MVT::i32 ||
    1388           0 :        Addr->getFlags().hasNoUnsignedWrap()) &&
    1389             :       CurDAG->isBaseWithConstantOffset(Addr)) {
    1390             :     SDValue N0 = Addr.getOperand(0);
    1391           0 :     SDValue N1 = Addr.getOperand(1);
    1392           0 : 
    1393             :     if (SelectSMRDOffset(N1, Offset, Imm)) {
    1394           0 :       SBase = Expand32BitAddress(N0);
    1395             :       return true;
    1396           0 :     }
    1397             :   }
    1398           0 :   SBase = Expand32BitAddress(Addr);
    1399           0 :   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1400             :   Imm = true;
    1401             :   return true;
    1402           0 : }
    1403           0 : 
    1404             : bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
    1405             :                                        SDValue &Offset) const {
    1406           0 :   bool Imm;
    1407           0 :   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
    1408           0 : }
    1409             : 
    1410             : bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
    1411           0 :                                          SDValue &Offset) const {
    1412             : 
    1413           0 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1414             :     return false;
    1415           0 : 
    1416           0 :   bool Imm;
    1417             :   if (!SelectSMRD(Addr, SBase, Offset, Imm))
    1418             :     return false;
    1419           0 : 
    1420             :   return !Imm && isa<ConstantSDNode>(Offset);
    1421           0 : }
    1422           0 : 
    1423             : bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
    1424           0 :                                         SDValue &Offset) const {
    1425           0 :   bool Imm;
    1426             :   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
    1427           0 :          !isa<ConstantSDNode>(Offset);
    1428           0 : }
    1429             : 
    1430             : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
    1431       26722 :                                              SDValue &Offset) const {
    1432             :   bool Imm;
    1433             :   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
    1434             : }
    1435             : 
    1436             : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
    1437         118 :                                                SDValue &Offset) const {
    1438       26770 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1439       26652 :     return false;
    1440       22750 : 
    1441       22750 :   bool Imm;
    1442             :   if (!SelectSMRDOffset(Addr, Offset, Imm))
    1443       22750 :     return false;
    1444       22738 : 
    1445       22738 :   return !Imm && isa<ConstantSDNode>(Offset);
    1446             : }
    1447             : 
    1448        3984 : bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
    1449        3984 :                                             SDValue &Base,
    1450        3984 :                                             SDValue &Offset) const {
    1451        3984 :   SDLoc DL(Index);
    1452             : 
    1453             :   if (CurDAG->isBaseWithConstantOffset(Index)) {
    1454             :     SDValue N0 = Index.getOperand(0);
    1455             :     SDValue N1 = Index.getOperand(1);
    1456             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1457       26695 : 
    1458             :     // (add n0, c1)
    1459             :     Base = N0;
    1460           9 :     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    1461             :     return true;
    1462             :   }
    1463           9 : 
    1464             :   if (isa<ConstantSDNode>(Index))
    1465             :     return false;
    1466             : 
    1467           9 :   Base = Index;
    1468             :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1469             :   return true;
    1470           9 : }
    1471             : 
    1472             : SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
    1473          18 :                                      SDValue Val, uint32_t Offset,
    1474             :                                      uint32_t Width) {
    1475             :   // Transformation function, pack the offset and width of a BFE into
    1476          18 :   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
    1477          18 :   // source, bits [5:0] contain the offset and bits [22:16] the width.
    1478             :   uint32_t PackedVal = Offset | (Width << 16);
    1479             :   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
    1480           0 : 
    1481             :   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
    1482             : }
    1483         508 : 
    1484             : void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
    1485             :   // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
    1486           0 :   // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
    1487             :   // Predicate: 0 < b <= c < 32
    1488           0 : 
    1489           0 :   const SDValue &Shl = N->getOperand(0);
    1490             :   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
    1491             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1492           0 : 
    1493           0 :   if (B && C) {
    1494             :     uint32_t BVal = B->getZExtValue();
    1495           0 :     uint32_t CVal = C->getZExtValue();
    1496             : 
    1497             :     if (0 < BVal && BVal <= CVal && CVal < 32) {
    1498           0 :       bool Signed = N->getOpcode() == ISD::SRA;
    1499             :       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    1500             : 
    1501           0 :       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
    1502             :                               32 - CVal));
    1503           0 :       return;
    1504           0 :     }
    1505           0 :   }
    1506             :   SelectCode(N);
    1507             : }
    1508             : 
    1509           0 : void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
    1510           0 :   switch (N->getOpcode()) {
    1511           0 :   case ISD::AND:
    1512             :     if (N->getOperand(0).getOpcode() == ISD::SRL) {
    1513             :       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
    1514             :       // Predicate: isMask(mask)
    1515           0 :       const SDValue &Srl = N->getOperand(0);
    1516             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
    1517           0 :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1518           0 : 
    1519           0 :       if (Shift && Mask) {
    1520             :         uint32_t ShiftVal = Shift->getZExtValue();
    1521             :         uint32_t MaskVal = Mask->getZExtValue();
    1522           0 : 
    1523             :         if (isMask_32(MaskVal)) {
    1524             :           uint32_t WidthVal = countPopulation(MaskVal);
    1525             : 
    1526             :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1527             :                                   Srl.getOperand(0), ShiftVal, WidthVal));
    1528           0 :           return;
    1529           0 :         }
    1530             :       }
    1531           0 :     }
    1532             :     break;
    1533             :   case ISD::SRL:
    1534         411 :     if (N->getOperand(0).getOpcode() == ISD::AND) {
    1535             :       // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
    1536             :       // Predicate: isMask(mask >> b)
    1537             :       const SDValue &And = N->getOperand(0);
    1538             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1539         411 :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
    1540         411 : 
    1541             :       if (Shift && Mask) {
    1542             :         uint32_t ShiftVal = Shift->getZExtValue();
    1543         411 :         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
    1544         391 : 
    1545         391 :         if (isMask_32(MaskVal)) {
    1546             :           uint32_t WidthVal = countPopulation(MaskVal);
    1547         391 : 
    1548         380 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1549         380 :                                   And.getOperand(0), ShiftVal, WidthVal));
    1550             :           return;
    1551         380 :         }
    1552             :       }
    1553         380 :     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
    1554             :       SelectS_BFEFromShifts(N);
    1555             :       return;
    1556             :     }
    1557             :     break;
    1558             :   case ISD::SRA:
    1559       21466 :     if (N->getOperand(0).getOpcode() == ISD::SHL) {
    1560       42932 :       SelectS_BFEFromShifts(N);
    1561        7491 :       return;
    1562       14982 :     }
    1563             :     break;
    1564             : 
    1565             :   case ISD::SIGN_EXTEND_INREG: {
    1566             :     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    1567             :     SDValue Src = N->getOperand(0);
    1568             :     if (Src.getOpcode() != ISD::SRL)
    1569        2336 :       break;
    1570        2321 : 
    1571        4642 :     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    1572             :     if (!Amt)
    1573             :       break;
    1574             : 
    1575             :     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    1576        2143 :     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
    1577             :                             Amt->getZExtValue(), Width));
    1578        2143 :     return;
    1579             :   }
    1580             :   }
    1581             : 
    1582             :   SelectCode(N);
    1583        6844 : }
    1584       13688 : 
    1585             : bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
    1586             :   assert(N->getOpcode() == ISD::BRCOND);
    1587             :   if (!N->hasOneUse())
    1588             :     return false;
    1589         803 : 
    1590             :   SDValue Cond = N->getOperand(1);
    1591         803 :   if (Cond.getOpcode() == ISD::CopyToReg)
    1592         800 :     Cond = Cond.getOperand(2);
    1593        1600 : 
    1594             :   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    1595             :     return false;
    1596             : 
    1597             :   MVT VT = Cond.getOperand(0).getSimpleValueType();
    1598         800 :   if (VT == MVT::i32)
    1599             :     return true;
    1600         800 : 
    1601             :   if (VT == MVT::i64) {
    1602             :     auto ST = static_cast<const GCNSubtarget *>(Subtarget);
    1603        6041 : 
    1604          16 :     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    1605          16 :     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
    1606             :   }
    1607             : 
    1608        2493 :   return false;
    1609        4986 : }
    1610         395 : 
    1611         395 : void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
    1612             :   SDValue Cond = N->getOperand(1);
    1613             : 
    1614             :   if (Cond.isUndef()) {
    1615        4638 :     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
    1616             :                          N->getOperand(2), N->getOperand(0));
    1617        4638 :     return;
    1618        4638 :   }
    1619             : 
    1620             :   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
    1621             :   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
    1622             :   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
    1623             :   SDLoc SL(N);
    1624             : 
    1625        1401 :   if (!UseSCCBr) {
    1626        2802 :     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    1627        1401 :     // analyzed what generates the vcc value, so we do not know whether vcc
    1628             :     // bits for disabled lanes are 0.  Thus we need to mask out bits for
    1629             :     // disabled lanes.
    1630             :     //
    1631             :     // For the case that we select S_CBRANCH_SCC1 and it gets
    1632             :     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    1633             :     // SIInstrInfo::moveToVALU which inserts the S_AND).
    1634             :     //
    1635           0 :     // We could add an analysis of what generates the vcc value here and omit
    1636             :     // the S_AND when is unnecessary. But it would be better to add a separate
    1637             :     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    1638           0 :     // catches both cases.
    1639             :     Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
    1640           0 :                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
    1641           0 :                                Cond),
    1642           0 :                    0);
    1643             :   }
    1644           0 : 
    1645           0 :   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
    1646             :   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
    1647             :                        N->getOperand(2), // Basic Block
    1648           0 :                        VCC.getValue(0));
    1649           0 : }
    1650             : 
    1651           0 : void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
    1652           0 :   MVT VT = N->getSimpleValueType(0);
    1653             :   bool IsFMA = N->getOpcode() == ISD::FMA;
    1654           0 :   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
    1655           0 :                          !Subtarget->hasFmaMixInsts()) ||
    1656             :       ((IsFMA && Subtarget->hasMadMixInsts()) ||
    1657             :        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    1658             :     SelectCode(N);
    1659             :     return;
    1660             :   }
    1661         576 : 
    1662         576 :   SDValue Src0 = N->getOperand(0);
    1663             :   SDValue Src1 = N->getOperand(1);
    1664         576 :   SDValue Src2 = N->getOperand(2);
    1665         156 :   unsigned Src0Mods, Src1Mods, Src2Mods;
    1666             : 
    1667          78 :   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
    1668             :   // using the conversion from f16.
    1669             :   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
    1670         498 :   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
    1671             :   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
    1672         498 : 
    1673             :   assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
    1674             :          "fmad selected with denormals enabled");
    1675         498 :   // TODO: We can select this with f32 denormals enabled if all the sources are
    1676             :   // converted from f16 (in which case fmad isn't legal).
    1677             : 
    1678             :   if (Sel0 || Sel1 || Sel2) {
    1679             :     // For dummy operands.
    1680             :     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    1681             :     SDValue Ops[] = {
    1682             :       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
    1683             :       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
    1684             :       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
    1685             :       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
    1686             :       Zero, Zero
    1687             :     };
    1688             : 
    1689         248 :     CurDAG->SelectNodeTo(N,
    1690             :                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
    1691             :                          MVT::f32, Ops);
    1692             :   } else {
    1693             :     SelectCode(N);
    1694             :   }
    1695         996 : }
    1696         498 : 
    1697         498 : // This is here because there isn't a way to use the generated sub0_sub1 as the
    1698             : // subreg index to EXTRACT_SUBREG in tablegen.
    1699             : void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
    1700             :   MemSDNode *Mem = cast<MemSDNode>(N);
    1701        2666 :   unsigned AS = Mem->getAddressSpace();
    1702             :   if (AS == AMDGPUAS::FLAT_ADDRESS) {
    1703        2666 :     SelectCode(N);
    1704        2038 :     return;
    1705        4704 :   }
    1706         281 : 
    1707         135 :   MVT VT = N->getSimpleValueType(0);
    1708             :   bool Is32 = (VT == MVT::i32);
    1709        2477 :   SDLoc SL(N);
    1710             : 
    1711             :   MachineSDNode *CmpSwap = nullptr;
    1712         189 :   if (Subtarget->hasAddr64()) {
    1713         189 :     SDValue SRsrc, VAddr, SOffset, Offset, SLC;
    1714         189 : 
    1715             :     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
    1716             :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
    1717             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
    1718             :       SDValue CmpVal = Mem->getOperand(2);
    1719         189 : 
    1720         189 :       // XXX - Do we care about glue operands?
    1721         189 : 
    1722             :       SDValue Ops[] = {
    1723             :         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1724             :       };
    1725             : 
    1726             :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1727             :     }
    1728         189 :   }
    1729             : 
    1730         118 :   if (!CmpSwap) {
    1731             :     SDValue SRsrc, SOffset, Offset, SLC;
    1732         118 :     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
    1733         118 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
    1734         118 :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
    1735         118 : 
    1736             :       SDValue CmpVal = Mem->getOperand(2);
    1737         354 :       SDValue Ops[] = {
    1738             :         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1739         301 :       };
    1740             : 
    1741         118 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1742             :     }
    1743             :   }
    1744             : 
    1745             :   if (!CmpSwap) {
    1746             :     SelectCode(N);
    1747             :     return;
    1748             :   }
    1749         197 : 
    1750             :   MachineMemOperand *MMO = Mem->getMemOperand();
    1751             :   CurDAG->setNodeMemRefs(CmpSwap, {MMO});
    1752         197 : 
    1753             :   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
    1754         171 :   SDValue Extract
    1755             :     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
    1756             : 
    1757             :   ReplaceUses(SDValue(N, 0), Extract);
    1758             :   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
    1759             :   CurDAG->RemoveDeadNode(N);
    1760             : }
    1761             : 
    1762         104 : bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
    1763          17 :                                             unsigned &Mods) const {
    1764             :   Mods = 0;
    1765          17 :   Src = In;
    1766           8 : 
    1767             :   if (Src.getOpcode() == ISD::FNEG) {
    1768           8 :     Mods |= SISrcMods::NEG;
    1769             :     Src = Src.getOperand(0);
    1770             :   }
    1771             : 
    1772             :   if (Src.getOpcode() == ISD::FABS) {
    1773             :     Mods |= SISrcMods::ABS;
    1774           8 :     Src = Src.getOperand(0);
    1775             :   }
    1776          24 : 
    1777             :   return true;
    1778             : }
    1779             : 
    1780           8 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
    1781          44 :                                         SDValue &SrcMods) const {
    1782          44 :   unsigned Mods;
    1783          18 :   if (SelectVOP3ModsImpl(In, Src, Mods)) {
    1784             :     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1785             :     return true;
    1786          18 :   }
    1787             : 
    1788             :   return false;
    1789          18 : }
    1790             : 
    1791          54 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
    1792             :                                              SDValue &SrcMods) const {
    1793             :   SelectVOP3Mods(In, Src, SrcMods);
    1794             :   return isNoNanSrc(Src);
    1795          52 : }
    1796             : 
    1797             : bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
    1798             :   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    1799             :     return false;
    1800          26 : 
    1801          52 :   Src = In;
    1802             :   return true;
    1803          26 : }
    1804             : 
    1805          52 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
    1806             :                                          SDValue &SrcMods, SDValue &Clamp,
    1807          26 :                                          SDValue &Omod) const {
    1808          26 :   SDLoc DL(In);
    1809          26 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1810             :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1811             : 
    1812           0 :   return SelectVOP3Mods(In, Src, SrcMods);
    1813             : }
    1814           0 : 
    1815           0 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
    1816             :                                                    SDValue &SrcMods,
    1817           0 :                                                    SDValue &Clamp,
    1818           0 :                                                    SDValue &Omod) const {
    1819           0 :   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    1820             :   return SelectVOP3Mods(In, Src, SrcMods);
    1821             : }
    1822           0 : 
    1823           0 : bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
    1824           0 :                                          SDValue &Clamp, SDValue &Omod) const {
    1825             :   Src = In;
    1826             : 
    1827           0 :   SDLoc DL(In);
    1828             :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1829             :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1830           0 : 
    1831             :   return true;
    1832             : }
    1833           0 : 
    1834           0 : static SDValue stripBitcast(SDValue Val) {
    1835           0 :   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
    1836             : }
    1837             : 
    1838             : // Figure out if this is really an extract of the high 16-bits of a dword.
    1839             : static bool isExtractHiElt(SDValue In, SDValue &Out) {
    1840             :   In = stripBitcast(In);
    1841         246 :   if (In.getOpcode() != ISD::TRUNCATE)
    1842             :     return false;
    1843         246 : 
    1844         246 :   SDValue Srl = In.getOperand(0);
    1845             :   if (Srl.getOpcode() == ISD::SRL) {
    1846             :     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
    1847           0 :       if (ShiftAmt->getZExtValue() == 16) {
    1848        4603 :         Out = stripBitcast(Srl.getOperand(0));
    1849           0 :         return true;
    1850             :       }
    1851        4297 :     }
    1852           0 :   }
    1853             : 
    1854             :   return false;
    1855       11415 : }
    1856             : 
    1857             : // Look through operations that obscure just looking at the low 16-bits of the
    1858             : // same register.
    1859       11415 : static SDValue stripExtractLoElt(SDValue In) {
    1860       11415 :   if (In.getOpcode() == ISD::TRUNCATE) {
    1861             :     SDValue Src = In.getOperand(0);
    1862       11415 :     if (Src.getValueType().getSizeInBits() == 32)
    1863             :       return stripBitcast(Src);
    1864             :   }
    1865          59 : 
    1866             :   return In;
    1867             : }
    1868             : 
    1869         118 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
    1870          59 :                                          SDValue &SrcMods) const {
    1871             :   unsigned Mods = 0;
    1872             :   Src = In;
    1873           0 : 
    1874             :   if (Src.getOpcode() == ISD::FNEG) {
    1875           0 :     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    1876             :     Src = Src.getOperand(0);
    1877           0 :   }
    1878           0 : 
    1879           0 :   if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    1880             :     unsigned VecMods = Mods;
    1881           0 : 
    1882             :     SDValue Lo = stripBitcast(Src.getOperand(0));
    1883             :     SDValue Hi = stripBitcast(Src.getOperand(1));
    1884             : 
    1885        2107 :     if (Lo.getOpcode() == ISD::FNEG) {
    1886             :       Lo = stripBitcast(Lo.getOperand(0));
    1887             :       Mods ^= SISrcMods::NEG;
    1888             :     }
    1889        1000 : 
    1890        1000 :     if (Hi.getOpcode() == ISD::FNEG) {
    1891        1000 :       Hi = stripBitcast(Hi.getOperand(0));
    1892             :       Mods ^= SISrcMods::NEG_HI;
    1893             :     }
    1894         382 : 
    1895         382 :     if (isExtractHiElt(Lo, Lo))
    1896             :       Mods |= SISrcMods::OP_SEL_0;
    1897         400 : 
    1898         188 :     if (isExtractHiElt(Hi, Hi))
    1899         188 :       Mods |= SISrcMods::OP_SEL_1;
    1900             : 
    1901             :     Lo = stripExtractLoElt(Lo);
    1902             :     Hi = stripExtractLoElt(Hi);
    1903             : 
    1904             :     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
    1905             :       // Really a scalar input. Just select from the low half of the register to
    1906             :       // avoid packing.
    1907             : 
    1908             :       Src = Lo;
    1909         576 :       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1910         576 :       return true;
    1911          91 :     }
    1912          91 : 
    1913          90 :     Mods = VecMods;
    1914             :   }
    1915             : 
    1916         486 :   // Packed instructions do not have abs modifiers.
    1917             :   Mods |= SISrcMods::OP_SEL_1;
    1918             : 
    1919        1056 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1920             :   return true;
    1921             : }
    1922        1056 : 
    1923             : bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
    1924        2112 :                                           SDValue &SrcMods,
    1925             :                                           SDValue &Clamp) const {
    1926          15 :   SDLoc SL(In);
    1927             : 
    1928             :   // FIXME: Handle clamp and op_sel
    1929        2112 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1930             : 
    1931             :   return SelectVOP3PMods(In, Src, SrcMods);
    1932         288 : }
    1933         288 : 
    1934             : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
    1935         288 :                                          SDValue &SrcMods) const {
    1936          11 :   Src = In;
    1937          11 :   // FIXME: Handle op_sel
    1938             :   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    1939             :   return true;
    1940         288 : }
    1941          11 : 
    1942          11 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
    1943             :                                           SDValue &SrcMods,
    1944             :                                           SDValue &Clamp) const {
    1945         288 :   SDLoc SL(In);
    1946          13 : 
    1947             :   // FIXME: Handle clamp
    1948         288 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1949          20 : 
    1950             :   return SelectVOP3OpSel(In, Src, SrcMods);
    1951         288 : }
    1952         288 : 
    1953             : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
    1954         470 :                                              SDValue &SrcMods) const {
    1955             :   // FIXME: Handle op_sel
    1956             :   return SelectVOP3Mods(In, Src, SrcMods);
    1957             : }
    1958          35 : 
    1959          35 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
    1960          35 :                                               SDValue &SrcMods,
    1961             :                                               SDValue &Clamp) const {
    1962             :   SDLoc SL(In);
    1963             : 
    1964             :   // FIXME: Handle clamp
    1965             :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1966             : 
    1967        1021 :   return SelectVOP3OpSelMods(In, Src, SrcMods);
    1968             : }
    1969        1021 : 
    1970        1021 : // The return value is not whether the match is possible (which it always is),
    1971             : // but whether or not it a conversion is really used.
    1972             : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
    1973         481 :                                                    unsigned &Mods) const {
    1974             :   Mods = 0;
    1975             :   SelectVOP3ModsImpl(In, Src, Mods);
    1976             : 
    1977             :   if (Src.getOpcode() == ISD::FP_EXTEND) {
    1978             :     Src = Src.getOperand(0);
    1979         481 :     assert(Src.getValueType() == MVT::f16);
    1980             :     Src = stripBitcast(Src);
    1981         481 : 
    1982             :     // Be careful about folding modifiers if we already have an abs. fneg is
    1983             :     // applied last, so we don't want to apply an earlier fneg.
    1984           0 :     if ((Mods & SISrcMods::ABS) == 0) {
    1985             :       unsigned ModsTmp;
    1986           0 :       SelectVOP3ModsImpl(Src, Src, ModsTmp);
    1987             : 
    1988           0 :       if ((ModsTmp & SISrcMods::NEG) != 0)
    1989           0 :         Mods ^= SISrcMods::NEG;
    1990             : 
    1991             :       if ((ModsTmp & SISrcMods::ABS) != 0)
    1992          16 :         Mods |= SISrcMods::ABS;
    1993             :     }
    1994             : 
    1995             :     // op_sel/op_sel_hi decide the source type and source.
    1996             :     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    1997             :     // If the sources's op_sel is set, it picks the high half of the source
    1998          16 :     // register.
    1999             : 
    2000          16 :     Mods |= SISrcMods::OP_SEL_1;
    2001             :     if (isExtractHiElt(Src, Src)) {
    2002             :       Mods |= SISrcMods::OP_SEL_0;
    2003             : 
    2004             :       // TODO: Should we try to look for neg/abs here?
    2005             :     }
    2006          16 : 
    2007             :     return true;
    2008             :   }
    2009           8 : 
    2010             :   return false;
    2011             : }
    2012             : 
    2013             : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
    2014             :                                                SDValue &SrcMods) const {
    2015           8 :   unsigned Mods = 0;
    2016             :   SelectVOP3PMadMixModsImpl(In, Src, Mods);
    2017           8 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    2018             :   return true;
    2019             : }
    2020             : 
    2021             : // TODO: Can we identify things like v_mad_mixhi_f16?
    2022           0 : bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
    2023             :   if (In.isUndef()) {
    2024           0 :     Src = In;
    2025           0 :     return true;
    2026             :   }
    2027           0 : 
    2028           0 :   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    2029             :     SDLoc SL(In);
    2030           0 :     SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    2031             :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2032             :                                                  SL, MVT::i32, K);
    2033             :     Src = SDValue(MovK, 0);
    2034           0 :     return true;
    2035             :   }
    2036           0 : 
    2037             :   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    2038           0 :     SDLoc SL(In);
    2039           0 :     SDValue K = CurDAG->getTargetConstant(
    2040             :       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    2041           0 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2042           0 :                                                  SL, MVT::i32, K);
    2043             :     Src = SDValue(MovK, 0);
    2044             :     return true;
    2045             :   }
    2046             : 
    2047             :   return isExtractHiElt(In, Src);
    2048             : }
    2049             : 
    2050           0 : bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
    2051           0 :   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    2052           0 :     return false;
    2053             :   }
    2054             :   const SIRegisterInfo *SIRI =
    2055             :     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
    2056             :   const SIInstrInfo * SII =
    2057           0 :     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
    2058             : 
    2059             :   unsigned Limit = 0;
    2060             :   bool AllUsesAcceptSReg = true;
    2061             :   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
    2062             :     Limit < 10 && U != E; ++U, ++Limit) {
    2063           0 :     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
    2064             : 
    2065           0 :     // If the register class is unknown, it could be an unknown
    2066           0 :     // register class that needs to be an SGPR, e.g. an inline asm
    2067           0 :     // constraint
    2068           0 :     if (!RC || SIRI->isSGPRClass(RC))
    2069             :       return false;
    2070             : 
    2071             :     if (RC != &AMDGPU::VS_32RegClass) {
    2072           0 :       AllUsesAcceptSReg = false;
    2073           0 :       SDNode * User = *U;
    2074           0 :       if (User->isMachineOpcode()) {
    2075           0 :         unsigned Opc = User->getMachineOpcode();
    2076             :         MCInstrDesc Desc = SII->get(Opc);
    2077             :         if (Desc.isCommutable()) {
    2078             :           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
    2079           0 :           unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
    2080           0 :           if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
    2081           0 :             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
    2082             :             const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
    2083           0 :             if (CommutedRC == &AMDGPU::VS_32RegClass)
    2084           0 :               AllUsesAcceptSReg = true;
    2085             :           }
    2086             :         }
    2087             :       }
    2088           0 :       // If "AllUsesAcceptSReg == false" so far we haven't suceeded
    2089           0 :       // commuting current user. This means have at least one use
    2090           0 :       // that strictly require VGPR. Thus, we will not attempt to commute
    2091           0 :       // other user instructions.
    2092             :       if (!AllUsesAcceptSReg)
    2093           0 :         break;
    2094           0 :     }
    2095             :   }
    2096             :   return !AllUsesAcceptSReg && (Limit < 10);
    2097           0 : }
    2098             : 
    2099             : bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
    2100       22774 :   auto Ld = cast<LoadSDNode>(N);
    2101       22774 : 
    2102             :   return Ld->getAlignment() >= 4 &&
    2103             :         (
    2104             :           (
    2105       22774 :             (
    2106             :               Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS       ||
    2107       22774 :               Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
    2108             :             )
    2109             :             &&
    2110             :             !N->isDivergent()
    2111       35240 :           )
    2112       35240 :           ||
    2113       26126 :           (
    2114             :             Subtarget->getScalarizeGlobalBehavior() &&
    2115             :             Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
    2116             :             !Ld->isVolatile() &&
    2117             :             !N->isDivergent() &&
    2118       52237 :             static_cast<const SITargetLowering *>(
    2119             :               getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
    2120             :           )
    2121       16122 :         );
    2122             : }
    2123             : 
    2124        4656 : void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
    2125             :   const AMDGPUTargetLowering& Lowering =
    2126        8212 :     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
    2127        4106 :   bool IsModified = false;
    2128        1084 :   do {
    2129        1084 :     IsModified = false;
    2130        1084 : 
    2131        1000 :     // Go over all selected nodes and try to fold them a bit more
    2132        1000 :     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    2133        1000 :     while (Position != CurDAG->allnodes_end()) {
    2134             :       SDNode *Node = &*Position++;
    2135             :       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
    2136             :       if (!MachineNode)
    2137             :         continue;
    2138             : 
    2139             :       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
    2140             :       if (ResNode != Node) {
    2141             :         if (ResNode)
    2142             :           ReplaceUses(Node, ResNode);
    2143             :         IsModified = true;
    2144             :       }
    2145             :     }
    2146       12770 :     CurDAG->RemoveDeadNodes();
    2147             :   } while (IsModified);
    2148             : }
    2149       68872 : 
    2150             : bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
    2151             :   Subtarget = &MF.getSubtarget<R600Subtarget>();
    2152      136687 :   return SelectionDAGISel::runOnMachineFunction(MF);
    2153             : }
    2154             : 
    2155             : bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
    2156       22512 :   if (!N->readMem())
    2157             :     return false;
    2158             :   if (CbId == -1)
    2159       45501 :     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
    2160       45501 :            N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
    2161             : 
    2162       22549 :   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
    2163             : }
    2164       39159 : 
    2165        8227 : bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
    2166        8227 :                                                          SDValue& IntPtr) {
    2167        7914 :   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    2168             :     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
    2169        2416 :                                        true);
    2170             :     return true;
    2171       68872 :   }
    2172             :   return false;
    2173             : }
    2174       24548 : 
    2175             : bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    2176       24548 :     SDValue& BaseReg, SDValue &Offset) {
    2177             :   if (!isa<ConstantSDNode>(Addr)) {
    2178       26846 :     BaseReg = Addr;
    2179             :     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    2180             :     return true;
    2181             :   }
    2182       26846 :   return false;
    2183     2201008 : }
    2184             : 
                       /// Selects node \p N.
                       ///
                       /// Nodes that already carry a machine opcode only get their node id reset
                       /// to -1. BUILD_VERTICAL_VECTOR, SCALAR_TO_VECTOR and BUILD_VECTOR nodes
                       /// are routed to SelectBuildVector with a register class chosen from the
                       /// vector's element count. Every other opcode is passed to SelectCode.
    2185             : void R600DAGToDAGISel::Select(SDNode *N) {
    2186             :   unsigned int Opc = N->getOpcode();
    2187             :   if (N->isMachineOpcode()) {
    2188             :     N->setNodeId(-1);
    2189      592199 :     return;   // Already selected.
    2190      592199 :   }
    2191       35142 : 
    2192       35047 :   switch (Opc) {
                         // Opcodes without a case below leave the switch and reach SelectCode.
    2193             :   default: break;
    2194             :   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
    2195             :   case ISD::SCALAR_TO_VECTOR:
    2196       26846 :   case ISD::BUILD_VECTOR: {
    2197             :     EVT VT = N->getValueType(0);
    2198       24548 :     unsigned NumVectorElts = VT.getVectorNumElements();
                           // Assigned on every path of the inner switch that does not hit
                           // llvm_unreachable, so it is always set before use.
    2199             :     unsigned RegClassID;
    2200        2298 :     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    2201        2298 :     // that adds a 128 bits reg copy when going through TwoAddressInstructions
    2202        2298 :     // pass. We want to avoid 128 bits copies as much as possible because they
    2203             :     // can't be bundled by our scheduler.
                           // Only 2- and 4-element vectors are supported. The vertical 128-bit
                           // class is picked solely for BUILD_VERTICAL_VECTOR.
    2204             :     switch(NumVectorElts) {
    2205           0 :     case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    2206           0 :     case 4:
    2207           0 :       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    2208           0 :         RegClassID = R600::R600_Reg128VerticalRegClassID;
    2209           0 :       else
    2210             :         RegClassID = R600::R600_Reg128RegClassID;
    2211             :       break;
    2212           0 :     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    2213             :     }
    2214             :     SelectBuildVector(N, RegClassID);
    2215           0 :     return;
    2216             :   }
    2217             :   }
    2218           0 : 
                         // Fallback for every opcode not handled above.
    2219           0 :   SelectCode(N);
    2220           0 : }
    2221             : 
                       /// Splits \p Addr into a \p Base / \p Offset pair; always returns true.
                       ///
                       /// The first matching case wins:
                       ///  - \p Addr is a constant: Base is the INDIRECT_BASE_ADDR register and
                       ///    Offset is the constant.
                       ///  - \p Addr is a DWORDADDR node whose operand 0 is a constant: same as
                       ///    above, using that operand's value.
                       ///  - \p Addr is an ADD or OR whose operand 1 is a constant: Base is
                       ///    operand 0 and Offset is the constant.
                       ///  - otherwise: Base is \p Addr and Offset is 0.
                       ///
                       /// Offset is always an i32 target constant built from the constant's
                       /// zero-extended value.
    2222             : bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
    2223             :                                           SDValue &Offset) {
                         // C is assigned inside the conditions below; each branch body only
                         // reads it after its own dyn_cast has succeeded.
    2224             :   ConstantSDNode *C;
    2225           0 :   SDLoc DL(Addr);
    2226             : 
    2227             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    2228           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    2229           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2230           0 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
    2231             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    2232             :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    2233             :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2234             :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
    2235       84708 :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
                           // NOTE(review): OR is folded exactly like ADD. That is only
                           // equivalent when the constant's set bits are known to be zero in
                           // operand 0 -- confirm that this is guaranteed elsewhere.
    2236       84708 :     Base = Addr.getOperand(0);
    2237       84708 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2238             :   } else {
    2239           0 :     Base = Addr;
    2240             :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    2241             :   }
    2242       84708 : 
    2243             :   return true;
    2244        2455 : }
    2245             : 
                       /// Splits \p Addr into a \p Base / \p Offset pair; always returns true.
                       ///
                       /// The first matching case wins:
                       ///  - \p Addr is an ADD whose operand 1 is a constant accepted by
                       ///    isInt<16>: Base is operand 0 and Offset is that constant.
                       ///  - \p Addr itself is a constant accepted by isInt<16>: Base is a
                       ///    CopyFromReg of R600::ZERO chained to the DAG entry node and Offset
                       ///    is the constant.
                       ///  - otherwise: Base is \p Addr and Offset is 0.
                       ///
                       /// Offset is always an i32 target constant.
    2246             : bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
    2247        4910 :                                           SDValue &Offset) {
    2248             :   ConstantSDNode *IMMOffset;
    2249             : 
                         // NOTE(review): both range checks below apply the signed 16-bit
                         // isInt<16> test to the constant's zero-extended value -- confirm that
                         // this is the intended handling for negative offsets.
    2250             :   if (Addr.getOpcode() == ISD::ADD
    2251             :       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
    2252             :       && isInt<16>(IMMOffset->getZExtValue())) {
    2253             : 
    2254             :       Base = Addr.getOperand(0);
    2255             :       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2256        2062 :                                          MVT::i32);
    2257        2062 :       return true;
    2258             :   // If the pointer address is constant, we can move it to the offset field.
    2259             :   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
    2260             :              && isInt<16>(IMMOffset->getZExtValue())) {
                           // The base is read from the R600::ZERO register, so the whole
                           // constant address is carried by the offset.
    2261             :     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
    2262           0 :                                   SDLoc(CurDAG->getEntryNode()),
    2263             :                                   R600::ZERO, MVT::i32);
    2264        2455 :     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2265             :                                        MVT::i32);
    2266             :     return true;
    2267             :   }
    2268             : 
    2269             :   // Default case, no offset
    2270             :   Base = Addr;
    2271             :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    2272        2101 :   return true;
    2273             : }

Generated by: LCOV version 1.13