//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

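// Descriptive note (added): LDS accesses on subtargets where
// ldsRequiresM0Init() is true need the M0 register initialized first. The
// helper below copies an all-ones value into M0 and glues that copy onto the
// memory node's operand list so the two stay together through scheduling.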
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

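// Descriptive note (added): materialize a 64-bit scalar immediate as two
// S_MOV_B32 instructions, one per 32-bit half, combined into an SReg_64 with
// a REG_SEQUENCE.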
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

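// Descriptive note (added): lower a BUILD_VECTOR or SCALAR_TO_VECTOR into a
// REG_SEQUENCE of the given register class. Each source element is paired
// with its subregister index, and any missing tail elements (from a
// scalar_to_vector) are filled with a shared IMPLICIT_DEF.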
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

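// Descriptive note (added): the function below splits a 64-bit scalar add or
// subtract into 32-bit halves. The low halves use S_ADD_U32/S_SUB_U32, which
// define the carry in SCC, and the high halves use S_ADDC_U32/S_SUBB_U32,
// which consume it via the glue result; the two halves are then recombined
// with a REG_SEQUENCE.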
// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

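// Descriptive note (added): the pattern below matches a DS address as a base
// pointer plus an unsigned 16-bit byte offset. A (base + constant) add is
// split across the two operands when the constant is legal, a (sub C, x)
// address is rewritten as a negated base with C as the offset, and a purely
// constant address becomes an offset from a zero base register so multiple
// accesses can share it.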
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

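// Descriptive note (added): the function below decomposes an address for
// MUBUF instructions. A uniform base becomes the resource pointer, a
// divergent component becomes the 64-bit VGPR address (setting addr64), and
// a constant offset goes into the immediate field when legal, otherwise into
// an soffset register materialized with S_MOV_B32.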
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

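// Descriptive note (added): the pattern below matches a private (scratch)
// access in offen mode. A fully constant address is split into a V_MOV_B32
// of the bits above 4095 plus a 12-bit immediate offset, with soffset chosen
// between the stack pointer and the scratch wave offset register; otherwise
// the VGPR address and soffset come from foldFrameIndex, with a legal
// constant offset folded into the immediate when it is safe to do so.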
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

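// Descriptive note (added): the template below folds a constant offset into
// a flat memory operand when the subtarget supports flat instruction
// offsets: signed 13-bit offsets for the signed variant, unsigned 12-bit
// otherwise. Anything else leaves the immediate offset at zero.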
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

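// Descriptive note (added): the helper below widens a 32-bit SMRD base
// address to 64 bits. The original value becomes the low half, and the
// function's 32-bit-address high bits, materialized with S_MOV_B32, become
// the high half of a REG_SEQUENCE.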
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap()) &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

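// Descriptive note (added): the SelectSMRD* entry points below are thin
// wrappers over SelectSMRD/SelectSMRDOffset that accept a match only for the
// offset form each addressing mode wants: an encoded immediate, a 32-bit
// literal (Sea Islands only), or an offset held in an SGPR.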
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

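// Descriptive note (added): split an index into a variable base plus a
// constant offset for indirect (MOVREL) addressing. A purely constant index
// is rejected here and left to other patterns.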
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

1470 : SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1471 18 : SDValue Val, uint32_t Offset,
1472 : uint32_t Width) {
1473 : // Transformation function: pack the offset and width of a BFE into
1474 18 : // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1475 18 : // source, bits [5:0] contain the offset and bits [22:16] the width.
1476 : uint32_t PackedVal = Offset | (Width << 16);
1477 : SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1478 0 :
1479 : return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1480 : }
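// A minimal sketch of the packing rule above (editorial, checkable at
// compile time; not part of the original file):
//   constexpr uint32_t packBFE(uint32_t Offset, uint32_t Width) {
//     return Offset | (Width << 16);
//   }
//   static_assert(packBFE(16, 8) == 0x00080010, "offset 16, width 8");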
1481 464 :
1482 : void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1483 : // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1484 0 : // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1485 : // Predicate: 0 < b <= c < 32
1486 0 :
1487 0 : const SDValue &Shl = N->getOperand(0);
1488 : ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1489 : ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1490 0 :
1491 0 : if (B && C) {
1492 : uint32_t BVal = B->getZExtValue();
1493 0 : uint32_t CVal = C->getZExtValue();
1494 :
1495 : if (0 < BVal && BVal <= CVal && CVal < 32) {
1496 0 : bool Signed = N->getOpcode() == ISD::SRA;
1497 : unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1498 :
1499 0 : ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1500 : 32 - CVal));
1501 0 : return;
1502 0 : }
1503 0 : }
1504 : SelectCode(N);
1505 : }
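// Worked example (editorial): for "(x << 8) >> 20", b = 8 and c = 20, so the
// predicate 0 < 8 <= 20 < 32 holds and the node becomes BFE_U32 x, 12, 12:
// a field of 32 - c = 12 bits starting at bit c - b = 12, i.e. bits [23:12]
// of x.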
1506 :
1507 0 : void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1508 0 : switch (N->getOpcode()) {
1509 0 : case ISD::AND:
1510 : if (N->getOperand(0).getOpcode() == ISD::SRL) {
1511 : // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1512 : // Predicate: isMask(mask)
1513 0 : const SDValue &Srl = N->getOperand(0);
1514 : ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1515 0 : ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1516 0 :
1517 0 : if (Shift && Mask) {
1518 : uint32_t ShiftVal = Shift->getZExtValue();
1519 : uint32_t MaskVal = Mask->getZExtValue();
1520 0 :
1521 : if (isMask_32(MaskVal)) {
1522 : uint32_t WidthVal = countPopulation(MaskVal);
1523 :
1524 : ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1525 : Srl.getOperand(0), ShiftVal, WidthVal));
1526 0 : return;
1527 0 : }
1528 : }
1529 0 : }
1530 : break;
1531 : case ISD::SRL:
1532 411 : if (N->getOperand(0).getOpcode() == ISD::AND) {
1533 : // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1534 : // Predicate: isMask(mask >> b)
1535 : const SDValue &And = N->getOperand(0);
1536 : ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1537 411 : ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1538 411 :
1539 : if (Shift && Mask) {
1540 : uint32_t ShiftVal = Shift->getZExtValue();
1541 411 : uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1542 391 :
1543 391 : if (isMask_32(MaskVal)) {
1544 : uint32_t WidthVal = countPopulation(MaskVal);
1545 391 :
1546 380 : ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1547 380 : And.getOperand(0), ShiftVal, WidthVal));
1548 : return;
1549 380 : }
1550 : }
1551 380 : } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1552 : SelectS_BFEFromShifts(N);
1553 : return;
1554 : }
1555 : break;
1556 : case ISD::SRA:
1557 21470 : if (N->getOperand(0).getOpcode() == ISD::SHL) {
1558 42940 : SelectS_BFEFromShifts(N);
1559 7495 : return;
1560 14990 : }
1561 : break;
1562 :
1563 : case ISD::SIGN_EXTEND_INREG: {
1564 : // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1565 : SDValue Src = N->getOperand(0);
1566 : if (Src.getOpcode() != ISD::SRL)
1567 2336 : break;
1568 2321 :
1569 4642 : const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1570 : if (!Amt)
1571 : break;
1572 :
1573 : unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1574 2143 : ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1575 : Amt->getZExtValue(), Width));
1576 2143 : return;
1577 : }
1578 : }
1579 :
1580 : SelectCode(N);
1581 6844 : }
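// Editorial example of the AND case above: "(x >> 4) & 0xff" has ShiftVal 4
// and MaskVal 0xff; isMask_32(0xff) holds and countPopulation(0xff) == 8, so
// this selects S_BFE_U32 with the packed operand 4 | (8 << 16), an 8-bit
// field starting at bit 4.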
1582 13688 :
1583 : bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1584 : assert(N->getOpcode() == ISD::BRCOND);
1585 : if (!N->hasOneUse())
1586 : return false;
1587 803 :
1588 : SDValue Cond = N->getOperand(1);
1589 803 : if (Cond.getOpcode() == ISD::CopyToReg)
1590 800 : Cond = Cond.getOperand(2);
1591 1600 :
1592 : if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1593 : return false;
1594 :
1595 : MVT VT = Cond.getOperand(0).getSimpleValueType();
1596 800 : if (VT == MVT::i32)
1597 : return true;
1598 800 :
1599 : if (VT == MVT::i64) {
1600 : auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1601 6041 :
1602 16 : ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1603 16 : return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1604 : }
1605 :
1606 2493 : return false;
1607 4986 : }
1608 395 :
1609 395 : void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1610 : SDValue Cond = N->getOperand(1);
1611 :
1612 : if (Cond.isUndef()) {
1613 4638 : CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1614 : N->getOperand(2), N->getOperand(0));
1615 4638 : return;
1616 4638 : }
1617 :
1618 : bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1619 : unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1620 : unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1621 : SDLoc SL(N);
1622 :
1623 1401 : if (!UseSCCBr) {
1624 2802 : // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1625 1401 : // analyzed what generates the vcc value, so we do not know whether vcc
1626 : // bits for disabled lanes are 0. Thus we need to mask out bits for
1627 : // disabled lanes.
1628 : //
1629 : // For the case that we select S_CBRANCH_SCC1 and it gets
1630 : // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1631 : // SIInstrInfo::moveToVALU, which inserts the S_AND.
1632 : //
1633 0 : // We could add an analysis of what generates the vcc value here and omit
1634 : // the S_AND when it is unnecessary. But it would be better to add a separate
1635 : // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1636 0 : // catches both cases.
1637 : Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1638 0 : CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1639 0 : Cond),
1640 0 : 0);
1641 : }
1642 0 :
1643 0 : SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1644 : CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1645 : N->getOperand(2), // Basic Block
1646 0 : VCC.getValue(0));
1647 0 : }
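// Hedged sketch (editorial) of the code emitted on the S_CBRANCH_VCCNZ path,
// where the S_AND masks out condition bits of disabled lanes:
//   s_and_b64 vcc, exec, s[0:1]   ; cond & exec (register names illustrative)
//   s_cbranch_vccnz BB_target     ; taken if any active lane is true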
1648 :
1649 0 : void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1650 0 : MVT VT = N->getSimpleValueType(0);
1651 : bool IsFMA = N->getOpcode() == ISD::FMA;
1652 0 : if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1653 0 : !Subtarget->hasFmaMixInsts()) ||
1654 : ((IsFMA && Subtarget->hasMadMixInsts()) ||
1655 : (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1656 : SelectCode(N);
1657 : return;
1658 : }
1659 576 :
1660 576 : SDValue Src0 = N->getOperand(0);
1661 : SDValue Src1 = N->getOperand(1);
1662 576 : SDValue Src2 = N->getOperand(2);
1663 156 : unsigned Src0Mods, Src1Mods, Src2Mods;
1664 :
1665 78 : // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1666 : // using the conversion from f16.
1667 : bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1668 498 : bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1669 : bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1670 498 :
1671 : assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1672 : "fmad selected with denormals enabled");
1673 498 : // TODO: We can select this with f32 denormals enabled if all the sources are
1674 : // converted from f16 (in which case fmad isn't legal).
1675 :
1676 : if (Sel0 || Sel1 || Sel2) {
1677 : // For dummy operands.
1678 : SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1679 : SDValue Ops[] = {
1680 : CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1681 : CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1682 : CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1683 : CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1684 : Zero, Zero
1685 : };
1686 :
1687 248 : CurDAG->SelectNodeTo(N,
1688 : IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1689 : MVT::f32, Ops);
1690 : } else {
1691 : SelectCode(N);
1692 : }
1693 996 : }
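// Illustrative IR (editorial, assuming a subtarget with the mix instructions
// described above):
//   %e = fpext half %h to float
//   %r = call float @llvm.fmuladd.f32(float %e, float %b, float %c)
// Here the f16-to-f32 conversion can be folded into a single v_mad_mix_f32 /
// v_fma_mix_f32 via the op_sel_hi source modifiers instead of emitting a
// separate v_cvt_f32_f16.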
1694 498 :
1695 498 : // This is here because there isn't a way to use the generated sub0_sub1 as the
1696 : // subreg index to EXTRACT_SUBREG in tablegen.
1697 : void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1698 : MemSDNode *Mem = cast<MemSDNode>(N);
1699 2666 : unsigned AS = Mem->getAddressSpace();
1700 : if (AS == AMDGPUAS::FLAT_ADDRESS) {
1701 2666 : SelectCode(N);
1702 2038 : return;
1703 4704 : }
1704 281 :
1705 135 : MVT VT = N->getSimpleValueType(0);
1706 : bool Is32 = (VT == MVT::i32);
1707 2477 : SDLoc SL(N);
1708 :
1709 : MachineSDNode *CmpSwap = nullptr;
1710 189 : if (Subtarget->hasAddr64()) {
1711 189 : SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1712 189 :
1713 : if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1714 : unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1715 : AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1716 : SDValue CmpVal = Mem->getOperand(2);
1717 189 :
1718 189 : // XXX - Do we care about glue operands?
1719 189 :
1720 : SDValue Ops[] = {
1721 : CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1722 : };
1723 :
1724 : CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1725 : }
1726 189 : }
1727 :
1728 118 : if (!CmpSwap) {
1729 : SDValue SRsrc, SOffset, Offset, SLC;
1730 118 : if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1731 118 : unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1732 118 : AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1733 118 :
1734 : SDValue CmpVal = Mem->getOperand(2);
1735 354 : SDValue Ops[] = {
1736 : CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1737 301 : };
1738 :
1739 118 : CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1740 : }
1741 : }
1742 :
1743 : if (!CmpSwap) {
1744 : SelectCode(N);
1745 : return;
1746 : }
1747 197 :
1748 : MachineMemOperand *MMO = Mem->getMemOperand();
1749 : CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1750 197 :
1751 : unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1752 171 : SDValue Extract
1753 : = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1754 :
1755 : ReplaceUses(SDValue(N, 0), Extract);
1756 : ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1757 : CurDAG->RemoveDeadNode(N);
1758 : }
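// Editorial note: the MUBUF cmpswap instructions take the new value and the
// compare value as one packed register pair and write the loaded original
// value back into the low half of that pair; the EXTRACT_SUBREG above (sub0
// for i32, sub0_sub1 for i64) recovers it. Illustrative assembly:
//   buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 glc
//   ; afterwards v0 holds the value that was previously in memory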
1759 :
1760 104 : bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1761 17 : unsigned &Mods) const {
1762 : Mods = 0;
1763 17 : Src = In;
1764 8 :
1765 : if (Src.getOpcode() == ISD::FNEG) {
1766 8 : Mods |= SISrcMods::NEG;
1767 : Src = Src.getOperand(0);
1768 : }
1769 :
1770 : if (Src.getOpcode() == ISD::FABS) {
1771 : Mods |= SISrcMods::ABS;
1772 8 : Src = Src.getOperand(0);
1773 : }
1774 24 :
1775 : return true;
1776 : }
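// Worked example (editorial): for In = (fneg (fabs x)) the two checks above
// strip both nodes, leaving Src = x with Mods = SISrcMods::NEG |
// SISrcMods::ABS, which the VALU encoding applies as -|x|.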
1777 :
1778 8 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1779 44 : SDValue &SrcMods) const {
1780 44 : unsigned Mods;
1781 18 : if (SelectVOP3ModsImpl(In, Src, Mods)) {
1782 : SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1783 : return true;
1784 18 : }
1785 :
1786 : return false;
1787 18 : }
1788 :
1789 54 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1790 : SDValue &SrcMods) const {
1791 : SelectVOP3Mods(In, Src, SrcMods);
1792 : return isNoNanSrc(Src);
1793 52 : }
1794 :
1795 : bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1796 : if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1797 : return false;
1798 26 :
1799 52 : Src = In;
1800 : return true;
1801 26 : }
1802 :
1803 52 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1804 : SDValue &SrcMods, SDValue &Clamp,
1805 26 : SDValue &Omod) const {
1806 26 : SDLoc DL(In);
1807 26 : Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1808 : Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1809 :
1810 0 : return SelectVOP3Mods(In, Src, SrcMods);
1811 : }
1812 0 :
1813 0 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1814 : SDValue &SrcMods,
1815 0 : SDValue &Clamp,
1816 0 : SDValue &Omod) const {
1817 0 : Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1818 : return SelectVOP3Mods(In, Src, SrcMods);
1819 : }
1820 0 :
1821 0 : bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1822 0 : SDValue &Clamp, SDValue &Omod) const {
1823 : Src = In;
1824 :
1825 0 : SDLoc DL(In);
1826 : Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1827 : Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1828 0 :
1829 : return true;
1830 : }
1831 0 :
1832 0 : static SDValue stripBitcast(SDValue Val) {
1833 0 : return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1834 : }
1835 :
1836 : // Figure out if this is really an extract of the high 16-bits of a dword.
1837 : static bool isExtractHiElt(SDValue In, SDValue &Out) {
1838 : In = stripBitcast(In);
1839 246 : if (In.getOpcode() != ISD::TRUNCATE)
1840 : return false;
1841 246 :
1842 246 : SDValue Srl = In.getOperand(0);
1843 : if (Srl.getOpcode() == ISD::SRL) {
1844 : if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1845 0 : if (ShiftAmt->getZExtValue() == 16) {
1846 4603 : Out = stripBitcast(Srl.getOperand(0));
1847 0 : return true;
1848 : }
1849 4297 : }
1850 0 : }
1851 :
1852 : return false;
1853 11427 : }
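// Example (editorial): this matches the DAG produced by code such as
//   uint16_t hi = uint16_t(x >> 16);   // (trunc (srl x, 16))
// and returns the full 32-bit value in Out, so callers can address the high
// half directly through op_sel instead of materializing the shift.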
1854 :
1855 : // Look through operations that only obscure a direct read of the low 16 bits
1856 : // of the same register.
1857 11427 : static SDValue stripExtractLoElt(SDValue In) {
1858 11427 : if (In.getOpcode() == ISD::TRUNCATE) {
1859 : SDValue Src = In.getOperand(0);
1860 11427 : if (Src.getValueType().getSizeInBits() == 32)
1861 : return stripBitcast(Src);
1862 : }
1863 59 :
1864 : return In;
1865 : }
1866 :
1867 118 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1868 59 : SDValue &SrcMods) const {
1869 : unsigned Mods = 0;
1870 : Src = In;
1871 0 :
1872 : if (Src.getOpcode() == ISD::FNEG) {
1873 0 : Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1874 : Src = Src.getOperand(0);
1875 0 : }
1876 0 :
1877 0 : if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1878 : unsigned VecMods = Mods;
1879 0 :
1880 : SDValue Lo = stripBitcast(Src.getOperand(0));
1881 : SDValue Hi = stripBitcast(Src.getOperand(1));
1882 :
1883 2107 : if (Lo.getOpcode() == ISD::FNEG) {
1884 : Lo = stripBitcast(Lo.getOperand(0));
1885 : Mods ^= SISrcMods::NEG;
1886 : }
1887 1000 :
1888 1000 : if (Hi.getOpcode() == ISD::FNEG) {
1889 1000 : Hi = stripBitcast(Hi.getOperand(0));
1890 : Mods ^= SISrcMods::NEG_HI;
1891 : }
1892 382 :
1893 382 : if (isExtractHiElt(Lo, Lo))
1894 : Mods |= SISrcMods::OP_SEL_0;
1895 400 :
1896 188 : if (isExtractHiElt(Hi, Hi))
1897 188 : Mods |= SISrcMods::OP_SEL_1;
1898 :
1899 : Lo = stripExtractLoElt(Lo);
1900 : Hi = stripExtractLoElt(Hi);
1901 :
1902 : if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1903 : // Really a scalar input. Just select from the low half of the register to
1904 : // avoid packing.
1905 :
1906 : Src = Lo;
1907 576 : SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1908 576 : return true;
1909 91 : }
1910 91 :
1911 90 : Mods = VecMods;
1912 : }
1913 :
1914 486 : // Packed instructions do not have abs modifiers.
1915 : Mods |= SISrcMods::OP_SEL_1;
1916 :
1917 1056 : SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1918 : return true;
1919 : }
1920 1056 :
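// Editorial example: for a v2f16 operand built as
//   build_vector (fneg %lo), (fneg %hi)
// the logic above folds both negations into the single packed modifier word
// NEG | NEG_HI, so no separate negate instructions are emitted.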
1921 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1922 2112 : SDValue &SrcMods,
1923 : SDValue &Clamp) const {
1924 15 : SDLoc SL(In);
1925 :
1926 : // FIXME: Handle clamp and op_sel
1927 2112 : Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1928 :
1929 : return SelectVOP3PMods(In, Src, SrcMods);
1930 288 : }
1931 288 :
1932 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1933 288 : SDValue &SrcMods) const {
1934 11 : Src = In;
1935 11 : // FIXME: Handle op_sel
1936 : SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1937 : return true;
1938 288 : }
1939 11 :
1940 11 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1941 : SDValue &SrcMods,
1942 : SDValue &Clamp) const {
1943 288 : SDLoc SL(In);
1944 13 :
1945 : // FIXME: Handle clamp
1946 288 : Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1947 20 :
1948 : return SelectVOP3OpSel(In, Src, SrcMods);
1949 288 : }
1950 288 :
1951 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
1952 470 : SDValue &SrcMods) const {
1953 : // FIXME: Handle op_sel
1954 : return SelectVOP3Mods(In, Src, SrcMods);
1955 : }
1956 35 :
1957 35 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1958 35 : SDValue &SrcMods,
1959 : SDValue &Clamp) const {
1960 : SDLoc SL(In);
1961 :
1962 : // FIXME: Handle clamp
1963 : Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1964 :
1965 1021 : return SelectVOP3OpSelMods(In, Src, SrcMods);
1966 : }
1967 1021 :
1968 1021 : // The return value is not whether the match is possible (which it always is),
1969 : // but whether or not a conversion is really used.
1970 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
1971 481 : unsigned &Mods) const {
1972 : Mods = 0;
1973 : SelectVOP3ModsImpl(In, Src, Mods);
1974 :
1975 : if (Src.getOpcode() == ISD::FP_EXTEND) {
1976 : Src = Src.getOperand(0);
1977 481 : assert(Src.getValueType() == MVT::f16);
1978 : Src = stripBitcast(Src);
1979 481 :
1980 : // Be careful about folding modifiers if we already have an abs. fneg is
1981 : // applied last, so we don't want to apply an earlier fneg.
1982 0 : if ((Mods & SISrcMods::ABS) == 0) {
1983 : unsigned ModsTmp;
1984 0 : SelectVOP3ModsImpl(Src, Src, ModsTmp);
1985 :
1986 0 : if ((ModsTmp & SISrcMods::NEG) != 0)
1987 0 : Mods ^= SISrcMods::NEG;
1988 :
1989 : if ((ModsTmp & SISrcMods::ABS) != 0)
1990 16 : Mods |= SISrcMods::ABS;
1991 : }
1992 :
1993 : // op_sel/op_sel_hi decide the source type and source.
1994 : // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
1995 : // If the source's op_sel is set, it picks the high half of the source
1996 16 : // register.
1997 :
1998 16 : Mods |= SISrcMods::OP_SEL_1;
1999 : if (isExtractHiElt(Src, Src)) {
2000 : Mods |= SISrcMods::OP_SEL_0;
2001 :
2002 : // TODO: Should we try to look for neg/abs here?
2003 : }
2004 16 :
2005 : return true;
2006 : }
2007 8 :
2008 : return false;
2009 : }
2010 :
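// Summary example (editorial): for In = (fp_extend (fneg %h)), %h of type
// f16, the code above returns Src = %h and Mods = NEG | OP_SEL_1: op_sel_hi
// marks the operand as f16 for the mix instruction to convert, and NEG
// negates it.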
2011 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2012 : SDValue &SrcMods) const {
2013 8 : unsigned Mods = 0;
2014 : SelectVOP3PMadMixModsImpl(In, Src, Mods);
2015 8 : SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2016 : return true;
2017 : }
2018 :
2019 : // TODO: Can we identify things like v_mad_mixhi_f16?
2020 0 : bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2021 : if (In.isUndef()) {
2022 0 : Src = In;
2023 0 : return true;
2024 : }
2025 0 :
2026 0 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2027 : SDLoc SL(In);
2028 0 : SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2029 : MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2030 : SL, MVT::i32, K);
2031 : Src = SDValue(MovK, 0);
2032 0 : return true;
2033 : }
2034 0 :
2035 : if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2036 0 : SDLoc SL(In);
2037 0 : SDValue K = CurDAG->getTargetConstant(
2038 : C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2039 0 : MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2040 0 : SL, MVT::i32, K);
2041 : Src = SDValue(MovK, 0);
2042 : return true;
2043 : }
2044 :
2045 : return isExtractHiElt(In, Src);
2046 : }
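// Editorial example: selecting the f16 constant 2.0 (bit pattern 0x4000)
// into a high half emits v_mov_b32_e32 of 0x4000 << 16 = 0x40000000, so the
// value already sits in bits [31:16] of the VGPR.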
2047 :
2048 0 : bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2049 0 : if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2050 0 : return false;
2051 : }
2052 : const SIRegisterInfo *SIRI =
2053 : static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2054 : const SIInstrInfo * SII =
2055 0 : static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2056 :
2057 : unsigned Limit = 0;
2058 : bool AllUsesAcceptSReg = true;
2059 : for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2060 : Limit < 10 && U != E; ++U, ++Limit) {
2061 0 : const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2062 :
2063 0 : // If the register class is unknown, it could be an unknown
2064 0 : // register class that needs to be an SGPR, e.g. an inline asm
2065 0 : // constraint.
2066 0 : if (!RC || SIRI->isSGPRClass(RC))
2067 : return false;
2068 :
2069 : if (RC != &AMDGPU::VS_32RegClass) {
2070 0 : AllUsesAcceptSReg = false;
2071 0 : SDNode * User = *U;
2072 0 : if (User->isMachineOpcode()) {
2073 0 : unsigned Opc = User->getMachineOpcode();
2074 : MCInstrDesc Desc = SII->get(Opc);
2075 : if (Desc.isCommutable()) {
2076 : unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2077 0 : unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2078 0 : if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2079 0 : unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2080 : const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2081 0 : if (CommutedRC == &AMDGPU::VS_32RegClass)
2082 0 : AllUsesAcceptSReg = true;
2083 : }
2084 : }
2085 : }
2086 0 : // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2087 0 : // commuting current user. This means have at least one use
2088 0 : // that strictly require VGPR. Thus, we will not attempt to commute
2089 0 : // other user instructions.
2090 : if (!AllUsesAcceptSReg)
2091 0 : break;
2092 0 : }
2093 : }
2094 : return !AllUsesAcceptSReg && (Limit < 10);
2095 0 : }
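// Editorial illustration: for a commutable machine op such as V_ADD_U32, an
// operand that currently sits in a non-VS_32 slot may still accept an SGPR
// or immediate after swapping the two sources; the findCommutedOpIndices
// probe above checks exactly that before giving up on a use.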
2096 :
2097 : bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2098 22854 : auto Ld = cast<LoadSDNode>(N);
2099 22854 :
2100 : return Ld->getAlignment() >= 4 &&
2101 : (
2102 : (
2103 22854 : (
2104 : Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2105 22854 : Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2106 : )
2107 : &&
2108 : !N->isDivergent()
2109 35312 : )
2110 35312 : ||
2111 26206 : (
2112 : Subtarget->getScalarizeGlobalBehavior() &&
2113 : Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2114 : !Ld->isVolatile() &&
2115 : !N->isDivergent() &&
2116 52397 : static_cast<const SITargetLowering *>(
2117 : getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2118 : )
2119 16140 : );
2120 : }
2121 :
2122 4662 : void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2123 : const AMDGPUTargetLowering& Lowering =
2124 8208 : *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2125 4104 : bool IsModified = false;
2126 1064 : do {
2127 1064 : IsModified = false;
2128 1064 :
2129 980 : // Go over all selected nodes and try to fold them a bit more
2130 980 : SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2131 980 : while (Position != CurDAG->allnodes_end()) {
2132 : SDNode *Node = &*Position++;
2133 : MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2134 : if (!MachineNode)
2135 : continue;
2136 :
2137 : SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2138 : if (ResNode != Node) {
2139 : if (ResNode)
2140 : ReplaceUses(Node, ResNode);
2141 : IsModified = true;
2142 : }
2143 : }
2144 12788 : CurDAG->RemoveDeadNodes();
2145 : } while (IsModified);
2146 : }
2147 68887 :
2148 : bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2149 : Subtarget = &MF.getSubtarget<R600Subtarget>();
2150 136717 : return SelectionDAGISel::runOnMachineFunction(MF);
2151 : }
2152 :
2153 : bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2154 22527 : if (!N->readMem())
2155 : return false;
2156 : if (CbId == -1)
2157 45501 : return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2158 45501 : N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2159 :
2160 22564 : return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2161 : }
2162 39189 :
2163 8227 : bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2164 8227 : SDValue& IntPtr) {
2165 7914 : if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2166 : IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2167 2416 : true);
2168 : return true;
2169 68887 : }
2170 : return false;
2171 : }
2172 24642 :
2173 : bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2174 24642 : SDValue& BaseReg, SDValue &Offset) {
2175 : if (!isa<ConstantSDNode>(Addr)) {
2176 26940 : BaseReg = Addr;
2177 : Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2178 : return true;
2179 : }
2180 26940 : return false;
2181 2204554 : }
2182 :
2183 : void R600DAGToDAGISel::Select(SDNode *N) {
2184 : unsigned int Opc = N->getOpcode();
2185 : if (N->isMachineOpcode()) {
2186 : N->setNodeId(-1);
2187 592741 : return; // Already selected.
2188 592741 : }
2189 35142 :
2190 35047 : switch (Opc) {
2191 : default: break;
2192 : case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2193 : case ISD::SCALAR_TO_VECTOR:
2194 26940 : case ISD::BUILD_VECTOR: {
2195 : EVT VT = N->getValueType(0);
2196 24642 : unsigned NumVectorElts = VT.getVectorNumElements();
2197 : unsigned RegClassID;
2198 2298 : // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2199 2298 : // that adds a 128 bits reg copy when going through TwoAddressInstructions
2200 2298 : // pass. We want to avoid 128 bits copies as much as possible because they
2201 : // can't be bundled by our scheduler.
2202 : switch(NumVectorElts) {
2203 0 : case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2204 0 : case 4:
2205 0 : if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2206 0 : RegClassID = R600::R600_Reg128VerticalRegClassID;
2207 0 : else
2208 : RegClassID = R600::R600_Reg128RegClassID;
2209 : break;
2210 0 : default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2211 : }
2212 : SelectBuildVector(N, RegClassID);
2213 0 : return;
2214 : }
2215 : }
2216 0 :
2217 0 : SelectCode(N);
2218 0 : }
2219 :
2220 : bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2221 : SDValue &Offset) {
2222 : ConstantSDNode *C;
2223 0 : SDLoc DL(Addr);
2224 :
2225 : if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2226 0 : Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2227 0 : Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2228 0 : } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2229 : (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2230 : Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2231 : Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2232 : } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2233 84707 : (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2234 84707 : Base = Addr.getOperand(0);
2235 84707 : Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2236 : } else {
2237 0 : Base = Addr;
2238 : Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2239 : }
2240 84707 :
2241 : return true;
2242 2455 : }
2243 :
2244 : bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2245 4910 : SDValue &Offset) {
2246 : ConstantSDNode *IMMOffset;
2247 :
2248 : if (Addr.getOpcode() == ISD::ADD
2249 : && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2250 : && isInt<16>(IMMOffset->getZExtValue())) {
2251 :
2252 : Base = Addr.getOperand(0);
2253 : Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2254 2062 : MVT::i32);
2255 2062 : return true;
2256 : // If the pointer address is constant, we can move it to the offset field.
2257 : } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2258 : && isInt<16>(IMMOffset->getZExtValue())) {
2259 : Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2260 0 : SDLoc(CurDAG->getEntryNode()),
2261 : R600::ZERO, MVT::i32);
2262 2455 : Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2263 : MVT::i32);
2264 : return true;
2265 : }
2266 :
2267 : // Default case, no offset
2268 : Base = Addr;
2269 : Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2270 2101 : return true;
2271 : }
|