LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUISelDAGToDAG.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 826 857 96.4 %
Date: 2018-06-17 00:07:59 Functions: 76 81 93.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Defines an instruction selector for the AMDGPU target.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "AMDGPU.h"
      16             : #include "AMDGPUArgumentUsageInfo.h"
      17             : #include "AMDGPUISelLowering.h" // For AMDGPUISD
      18             : #include "AMDGPUInstrInfo.h"
      19             : #include "AMDGPUPerfHintAnalysis.h"
      20             : #include "AMDGPURegisterInfo.h"
      21             : #include "AMDGPUSubtarget.h"
      22             : #include "AMDGPUTargetMachine.h"
      23             : #include "SIDefines.h"
      24             : #include "SIISelLowering.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "SIMachineFunctionInfo.h"
      27             : #include "SIRegisterInfo.h"
      28             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      29             : #include "llvm/ADT/APInt.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/StringRef.h"
      32             : #include "llvm/Analysis/DivergenceAnalysis.h"
      33             : #include "llvm/Analysis/ValueTracking.h"
      34             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      35             : #include "llvm/CodeGen/ISDOpcodes.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/SelectionDAG.h"
      39             : #include "llvm/CodeGen/SelectionDAGISel.h"
      40             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      41             : #include "llvm/CodeGen/ValueTypes.h"
      42             : #include "llvm/IR/BasicBlock.h"
      43             : #include "llvm/IR/Instruction.h"
      44             : #include "llvm/MC/MCInstrDesc.h"
      45             : #include "llvm/Support/Casting.h"
      46             : #include "llvm/Support/CodeGen.h"
      47             : #include "llvm/Support/ErrorHandling.h"
      48             : #include "llvm/Support/MachineValueType.h"
      49             : #include "llvm/Support/MathExtras.h"
      50             : #include <cassert>
      51             : #include <cstdint>
      52             : #include <new>
      53             : #include <vector>
      54             : 
      55             : using namespace llvm;
      56             : 
      57             : namespace llvm {
      58             : 
      59             : class R600InstrInfo;
      60             : 
      61             : } // end namespace llvm
      62             : 
      63             : //===----------------------------------------------------------------------===//
      64             : // Instruction Selector Implementation
      65             : //===----------------------------------------------------------------------===//
      66             : 
      67             : namespace {
      68             : 
      69             : /// AMDGPU specific code to select AMDGPU machine instructions for
      70             : /// SelectionDAG operations.
      71             : class AMDGPUDAGToDAGISel : public SelectionDAGISel {
      72             :   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
      73             :   // make the right decision when generating code for different targets.
      74             :   const AMDGPUSubtarget *Subtarget; // Assigned per function in runOnMachineFunction(), not in the constructor.
      75             :   AMDGPUAS AMDGPUASI; // Filled from AMDGPU::getAMDGPUAS(*TM) in the constructor.
      76             :   bool EnableLateStructurizeCFG; // Copy of AMDGPUTargetMachine::EnableLateStructurizeCFG taken in the constructor.
      77             : 
      78             : public:
      79        2067 :   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
      80             :                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      81        2067 :     : SelectionDAGISel(*TM, OptLevel) { // NOTE(review): TM defaults to nullptr but is dereferenced here and below - confirm the default is never used.
      82        2067 :     AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
      83        2067 :     EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
      84        2067 :   }
      85        2058 :   ~AMDGPUDAGToDAGISel() override = default;
      86             : 
      87        2059 :   void getAnalysisUsage(AnalysisUsage &AU) const override { // Declares the analyses this pass requires.
      88             :     AU.addRequired<AMDGPUArgumentUsageInfo>();
      89             :     AU.addRequired<AMDGPUPerfHintAnalysis>();
      90             :     AU.addRequired<DivergenceAnalysis>();
      91        2059 :     SelectionDAGISel::getAnalysisUsage(AU); // Then add the base selector's own requirements.
      92        2059 :   }
      93             : 
      94             :   bool runOnMachineFunction(MachineFunction &MF) override;
      95             :   void Select(SDNode *N) override;
      96             :   StringRef getPassName() const override;
      97             :   void PostprocessISelDAG() override;
      98             : 
      99             : protected:
     100             :   void SelectBuildVector(SDNode *N, unsigned RegClassID); // Turns N into REG_SEQUENCE (COPY_TO_REGCLASS for a one-element vector).
     101             : 
     102             : private:
     103             :   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
     104             :   bool isNoNanSrc(SDValue N) const;
     105             :   bool isInlineImmediate(const SDNode *N) const;
     106             : 
     107             :   bool isConstantLoad(const MemSDNode *N, int cbID) const;
     108             :   bool isUniformBr(const SDNode *N) const;
     109             : 
     110             :   SDNode *glueCopyToM0(SDNode *N) const; // Adds a glued M0 copy to local-address-space nodes when the subtarget needs M0 initialized.
     111             : 
     112             :   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
     113             :   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
     114             :   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
     115             :                                        SDValue& Offset);
     116             :   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); // Overridden by R600DAGToDAGISel.
     117             :   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); // Overridden by R600DAGToDAGISel.
     118             :   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     119             :                        unsigned OffsetBits) const;
     120             :   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
     121             :   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
     122             :                                  SDValue &Offset1) const;
     123             :   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     124             :                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
     125             :                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
     126             :                    SDValue &TFE) const;
     127             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     128             :                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
     129             :                          SDValue &SLC, SDValue &TFE) const;
     130             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     131             :                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
     132             :                          SDValue &SLC) const;
     133             :   bool SelectMUBUFScratchOffen(SDNode *Parent,
     134             :                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
     135             :                                SDValue &SOffset, SDValue &ImmOffset) const;
     136             :   bool SelectMUBUFScratchOffset(SDNode *Parent,
     137             :                                 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     138             :                                 SDValue &Offset) const;
     139             : 
     140             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
     141             :                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
     142             :                          SDValue &TFE) const;
     143             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     144             :                          SDValue &Offset, SDValue &SLC) const;
     145             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     146             :                          SDValue &Offset) const;
     147             :   bool SelectMUBUFConstant(SDValue Constant,
     148             :                            SDValue &SOffset,
     149             :                            SDValue &ImmOffset) const;
     150             :   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
     151             :                                   SDValue &ImmOffset) const;
     152             :   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
     153             :                                    SDValue &ImmOffset, SDValue &VOffset) const;
     154             : 
     155             :   bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
     156             :                         SDValue &Offset, SDValue &SLC) const;
     157             :   bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
     158             :                               SDValue &Offset, SDValue &SLC) const;
     159             : 
     160             :   template <bool IsSigned>
     161             :   bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
     162             :                         SDValue &Offset, SDValue &SLC) const;
     163             : 
     164             :   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
     165             :                         bool &Imm) const;
     166             :   SDValue Expand32BitAddress(SDValue Addr) const;
     167             :   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
     168             :                   bool &Imm) const;
     169             :   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     170             :   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     171             :   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     172             :   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
     173             :   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
     174             :   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
     175             : 
     176             :   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     177             :   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
     178             :   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     179             :   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
     180             :   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     181             :                        SDValue &Clamp, SDValue &Omod) const;
     182             :   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     183             :                          SDValue &Clamp, SDValue &Omod) const;
     184             : 
     185             :   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
     186             :                                  SDValue &Clamp,
     187             :                                  SDValue &Omod) const;
     188             : 
     189             :   bool SelectVOP3OMods(SDValue In, SDValue &Src,
     190             :                        SDValue &Clamp, SDValue &Omod) const;
     191             : 
     192             :   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     193             :   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     194             :                         SDValue &Clamp) const;
     195             : 
     196             :   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     197             :   bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
     198             :                         SDValue &Clamp) const;
     199             : 
     200             :   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     201             :   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     202             :                             SDValue &Clamp) const;
     203             :   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
     204             :   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     205             : 
     206             :   bool SelectHi16Elt(SDValue In, SDValue &Src) const;
     207             : 
     208             :   void SelectADD_SUB_I64(SDNode *N);
     209             :   void SelectUADDO_USUBO(SDNode *N);
     210             :   void SelectDIV_SCALE(SDNode *N);
     211             :   void SelectMAD_64_32(SDNode *N);
     212             :   void SelectFMA_W_CHAIN(SDNode *N);
     213             :   void SelectFMUL_W_CHAIN(SDNode *N);
     214             : 
     215             :   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
     216             :                    uint32_t Offset, uint32_t Width);
     217             :   void SelectS_BFEFromShifts(SDNode *N);
     218             :   void SelectS_BFE(SDNode *N);
     219             :   bool isCBranchSCC(const SDNode *N) const;
     220             :   void SelectBRCOND(SDNode *N);
     221             :   void SelectFMAD_FMA(SDNode *N);
     222             :   void SelectATOMIC_CMP_SWAP(SDNode *N);
     223             : 
     224             : protected:
     225             :   // Include the pieces autogenerated from the target description.
     226             : #include "AMDGPUGenDAGISel.inc"
     227             : };
     228             : 
     229         554 : class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { // Selector variant that overrides Select() and the two virtual address selectors.
     230             : public:
     231         278 :   explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
     232         278 :       AMDGPUDAGToDAGISel(TM, OptLevel) {} // Forwards both arguments to the base selector; no state of its own.
     233             : 
     234             :   void Select(SDNode *N) override;
     235             : 
     236             :   bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
     237             :                           SDValue &Offset) override;
     238             :   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     239             :                           SDValue &Offset) override;
     240             : };
     241             : 
     242             : }  // end anonymous namespace
     243             : 
     244       76336 : INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
     245             :                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) // Pass registration; "isel" is the registered name string.
     246       76336 : INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) // The three dependencies below match the analyses
     247       76336 : INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) // that AMDGPUDAGToDAGISel::getAnalysisUsage() marks
     248       76336 : INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) // as required.
     249      357084 : INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
     250             :                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) // Arguments must stay identical to INITIALIZE_PASS_BEGIN above.
     251             : 
     252             : /// This pass converts a legalized DAG into an AMDGPU-specific
     253             : /// DAG, ready for instruction scheduling.
     254        1789 : FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
     255             :                                         CodeGenOpt::Level OptLevel) {
     256        1789 :   return new AMDGPUDAGToDAGISel(TM, OptLevel); // Heap-allocated; the raw pointer is handed to the caller.
     257             : }
     258             : 
     259             : /// This pass converts a legalized DAG into an R600-specific
     260             : /// DAG, ready for instruction scheduling.
     261         278 : FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
     262             :                                       CodeGenOpt::Level OptLevel) {
     263         556 :   return new R600DAGToDAGISel(TM, OptLevel); // Heap-allocated; the raw pointer is handed to the caller.
     264             : }
     265             : 
     266       20072 : bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { // Per-function entry point; returns the base selector's result.
     267       20072 :   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); // Cache this function's subtarget before any selection runs.
     268       20072 :   return SelectionDAGISel::runOnMachineFunction(MF);
     269             : }
     270             : 
     271         492 : bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { // Returns true when N may be treated as never being NaN.
     272         492 :   if (TM.Options.NoNaNsFPMath) // The target-wide no-NaNs option short-circuits every other check.
     273             :     return true;
     274             : 
     275             :   // TODO: Move the node-flag check below into isKnownNeverNaN.
     276         114 :   if (N->getFlags().isDefined()) // When the node's flags are defined, its no-NaNs flag is the answer.
     277         102 :     return N->getFlags().hasNoNaNs();
     278             : 
     279          12 :   return CurDAG->isKnownNeverNaN(N); // Otherwise fall back to the DAG's own query.
     280             : }
     281             : 
     282        5317 : bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { // True if N is an integer/FP constant that SIInstrInfo reports as an inline constant.
     283             :   const SIInstrInfo *TII
     284        5317 :     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); // NOTE(review): unchecked downcast - assumes Subtarget is an SISubtarget here; confirm.
     285             : 
     286             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
     287        9548 :     return TII->isInlineConstant(C->getAPIntValue());
     288             : 
     289             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
     290        1515 :     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); // FP constants are tested through their bit pattern.
     291             : 
     292             :   return false; // Any non-constant node is not an inline immediate.
     293             : }
     294             : 
     295             : /// Determine the register class for operand \p OpNo of node \p N.
     296             : /// \returns The register class of the register that will be used for the
     297             : /// given operand number \p OpNo, or nullptr if the register class cannot be
     298             : /// determined.
     299       23907 : const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
     300             :                                                           unsigned OpNo) const {
     301       23907 :   if (!N->isMachineOpcode()) { // Not a machine node yet: only CopyToReg can be answered.
     302         641 :     if (N->getOpcode() == ISD::CopyToReg) {
     303        1282 :       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); // Operand 1 holds the register; OpNo is not consulted on this path.
     304         641 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     305         321 :         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
     306             :         return MRI.getRegClass(Reg);
     307             :       }
     308             : 
     309             :       const SIRegisterInfo *TRI
     310         320 :         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); // NOTE(review): unchecked downcast to SISubtarget - confirm.
     311         320 :       return TRI->getPhysRegClass(Reg); // Physical register: ask the register info for its class.
     312             :     }
     313             : 
     314             :     return nullptr;
     315             :   }
     316             : 
     317       23266 :   switch (N->getMachineOpcode()) {
     318       21651 :   default: {
     319             :     const MCInstrDesc &Desc =
     320       43302 :         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
     321       43302 :     unsigned OpIdx = Desc.getNumDefs() + OpNo; // OpNo is counted after the instruction's defs.
     322       43302 :     if (OpIdx >= Desc.getNumOperands()) // Past the described operands: nothing to report.
     323             :       return nullptr;
     324       21651 :     int RegClass = Desc.OpInfo[OpIdx].RegClass;
     325       21651 :     if (RegClass == -1) // -1 is treated as "no register class for this operand".
     326             :       return nullptr;
     327             : 
     328       21636 :     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
     329             :   }
     330        1615 :   case AMDGPU::REG_SEQUENCE: {
     331        3230 :     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); // Operand 0 is the super-register class ID.
     332             :     const TargetRegisterClass *SuperRC =
     333        1615 :         Subtarget->getRegisterInfo()->getRegClass(RCID);
     334             : 
     335        3230 :     SDValue SubRegOp = N->getOperand(OpNo + 1); // Read as a sub-register index constant; cast<> asserts that it is one.
     336        3230 :     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
     337        1615 :     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
     338        1615 :                                                               SubRegIdx);
     339             :   }
     340             :   }
     341             : }
     342             : 
     343       83509 : SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { // Returns N unchanged, or N morphed to carry an extra glue operand from an M0 copy.
     344       94809 :   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || // cast<> requires N to be a MemSDNode.
     345       11300 :       !Subtarget->ldsRequiresM0Init())
     346             :     return N;
     347             : 
     348             :   const SITargetLowering& Lowering =
     349        8393 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     350             : 
     351             :   // Write the constant -1 to m0 ahead of the memory operation.
     352             : 
     353       16786 :   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), // The copy is chained to the DAG entry node.
     354       25179 :                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
     355             : 
     356        8393 :   SDValue Glue = M0.getValue(1); // Result 1 of the copy is used as the glue value.
     357             : 
     358             :   SmallVector <SDValue, 8> Ops;
     359       76148 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { // Keep every existing operand in order...
     360       59362 :      Ops.push_back(N->getOperand(i));
     361             :   }
     362        8393 :   Ops.push_back(Glue); // ...and append the glue last.
     363       33572 :   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); // Same opcode and result types; only the operand list changes.
     364             : }
     365             : 
     366             : static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { // Maps an element count of 1, 2, 4, 8 or 16 to an SReg_* register-class ID.
     367       16198 :   switch (NumVectorElts) {
     368             :   case 1:
     369             :     return AMDGPU::SReg_32_XM0RegClassID;
     370        7991 :   case 2:
     371             :     return AMDGPU::SReg_64RegClassID;
     372        7555 :   case 4:
     373             :     return AMDGPU::SReg_128RegClassID;
     374         618 :   case 8:
     375             :     return AMDGPU::SReg_256RegClassID;
     376          34 :   case 16:
     377             :     return AMDGPU::SReg_512RegClassID;
     378             :   }
     379             : 
     380           0 :   llvm_unreachable("invalid vector size"); // Any other element count is a caller error.
     381             : }
     382             : 
     383         730 : static bool getConstantValue(SDValue N, uint32_t &Out) { // Stores the bits of an integer or FP constant node in Out; returns false (Out untouched) otherwise.
     384             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
     385         298 :     Out = C->getAPIntValue().getZExtValue(); // NOTE(review): Out is 32-bit - presumably callers only pass constants of at most 32 bits; confirm.
     386             :     return true;
     387             :   }
     388             : 
     389             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
     390         996 :     Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); // FP constants are returned as their bit pattern, not converted numerically.
     391             :     return true;
     392             :   }
     393             : 
     394             :   return false;
     395             : }
     396             : 
     397       18628 : void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { // Selects vector-building node N into a REG_SEQUENCE of class RegClassID.
     398       37256 :   EVT VT = N->getValueType(0);
     399       18628 :   unsigned NumVectorElts = VT.getVectorNumElements();
     400       18628 :   EVT EltVT = VT.getVectorElementType();
     401             :   SDLoc DL(N);
     402       37256 :   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     403             : 
     404       18628 :   if (NumVectorElts == 1) { // Single element: COPY_TO_REGCLASS of operand 0 instead of a REG_SEQUENCE.
     405           0 :     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
     406             :                          RegClass);
     407             :     return;
     408             :   }
     409             : 
     410             :   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
     411             :                                   "supported yet");
     412             :   // 16 = Max Num Vector Elements
     413             :   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
     414             :   // 1 = Vector Register Class
     415       37256 :   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
     416             : 
     417       37256 :   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); // Slot 0: register class; then (value, subreg index) pairs.
     418             :   bool IsRegSeq = true;
     419       18628 :   unsigned NOps = N->getNumOperands();
     420      139880 :   for (unsigned i = 0; i < NOps; i++) {
     421             :     // XXX: Why is this here? A register operand stops the loop and clears IsRegSeq.
     422      121252 :     if (isa<RegisterSDNode>(N->getOperand(i))) {
     423             :       IsRegSeq = false;
     424             :       break; // NOTE(review): slots for operand i and later are left default-constructed on this path.
     425             :     }
     426       60626 :     unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     427      181878 :     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
     428      181878 :     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     429             :   }
     430       18628 :   if (NOps != NumVectorElts) {
     431             :     // Fill in the missing undef elements if this was a scalar_to_vector.
     432             :     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
     433           4 :     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
     434           4 :                                                    DL, EltVT);
     435          12 :     for (unsigned i = NOps; i < NumVectorElts; ++i) { // All padding elements share the one IMPLICIT_DEF node.
     436           4 :       unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     437           8 :       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
     438           8 :       RegSeqArgs[1 + (2 * i) + 1] =
     439           4 :           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     440             :     }
     441             :   }
     442             : 
     443       18628 :   if (!IsRegSeq) // NOTE(review): no return after SelectCode(N); the SelectNodeTo below still runs on this path - confirm intended.
     444             :     SelectCode(N);
     445       55884 :   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
     446             : }
     447             : 
     448      485500 : void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     449      485500 :   unsigned int Opc = N->getOpcode();
     450      485500 :   if (N->isMachineOpcode()) {
     451             :     N->setNodeId(-1);
     452             :     return;   // Already selected.
     453             :   }
     454             : 
     455      968221 :   if (isa<AtomicSDNode>(N) ||
     456             :       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
     457             :        Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
     458      483291 :        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
     459             :        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
     460        1884 :     N = glueCopyToM0(N);
     461             : 
     462      484930 :   switch (Opc) {
     463             :   default:
     464             :     break;
     465             :   // We are selecting i64 ADD here instead of custom lower it during
     466             :   // DAG legalization, so we can fold some i64 ADDs used for address
     467             :   // calculation into the LOAD and STORE instructions.
     468             :   case ISD::ADDC:
     469             :   case ISD::ADDE:
     470             :   case ISD::SUBC:
     471             :   case ISD::SUBE: {
     472         240 :     if (N->getValueType(0) != MVT::i64)
     473             :       break;
     474             : 
     475         126 :     SelectADD_SUB_I64(N);
     476         126 :     return;
     477             :   }
     478         203 :   case ISD::UADDO:
     479             :   case ISD::USUBO: {
     480         203 :     SelectUADDO_USUBO(N);
     481         203 :     return;
     482             :   }
     483          45 :   case AMDGPUISD::FMUL_W_CHAIN: {
     484          45 :     SelectFMUL_W_CHAIN(N);
     485          45 :     return;
     486             :   }
     487         225 :   case AMDGPUISD::FMA_W_CHAIN: {
     488         225 :     SelectFMA_W_CHAIN(N);
     489         225 :     return;
     490             :   }
     491             : 
     492       16715 :   case ISD::SCALAR_TO_VECTOR:
     493             :   case ISD::BUILD_VECTOR: {
     494       33430 :     EVT VT = N->getValueType(0);
     495       16715 :     unsigned NumVectorElts = VT.getVectorNumElements();
     496             : 
     497             :     if (VT == MVT::v2i16 || VT == MVT::v2f16) {
     498         517 :       if (Opc == ISD::BUILD_VECTOR) {
     499             :         uint32_t LHSVal, RHSVal;
     500         730 :         if (getConstantValue(N->getOperand(0), LHSVal) &&
     501         213 :             getConstantValue(N->getOperand(1), RHSVal)) {
     502         185 :           uint32_t K = LHSVal | (RHSVal << 16);
     503         555 :           CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
     504         185 :                                CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
     505         185 :           return;
     506             :         }
     507             :       }
     508             : 
     509         332 :       break;
     510             :     }
     511             : 
     512             :     assert(VT.getVectorElementType().bitsEq(MVT::i32));
     513             :     unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
     514       16198 :     SelectBuildVector(N, RegClassID);
     515       16198 :     return;
     516             :   }
     517        2041 :   case ISD::BUILD_PAIR: {
     518             :     SDValue RC, SubReg0, SubReg1;
     519             :     SDLoc DL(N);
     520        2041 :     if (N->getValueType(0) == MVT::i128) {
     521           0 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
     522           0 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
     523           0 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     524        2041 :     } else if (N->getValueType(0) == MVT::i64) {
     525        2041 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
     526        2041 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     527        2041 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     528             :     } else {
     529           0 :       llvm_unreachable("Unhandled value type for BUILD_PAIR");
     530             :     }
     531        2041 :     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
     532        4082 :                             N->getOperand(1), SubReg1 };
     533        6123 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     534             :                                           N->getValueType(0), Ops));
     535             :     return;
     536             :   }
     537             : 
     538       25210 :   case ISD::Constant:
     539             :   case ISD::ConstantFP: {
     540       74421 :     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
     541             :       break;
     542             : 
     543             :     uint64_t Imm;
     544             :     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
     545         219 :       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
     546             :     else {
     547             :       ConstantSDNode *C = cast<ConstantSDNode>(N);
     548        1136 :       Imm = C->getZExtValue();
     549             :     }
     550             : 
     551             :     SDLoc DL(N);
     552        3627 :     SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     553             :                                 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
     554        1209 :                                                     MVT::i32));
     555        3627 :     SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     556        1209 :                                 CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
     557             :     const SDValue Ops[] = {
     558        1209 :       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     559        1209 :       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
     560        1209 :       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
     561        3627 :     };
     562             : 
     563        3627 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     564             :                                           N->getValueType(0), Ops));
     565             :     return;
     566             :   }
     567       81625 :   case ISD::LOAD:
     568             :   case ISD::STORE: {
     569       81625 :     N = glueCopyToM0(N);
     570       81625 :     break;
     571             :   }
     572             : 
     573         148 :   case AMDGPUISD::BFE_I32:
     574             :   case AMDGPUISD::BFE_U32: {
     575             :     // There is a scalar version available, but unlike the vector version which
     576             :     // has a separate operand for the offset and width, the scalar version packs
     577             :     // the width and offset into a single operand. Try to move to the scalar
     578             :     // version if the offsets are constant, so that we can try to keep extended
     579             :     // loads of kernel arguments in SGPRs.
     580             : 
     581             :     // TODO: Technically we could try to pattern match scalar bitshifts of
     582             :     // dynamic values, but it's probably not useful.
     583         148 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     584             :     if (!Offset)
     585             :       break;
     586             : 
     587             :     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
     588             :     if (!Width)
     589             :       break;
     590             : 
     591             :     bool Signed = Opc == AMDGPUISD::BFE_I32;
     592             : 
     593         264 :     uint32_t OffsetVal = Offset->getZExtValue();
     594         264 :     uint32_t WidthVal = Width->getZExtValue();
     595             : 
     596         132 :     ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
     597         132 :                             SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
     598         132 :     return;
     599             :   }
     600         267 :   case AMDGPUISD::DIV_SCALE: {
     601         267 :     SelectDIV_SCALE(N);
     602         267 :     return;
     603             :   }
     604          20 :   case AMDGPUISD::MAD_I64_I32:
     605             :   case AMDGPUISD::MAD_U64_U32: {
     606          20 :     SelectMAD_64_32(N);
     607          20 :     return;
     608             :   }
     609       12074 :   case ISD::CopyToReg: {
     610             :     const SITargetLowering& Lowering =
     611       12074 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     612       12074 :     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
     613       12074 :     break;
     614             :   }
     615             :   case ISD::AND:
     616             :   case ISD::SRL:
     617             :   case ISD::SRA:
     618             :   case ISD::SIGN_EXTEND_INREG:
     619       26237 :     if (N->getValueType(0) != MVT::i32)
     620             :       break;
     621             : 
     622       19779 :     SelectS_BFE(N);
     623       19779 :     return;
     624         568 :   case ISD::BRCOND:
     625         568 :     SelectBRCOND(N);
     626         568 :     return;
     627        2621 :   case ISD::FMAD:
     628             :   case ISD::FMA:
     629        2621 :     SelectFMAD_FMA(N);
     630        2621 :     return;
     631         195 :   case AMDGPUISD::ATOMIC_CMP_SWAP:
     632         195 :     SelectATOMIC_CMP_SWAP(N);
     633         195 :     return;
     634             :   }
     635             : 
     636             :   SelectCode(N);
     637             : }
     638             : 
     639        7302 : bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
     640       14604 :   if (!N->readMem())
     641             :     return false;
     642        7302 :   if (CbId == -1)
     643           0 :     return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
     644             :            N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
     645             : 
     646        7302 :   return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
     647             : }
     648             : 
     649         368 : bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
     650         368 :   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
     651         368 :   const Instruction *Term = BB->getTerminator();
     652         367 :   return Term->getMetadata("amdgpu.uniform") ||
     653         368 :          Term->getMetadata("structurizecfg.uniform");
     654             : }
     655             : 
     656           1 : StringRef AMDGPUDAGToDAGISel::getPassName() const {
     657           1 :   return "AMDGPU DAG->DAG Pattern Instruction Selection";
     658             : }
     659             : 
     660             : //===----------------------------------------------------------------------===//
     661             : // Complex Patterns
     662             : //===----------------------------------------------------------------------===//
     663             : 
     664        6376 : bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
     665             :                                                          SDValue& IntPtr) {
     666             :   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
     667       25504 :     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
     668        6376 :                                        true);
     669             :     return true;
     670             :   }
     671             :   return false;
     672             : }
     673             : 
     674           0 : bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
     675             :     SDValue& BaseReg, SDValue &Offset) {
     676             :   if (!isa<ConstantSDNode>(Addr)) {
     677           0 :     BaseReg = Addr;
     678           0 :     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
     679             :     return true;
     680             :   }
     681             :   return false;
     682             : }
     683             : 
     684           0 : bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     685             :                                             SDValue &Offset) {
     686           0 :   return false;
     687             : }
     688             : 
     689           0 : bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
     690             :                                             SDValue &Offset) {
     691             :   ConstantSDNode *C;
     692             :   SDLoc DL(Addr);
     693             : 
     694             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
     695           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
     696           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     697           0 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
     698             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
     699           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
     700           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     701           0 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
     702             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
     703           0 :     Base = Addr.getOperand(0);
     704           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
     705             :   } else {
     706           0 :     Base = Addr;
     707           0 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
     708             :   }
     709             : 
     710           0 :   return true;
     711             : }
     712             : 
     713             : // FIXME: Should only handle addcarry/subcarry
     714         126 : void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
     715             :   SDLoc DL(N);
     716         126 :   SDValue LHS = N->getOperand(0);
     717         126 :   SDValue RHS = N->getOperand(1);
     718             : 
     719         126 :   unsigned Opcode = N->getOpcode();
     720         126 :   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
     721             :   bool ProduceCarry =
     722         126 :       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
     723         126 :   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
     724             : 
     725         252 :   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     726         252 :   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     727             : 
     728         252 :   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     729         126 :                                        DL, MVT::i32, LHS, Sub0);
     730         252 :   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     731         126 :                                        DL, MVT::i32, LHS, Sub1);
     732             : 
     733         252 :   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     734         126 :                                        DL, MVT::i32, RHS, Sub0);
     735         252 :   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     736         126 :                                        DL, MVT::i32, RHS, Sub1);
     737             : 
     738         252 :   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
     739             : 
     740         126 :   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
     741         126 :   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
     742             : 
     743             :   SDNode *AddLo;
     744         126 :   if (!ConsumeCarry) {
     745             :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
     746         240 :     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
     747             :   } else {
     748           6 :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
     749          12 :     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
     750             :   }
     751             :   SDValue AddHiArgs[] = {
     752             :     SDValue(Hi0, 0),
     753             :     SDValue(Hi1, 0),
     754             :     SDValue(AddLo, 1)
     755             :   };
     756         252 :   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
     757             : 
     758             :   SDValue RegSequenceArgs[] = {
     759         126 :     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     760             :     SDValue(AddLo,0),
     761             :     Sub0,
     762             :     SDValue(AddHi,0),
     763             :     Sub1,
     764         378 :   };
     765         252 :   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
     766         126 :                                                MVT::i64, RegSequenceArgs);
     767             : 
     768         126 :   if (ProduceCarry) {
     769             :     // Replace the carry-use
     770         126 :     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
     771             :   }
     772             : 
     773             :   // Replace the remaining uses.
     774         126 :   ReplaceNode(N, RegSequence);
     775         126 : }
     776             : 
     777         203 : void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
     778             :   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
     779             :   // carry out despite the _i32 name. These were renamed in VI to _U32.
     780             :   // FIXME: We should probably rename the opcodes here.
     781         203 :   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     782             :     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
     783             : 
     784         812 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
     785         203 :                        { N->getOperand(0), N->getOperand(1) });
     786         203 : }
     787             : 
     788         225 : void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
     789             :   SDLoc SL(N);
     790             :   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
     791         225 :   SDValue Ops[10];
     792             : 
     793         225 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
     794         225 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     795         225 :   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
     796         225 :   Ops[8] = N->getOperand(0);
     797         225 :   Ops[9] = N->getOperand(4);
     798             : 
     799         675 :   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
     800         225 : }
     801             : 
     802          45 : void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
     803             :   SDLoc SL(N);
     804             :   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
     805          45 :   SDValue Ops[8];
     806             : 
     807          45 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
     808          45 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     809          45 :   Ops[6] = N->getOperand(0);
     810          45 :   Ops[7] = N->getOperand(3);
     811             : 
     812         135 :   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
     813          45 : }
     814             : 
     815             : // We need to handle this here because tablegen doesn't support matching
     816             : // instructions with multiple outputs.
     817         267 : void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
     818             :   SDLoc SL(N);
     819         267 :   EVT VT = N->getValueType(0);
     820             : 
     821             :   assert(VT == MVT::f32 || VT == MVT::f64);
     822             : 
     823             :   unsigned Opc
     824             :     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
     825             : 
     826         267 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
     827         801 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     828         267 : }
     829             : 
     830             : // We need to handle this here because tablegen doesn't support matching
     831             : // instructions with multiple outputs.
     832          20 : void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
     833             :   SDLoc SL(N);
     834          20 :   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
     835          20 :   unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
     836             : 
     837          20 :   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
     838          20 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
     839          20 :                     Clamp };
     840          60 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     841          20 : }
     842             : 
     843        7916 : bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     844             :                                          unsigned OffsetBits) const {
     845        7916 :   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
     846         468 :       (OffsetBits == 8 && !isUInt<8>(Offset)))
     847             :     return false;
     848             : 
     849        9480 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
     850        1981 :       Subtarget->unsafeDSOffsetFoldingEnabled())
     851             :     return true;
     852             : 
     853             :   // On Southern Islands instruction with a negative base value and an offset
     854             :   // don't seem to work.
     855        1977 :   return CurDAG->SignBitIsZero(Base);
     856             : }
     857             : 
     858       10731 : bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
     859             :                                               SDValue &Offset) const {
     860             :   SDLoc DL(Addr);
     861       10731 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     862        7432 :     SDValue N0 = Addr.getOperand(0);
     863        7432 :     SDValue N1 = Addr.getOperand(1);
     864             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     865       14864 :     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
     866             :       // (add n0, c0)
     867        6988 :       Base = N0;
     868       13976 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
     869        6988 :       return true;
     870             :     }
     871        3299 :   } else if (Addr.getOpcode() == ISD::SUB) {
     872             :     // sub C, x -> add (sub 0, x), C
     873             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     874          18 :       int64_t ByteOffset = C->getSExtValue();
     875          18 :       if (isUInt<16>(ByteOffset)) {
     876          32 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     877             : 
     878             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     879             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     880             :         // here, so this is thrown away.
     881          16 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     882          16 :                                       Zero, Addr.getOperand(1));
     883             : 
     884          16 :         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
     885             :           // FIXME: Select to VOP3 version for with-carry.
     886          14 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     887             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     888             : 
     889             :           MachineSDNode *MachineSub
     890          28 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     891          14 :                                      Zero, Addr.getOperand(1));
     892             : 
     893          14 :           Base = SDValue(MachineSub, 0);
     894          28 :           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
     895          14 :           return true;
     896             :         }
     897             :       }
     898             :     }
     899             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     900             :     // If we have a constant address, prefer to put the constant into the
     901             :     // offset. This can save moves to load the constant address since multiple
     902             :     // operations can share the zero base address register, and enables merging
     903             :     // into read2 / write2 instructions.
     904             : 
     905             :     SDLoc DL(Addr);
     906             : 
     907        1474 :     if (isUInt<16>(CAddr->getZExtValue())) {
     908        1466 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     909        1466 :       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     910         733 :                                  DL, MVT::i32, Zero);
     911         733 :       Base = SDValue(MovZero, 0);
     912        1466 :       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
     913             :       return true;
     914             :     }
     915             :   }
     916             : 
     917             :   // default case
     918        2996 :   Base = Addr;
     919        8988 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
     920        2996 :   return true;
     921             : }
     922             : 
     923             : // TODO: If offset is too big, put low 16-bit into offset.
     924         597 : bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     925             :                                                    SDValue &Offset0,
     926             :                                                    SDValue &Offset1) const {
     927             :   SDLoc DL(Addr);
     928             : 
     929         597 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     930         464 :     SDValue N0 = Addr.getOperand(0);
     931         464 :     SDValue N1 = Addr.getOperand(1);
     932             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
     933         928 :     unsigned DWordOffset0 = C1->getZExtValue() / 4;
     934         464 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     935             :     // (add n0, c0)
     936         464 :     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
     937         446 :       Base = N0;
     938         892 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     939         892 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     940         446 :       return true;
     941             :     }
     942         133 :   } else if (Addr.getOpcode() == ISD::SUB) {
     943             :     // sub C, x -> add (sub 0, x), C
     944             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     945           8 :       unsigned DWordOffset0 = C->getZExtValue() / 4;
     946           4 :       unsigned DWordOffset1 = DWordOffset0 + 1;
     947             : 
     948           4 :       if (isUInt<8>(DWordOffset0)) {
     949             :         SDLoc DL(Addr);
     950           8 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     951             : 
     952             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     953             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     954             :         // here, so this is thrown away.
     955           4 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     956           4 :                                       Zero, Addr.getOperand(1));
     957             : 
     958           4 :         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
     959           2 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     960             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     961             : 
     962             :           MachineSDNode *MachineSub
     963           4 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     964           2 :                                      Zero, Addr.getOperand(1));
     965             : 
     966           2 :           Base = SDValue(MachineSub, 0);
     967           4 :           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     968           4 :           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     969             :           return true;
     970             :         }
     971             :       }
     972             :     }
     973             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     974          48 :     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
     975          24 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     976             :     assert(4 * DWordOffset0 == CAddr->getZExtValue());
     977             : 
     978          24 :     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
     979          32 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     980             :       MachineSDNode *MovZero
     981          32 :         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     982          16 :                                  DL, MVT::i32, Zero);
     983          16 :       Base = SDValue(MovZero, 0);
     984          32 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     985          32 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     986             :       return true;
     987             :     }
     988             :   }
     989             : 
     990             :   // default case
     991             : 
     992             :   // FIXME: This is broken on SI where we still need to check if the base
     993             :   // pointer is positive here.
     994         133 :   Base = Addr;
     995         266 :   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
     996         266 :   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
     997         133 :   return true;
     998             : }
     999             : 
    1000       42881 : bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
    1001             :                                      SDValue &VAddr, SDValue &SOffset,
    1002             :                                      SDValue &Offset, SDValue &Offen,
    1003             :                                      SDValue &Idxen, SDValue &Addr64,
    1004             :                                      SDValue &GLC, SDValue &SLC,
    1005             :                                      SDValue &TFE) const {
    1006             :   // Subtarget prefers to use flat instruction
    1007       42881 :   if (Subtarget->useFlatForGlobal())
    1008             :     return false;
    1009             : 
    1010             :   SDLoc DL(Addr);
    1011             : 
    1012       31719 :   if (!GLC.getNode())
    1013       63438 :     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1014       31719 :   if (!SLC.getNode())
    1015       63024 :     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1016       63438 :   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1017             : 
    1018       63438 :   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1019       63438 :   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1020       63438 :   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1021       63438 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1022             : 
    1023       31719 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1024        8867 :     SDValue N0 = Addr.getOperand(0);
    1025        8867 :     SDValue N1 = Addr.getOperand(1);
    1026             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1027             : 
    1028        8867 :     if (N0.getOpcode() == ISD::ADD) {
    1029             :       // (add (add N2, N3), C1) -> addr64
    1030        1589 :       SDValue N2 = N0.getOperand(0);
    1031        1589 :       SDValue N3 = N0.getOperand(1);
    1032        3178 :       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1033        1589 :       Ptr = N2;
    1034        1589 :       VAddr = N3;
    1035             :     } else {
    1036             :       // (add N0, C1) -> offset
    1037       14556 :       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1038        7278 :       Ptr = N0;
    1039             :     }
    1040             : 
    1041       17734 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    1042       17144 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1043             :       return true;
    1044             :     }
    1045             : 
    1046         295 :     if (isUInt<32>(C1->getZExtValue())) {
    1047             :       // Illegal offset, store it in soffset.
    1048         570 :       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1049         570 :       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1050             :                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
    1051             :                         0);
    1052             :       return true;
    1053             :     }
    1054             :   }
    1055             : 
    1056       22862 :   if (Addr.getOpcode() == ISD::ADD) {
    1057             :     // (add N0, N1) -> addr64
    1058        3487 :     SDValue N0 = Addr.getOperand(0);
    1059        3487 :     SDValue N1 = Addr.getOperand(1);
    1060        6974 :     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1061        3487 :     Ptr = N0;
    1062        3487 :     VAddr = N1;
    1063        6974 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1064             :     return true;
    1065             :   }
    1066             : 
    1067             :   // default case -> offset
    1068       38750 :   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1069       19375 :   Ptr = Addr;
    1070       38750 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1071             : 
    1072             :   return true;
    1073             : }
    1074             : 
    1075       29504 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1076             :                                            SDValue &VAddr, SDValue &SOffset,
    1077             :                                            SDValue &Offset, SDValue &GLC,
    1078             :                                            SDValue &SLC, SDValue &TFE) const {
    1079       29504 :   SDValue Ptr, Offen, Idxen, Addr64;
    1080             : 
    1081             :   // addr64 bit was removed for volcanic islands.
    1082       29504 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    1083             :     return false;
    1084             : 
    1085       15970 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1086             :               GLC, SLC, TFE))
    1087             :     return false;
    1088             : 
    1089             :   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
    1090       27274 :   if (C->getSExtValue()) {
    1091             :     SDLoc DL(Addr);
    1092             : 
    1093             :     const SITargetLowering& Lowering =
    1094        4139 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1095             : 
    1096        4139 :     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    1097             :     return true;
    1098             :   }
    1099             : 
    1100             :   return false;
    1101             : }
    1102             : 
    1103         581 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1104             :                                            SDValue &VAddr, SDValue &SOffset,
    1105             :                                            SDValue &Offset,
    1106             :                                            SDValue &SLC) const {
    1107        1743 :   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
    1108         581 :   SDValue GLC, TFE;
    1109             : 
    1110         581 :   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
    1111             : }
    1112             : 
    1113             : static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
    1114             :   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
    1115          89 :   return PSV && PSV->isStack();
    1116             : }
    1117             : 
    1118        6081 : std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
    1119        6081 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1120             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1121             : 
    1122             :   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    1123        4877 :     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
    1124        9754 :                                               FI->getValueType(0));
    1125             : 
    1126             :     // If we can resolve this to a frame index access, this is relative to the
    1127             :     // frame pointer SGPR.
    1128       14631 :     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
    1129        9754 :                                                    MVT::i32));
    1130             :   }
    1131             : 
    1132             :   // If we don't know this private access is a local stack object, it needs to
    1133             :   // be relative to the entry point's scratch wave offset register.
    1134        3612 :   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
    1135        2408 :                                                MVT::i32));
    1136             : }
    1137             : 
    1138        6087 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    1139             :                                                  SDValue Addr, SDValue &Rsrc,
    1140             :                                                  SDValue &VAddr, SDValue &SOffset,
    1141             :                                                  SDValue &ImmOffset) const {
    1142             : 
    1143             :   SDLoc DL(Addr);
    1144        6087 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1145        6087 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1146             : 
    1147       12174 :   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1148             : 
    1149             :   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    1150           6 :     unsigned Imm = CAddr->getZExtValue();
    1151             : 
    1152          12 :     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    1153          12 :     MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    1154           6 :                                                         DL, MVT::i32, HighBits);
    1155           6 :     VAddr = SDValue(MovHighBits, 0);
    1156             : 
    1157             :     // In a call sequence, stores to the argument stack area are relative to the
    1158             :     // stack pointer.
    1159           6 :     const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1160           0 :     unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1161             :       Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1162             : 
    1163          12 :     SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1164          12 :     ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    1165             :     return true;
    1166             :   }
    1167             : 
    1168        6081 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1169             :     // (add n0, c1)
    1170             : 
    1171        5105 :     SDValue N0 = Addr.getOperand(0);
    1172        5105 :     SDValue N1 = Addr.getOperand(1);
    1173             : 
    1174             :     // Offsets in vaddr must be positive if range checking is enabled.
    1175             :     //
    1176             :     // The total computation of vaddr + soffset + offset must not overflow.  If
    1177             :     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    1178             :     // overflowing.
    1179             :     //
    1180             :     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    1181             :     // always perform a range check. If a negative vaddr base index was used,
    1182             :     // this would fail the range check. The overall address computation would
    1183             :     // compute a valid address, but this doesn't happen due to the range
    1184             :     // check. For out-of-bounds MUBUF loads, a 0 is returned.
    1185             :     //
    1186             :     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    1187             :     // MUBUF vaddr, but not on older subtargets which can only do this if the
    1188             :     // sign bit is known 0.
    1189             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1190       15301 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
    1191        8929 :         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
    1192        3838 :          CurDAG->SignBitIsZero(N0))) {
    1193        9000 :       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
    1194        9000 :       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1195        4500 :       return true;
    1196             :     }
    1197             :   }
    1198             : 
    1199             :   // (node)
    1200        3162 :   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
    1201        3162 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1202        1581 :   return true;
    1203             : }
    1204             : 
    1205        6269 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    1206             :                                                   SDValue Addr,
    1207             :                                                   SDValue &SRsrc,
    1208             :                                                   SDValue &SOffset,
    1209             :                                                   SDValue &Offset) const {
    1210             :   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
    1211         376 :   if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    1212             :     return false;
    1213             : 
    1214             :   SDLoc DL(Addr);
    1215         182 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1216         182 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1217             : 
    1218         364 :   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1219             : 
    1220         182 :   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1221          89 :   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1222             :     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1223             : 
    1224             :   // FIXME: Get from MachinePointerInfo? We should only be using the frame
    1225             :   // offset if we know this is in a call sequence.
    1226         364 :   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1227             : 
    1228         364 :   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
    1229             :   return true;
    1230             : }
    1231             : 
    1232       26911 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1233             :                                            SDValue &SOffset, SDValue &Offset,
    1234             :                                            SDValue &GLC, SDValue &SLC,
    1235             :                                            SDValue &TFE) const {
    1236       26911 :   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
    1237             :   const SIInstrInfo *TII =
    1238       26911 :     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
    1239             : 
    1240       26911 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1241             :               GLC, SLC, TFE))
    1242             :     return false;
    1243             : 
    1244       36164 :   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    1245       54246 :       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    1246       18082 :       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    1247       17145 :     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
    1248       34290 :                     APInt::getAllOnesValue(32).getZExtValue(); // Size
    1249             :     SDLoc DL(Addr);
    1250             : 
    1251             :     const SITargetLowering& Lowering =
    1252       17145 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1253             : 
    1254       17145 :     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    1255             :     return true;
    1256             :   }
    1257             :   return false;
    1258             : }
    1259             : 
    1260           8 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1261             :                                            SDValue &Soffset, SDValue &Offset
    1262             :                                            ) const {
    1263           8 :   SDValue GLC, SLC, TFE;
    1264             : 
    1265           8 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1266             : }
    1267             : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1268             :                                            SDValue &Soffset, SDValue &Offset,
    1269             :                                            SDValue &SLC) const {
    1270         517 :   SDValue GLC, TFE;
    1271             : 
    1272         517 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1273             : }
    1274             : 
    1275         436 : bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
    1276             :                                              SDValue &SOffset,
    1277             :                                              SDValue &ImmOffset) const {
    1278             :   SDLoc DL(Constant);
    1279             :   const uint32_t Align = 4;
    1280             :   const uint32_t MaxImm = alignDown(4095, Align);
    1281         872 :   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
    1282             :   uint32_t Overflow = 0;
    1283             : 
    1284         436 :   if (Imm > MaxImm) {
    1285          16 :     if (Imm <= MaxImm + 64) {
    1286             :       // Use an SOffset inline constant for 4..64
    1287           2 :       Overflow = Imm - MaxImm;
    1288             :       Imm = MaxImm;
    1289             :     } else {
    1290             :       // Try to keep the same value in SOffset for adjacent loads, so that
    1291             :       // the corresponding register contents can be re-used.
    1292             :       //
    1293             :       // Load values with all low-bits (except for alignment bits) set into
    1294             :       // SOffset, so that a larger range of values can be covered using
    1295             :       // s_movk_i32.
    1296             :       //
    1297             :       // Atomic operations fail to work correctly when individual address
    1298             :       // components are unaligned, even if their sum is aligned.
    1299          14 :       uint32_t High = (Imm + Align) & ~4095;
    1300          14 :       uint32_t Low = (Imm + Align) & 4095;
    1301             :       Imm = Low;
    1302          14 :       Overflow = High - Align;
    1303             :     }
    1304             :   }
    1305             : 
    1306             :   // There is a hardware bug in SI and CI which prevents address clamping in
    1307             :   // MUBUF instructions from working correctly with SOffsets. The immediate
    1308             :   // offset is unaffected.
    1309          32 :   if (Overflow > 0 &&
    1310          16 :       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    1311             :     return false;
    1312             : 
    1313         856 :   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
    1314             : 
    1315         428 :   if (Overflow <= 64)
    1316         842 :     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
    1317             :   else
    1318          14 :     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1319             :                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
    1320             :                       0);
    1321             : 
    1322             :   return true;
    1323             : }
    1324             : 
    1325         252 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
    1326             :                                                     SDValue &SOffset,
    1327             :                                                     SDValue &ImmOffset) const {
    1328             :   SDLoc DL(Offset);
    1329             : 
    1330             :   if (!isa<ConstantSDNode>(Offset))
    1331             :     return false;
    1332             : 
    1333         252 :   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
    1334             : }
    1335             : 
    1336         389 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
    1337             :                                                      SDValue &SOffset,
    1338             :                                                      SDValue &ImmOffset,
    1339             :                                                      SDValue &VOffset) const {
    1340             :   SDLoc DL(Offset);
    1341             : 
    1342             :   // Don't generate an unnecessary voffset for constant offsets.
    1343             :   if (isa<ConstantSDNode>(Offset)) {
    1344         260 :     SDValue Tmp1, Tmp2;
    1345             : 
    1346             :     // When necessary, use a voffset in <= CI anyway to work around a hardware
    1347             :     // bug.
    1348         380 :     if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
    1349         120 :         SelectMUBUFConstant(Offset, Tmp1, Tmp2))
    1350         252 :       return false;
    1351             :   }
    1352             : 
    1353         137 :   if (CurDAG->isBaseWithConstantOffset(Offset)) {
    1354          66 :     SDValue N0 = Offset.getOperand(0);
    1355          66 :     SDValue N1 = Offset.getOperand(1);
    1356         196 :     if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
    1357          64 :         SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
    1358          64 :       VOffset = N0;
    1359             :       return true;
    1360             :     }
    1361             :   }
    1362             : 
    1363         146 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1364         146 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1365          73 :   VOffset = Offset;
    1366             : 
    1367          73 :   return true;
    1368             : }
    1369             : 
    1370             : template <bool IsSigned>
    1371       11394 : bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
    1372             :                                           SDValue &VAddr,
    1373             :                                           SDValue &Offset,
    1374             :                                           SDValue &SLC) const {
    1375             :   int64_t OffsetVal = 0;
    1376             : 
    1377       14405 :   if (Subtarget->hasFlatInstOffsets() &&
    1378        3011 :       CurDAG->isBaseWithConstantOffset(Addr)) {
    1379         434 :     SDValue N0 = Addr.getOperand(0);
    1380         434 :     SDValue N1 = Addr.getOperand(1);
    1381         434 :     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    1382             : 
    1383         434 :     if ((IsSigned && isInt<13>(COffsetVal)) ||
    1384          95 :         (!IsSigned && isUInt<12>(COffsetVal))) {
    1385             :       Addr = N0;
    1386             :       OffsetVal = COffsetVal;
    1387             :     }
    1388             :   }
    1389             : 
    1390       11394 :   VAddr = Addr;
    1391       34182 :   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
    1392       34182 :   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
    1393             : 
    1394       11394 :   return true;
    1395             : }
    1396             : 
    1397             : bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
    1398             :                                           SDValue &VAddr,
    1399             :                                           SDValue &Offset,
    1400             :                                           SDValue &SLC) const {
    1401        1003 :   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
    1402             : }
    1403             : 
    1404             : bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
    1405             :                                           SDValue &VAddr,
    1406             :                                           SDValue &Offset,
    1407             :                                           SDValue &SLC) const {
    1408         212 :   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
    1409             : }
    1410             : 
    1411       31312 : bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
    1412             :                                           SDValue &Offset, bool &Imm) const {
    1413             : 
    1414             :   // FIXME: Handle non-constant offsets.
    1415             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
    1416             :   if (!C)
    1417             :     return false;
    1418             : 
    1419             :   SDLoc SL(ByteOffsetNode);
    1420       31257 :   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
    1421       31257 :   int64_t ByteOffset = C->getSExtValue();
    1422       31257 :   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
    1423             : 
    1424       31257 :   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    1425       62342 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1426       31171 :     Imm = true;
    1427             :     return true;
    1428             :   }
    1429             : 
    1430          86 :   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    1431             :     return false;
    1432             : 
    1433          74 :   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    1434             :     // 32-bit Immediates are supported on Sea Islands.
    1435          66 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1436             :   } else {
    1437          82 :     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    1438          82 :     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
    1439             :                                             C32Bit), 0);
    1440             :   }
    1441          74 :   Imm = false;
    1442             :   return true;
    1443             : }
    1444             : 
    1445       33556 : SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
    1446             :   if (Addr.getValueType() != MVT::i32)
    1447       33442 :     return Addr;
    1448             : 
    1449             :   // Zero-extend a 32-bit address.
    1450             :   SDLoc SL(Addr);
    1451             : 
    1452         114 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1453             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1454         114 :   unsigned AddrHiVal = Info->get32BitAddressHighBits();
    1455         228 :   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
    1456             : 
    1457             :   const SDValue Ops[] = {
    1458         114 :     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    1459             :     Addr,
    1460         114 :     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    1461         342 :     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
    1462             :             0),
    1463         114 :     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
    1464         342 :   };
    1465             : 
    1466         342 :   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
    1467         114 :                                         Ops), 0);
    1468             : }
    1469             : 
    1470       33556 : bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
    1471             :                                      SDValue &Offset, bool &Imm) const {
    1472             :   SDLoc SL(Addr);
    1473             : 
    1474       33556 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1475       30852 :     SDValue N0 = Addr.getOperand(0);
    1476       30852 :     SDValue N1 = Addr.getOperand(1);
    1477             : 
    1478       30852 :     if (SelectSMRDOffset(N1, Offset, Imm)) {
    1479       30840 :       SBase = Expand32BitAddress(N0);
    1480       30840 :       return true;
    1481             :     }
    1482             :   }
    1483        2716 :   SBase = Expand32BitAddress(Addr);
    1484        5432 :   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1485        2716 :   Imm = true;
    1486        2716 :   return true;
    1487             : }
    1488             : 
    1489             : bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
    1490             :                                        SDValue &Offset) const {
    1491             :   bool Imm;
    1492       33520 :   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
    1493             : }
    1494             : 
    1495           9 : bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
    1496             :                                          SDValue &Offset) const {
    1497             : 
    1498           9 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1499             :     return false;
    1500             : 
    1501             :   bool Imm;
    1502           9 :   if (!SelectSMRD(Addr, SBase, Offset, Imm))
    1503             :     return false;
    1504             : 
    1505           9 :   return !Imm && isa<ConstantSDNode>(Offset);
    1506             : }
    1507             : 
    1508          27 : bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
    1509             :                                         SDValue &Offset) const {
    1510             :   bool Imm;
    1511          27 :   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
    1512          27 :          !isa<ConstantSDNode>(Offset);
    1513             : }
    1514             : 
    1515             : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
    1516             :                                              SDValue &Offset) const {
    1517             :   bool Imm;
    1518         446 :   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
    1519             : }
    1520             : 
    1521          38 : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
    1522             :                                                SDValue &Offset) const {
    1523          38 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1524             :     return false;
    1525             : 
    1526             :   bool Imm;
    1527          14 :   if (!SelectSMRDOffset(Addr, Offset, Imm))
    1528             :     return false;
    1529             : 
    1530           3 :   return !Imm && isa<ConstantSDNode>(Offset);
    1531             : }
    1532             : 
    1533       33927 : bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
    1534             :                                             SDValue &Base,
    1535             :                                             SDValue &Offset) const {
    1536             :   SDLoc DL(Index);
    1537             : 
    1538       33927 :   if (CurDAG->isBaseWithConstantOffset(Index)) {
    1539          80 :     SDValue N0 = Index.getOperand(0);
    1540          80 :     SDValue N1 = Index.getOperand(1);
    1541             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1542             : 
    1543             :     // (add n0, c0)
    1544          80 :     Base = N0;
    1545         160 :     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    1546             :     return true;
    1547             :   }
    1548             : 
    1549             :   if (isa<ConstantSDNode>(Index))
    1550             :     return false;
    1551             : 
    1552          81 :   Base = Index;
    1553         162 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1554             :   return true;
    1555             : }
    1556             : 
    1557        3977 : SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
    1558             :                                      SDValue Val, uint32_t Offset,
    1559             :                                      uint32_t Width) {
    1560             :   // Transformation function, pack the offset and width of a BFE into
    1561             :   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
    1562             :   // source, bits [5:0] contain the offset and bits [22:16] the width.
    1563        3977 :   uint32_t PackedVal = Offset | (Width << 16);
    1564        7954 :   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
    1565             : 
    1566        7954 :   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
    1567             : }
    1568             : 
    1569         194 : void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
    1570             :   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
    1571             :   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
    1572             :   // Predicate: 0 < b <= c < 32
    1573             : 
    1574         194 :   const SDValue &Shl = N->getOperand(0);
    1575         194 :   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
    1576             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1577             : 
    1578         194 :   if (B && C) {
    1579         348 :     uint32_t BVal = B->getZExtValue();
    1580         348 :     uint32_t CVal = C->getZExtValue();
    1581             : 
    1582         174 :     if (0 < BVal && BVal <= CVal && CVal < 32) {
    1583         163 :       bool Signed = N->getOpcode() == ISD::SRA;
    1584         163 :       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    1585             : 
    1586         489 :       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
    1587             :                               32 - CVal));
    1588         163 :       return;
    1589             :     }
    1590             :   }
    1591             :   SelectCode(N);
    1592             : }
    1593             : 
    1594       19779 : void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
    1595       39558 :   switch (N->getOpcode()) {
    1596        6747 :   case ISD::AND:
    1597       13494 :     if (N->getOperand(0).getOpcode() == ISD::SRL) {
    1598             :       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
    1599             :       // Predicate: isMask(mask)
    1600             :       const SDValue &Srl = N->getOperand(0);
    1601             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
    1602             :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1603             : 
    1604        1791 :       if (Shift && Mask) {
    1605        3552 :         uint32_t ShiftVal = Shift->getZExtValue();
    1606        3552 :         uint32_t MaskVal = Mask->getZExtValue();
    1607             : 
    1608             :         if (isMask_32(MaskVal)) {
    1609             :           uint32_t WidthVal = countPopulation(MaskVal);
    1610             : 
    1611        3072 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1612             :                                   Srl.getOperand(0), ShiftVal, WidthVal));
    1613        1536 :           return;
    1614             :         }
    1615             :       }
    1616             :     }
    1617             :     break;
    1618        6383 :   case ISD::SRL:
    1619       12766 :     if (N->getOperand(0).getOpcode() == ISD::AND) {
    1620             :       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
    1621             :       // Predicate: isMask(mask >> b)
    1622             :       const SDValue &And = N->getOperand(0);
    1623             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1624         800 :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
    1625             : 
    1626         800 :       if (Shift && Mask) {
    1627        1594 :         uint32_t ShiftVal = Shift->getZExtValue();
    1628        1594 :         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
    1629             : 
    1630             :         if (isMask_32(MaskVal)) {
    1631             :           uint32_t WidthVal = countPopulation(MaskVal);
    1632             : 
    1633        1594 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1634             :                                   And.getOperand(0), ShiftVal, WidthVal));
    1635         797 :           return;
    1636             :         }
    1637             :       }
    1638        5583 :     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
    1639          16 :       SelectS_BFEFromShifts(N);
    1640          16 :       return;
    1641             :     }
    1642             :     break;
    1643        2192 :   case ISD::SRA:
    1644        4384 :     if (N->getOperand(0).getOpcode() == ISD::SHL) {
    1645         178 :       SelectS_BFEFromShifts(N);
    1646         178 :       return;
    1647             :     }
    1648             :     break;
    1649             : 
    1650        4457 :   case ISD::SIGN_EXTEND_INREG: {
    1651             :     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    1652        4457 :     SDValue Src = N->getOperand(0);
    1653        4457 :     if (Src.getOpcode() != ISD::SRL)
    1654             :       break;
    1655             : 
    1656             :     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    1657             :     if (!Amt)
    1658             :       break;
    1659             : 
    1660        1349 :     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    1661        4047 :     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
    1662        1349 :                             Amt->getZExtValue(), Width));
    1663             :     return;
    1664             :   }
    1665             :   }
    1666             : 
    1667             :   SelectCode(N);
    1668             : }
    1669             : 
    1670         490 : bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
    1671             :   assert(N->getOpcode() == ISD::BRCOND);
    1672             :   if (!N->hasOneUse())
    1673             :     return false;
    1674             : 
    1675         490 :   SDValue Cond = N->getOperand(1);
    1676         490 :   if (Cond.getOpcode() == ISD::CopyToReg)
    1677           0 :     Cond = Cond.getOperand(2);
    1678             : 
    1679         938 :   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    1680             :     return false;
    1681             : 
    1682             :   MVT VT = Cond.getOperand(0).getSimpleValueType();
    1683         444 :   if (VT == MVT::i32)
    1684             :     return true;
    1685             : 
    1686          88 :   if (VT == MVT::i64) {
    1687          29 :     auto ST = static_cast<const SISubtarget *>(Subtarget);
    1688             : 
    1689          29 :     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    1690          56 :     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
    1691             :   }
    1692             : 
    1693             :   return false;
    1694             : }
    1695             : 
    1696         568 : void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
    1697         568 :   SDValue Cond = N->getOperand(1);
    1698             : 
    1699         568 :   if (Cond.isUndef()) {
    1700         156 :     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
    1701             :                          N->getOperand(2), N->getOperand(0));
    1702          78 :     return;
    1703             :   }
    1704             : 
    1705         490 :   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
    1706             :   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
    1707         490 :   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
    1708             :   SDLoc SL(N);
    1709             : 
    1710         490 :   if (!UseSCCBr) {
    1711             :     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    1712             :     // analyzed what generates the vcc value, so we do not know whether vcc
    1713             :     // bits for disabled lanes are 0.  Thus we need to mask out bits for
    1714             :     // disabled lanes.
    1715             :     //
    1716             :     // For the case that we select S_CBRANCH_SCC1 and it gets
    1717             :     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    1718             :     // SIInstrInfo::moveToVALU which inserts the S_AND).
    1719             :     //
    1720             :     // We could add an analysis of what generates the vcc value here and omit
    1721             :     // the S_AND when is unnecessary. But it would be better to add a separate
    1722             :     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    1723             :     // catches both cases.
    1724         369 :     Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
    1725             :                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
    1726             :                                Cond),
    1727             :                    0);
    1728             :   }
    1729             : 
    1730         980 :   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
    1731         980 :   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
    1732         490 :                        N->getOperand(2), // Basic Block
    1733             :                        VCC.getValue(0));
    1734             : }
    1735             : 
    1736        2621 : void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
    1737             :   MVT VT = N->getSimpleValueType(0);
    1738        2621 :   bool IsFMA = N->getOpcode() == ISD::FMA;
    1739        3832 :   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
    1740        4692 :                          !Subtarget->hasFmaMixInsts()) ||
    1741         123 :       ((IsFMA && Subtarget->hasMadMixInsts()) ||
    1742         119 :        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    1743             :     SelectCode(N);
    1744        2456 :     return;
    1745             :   }
    1746             : 
    1747         165 :   SDValue Src0 = N->getOperand(0);
    1748         165 :   SDValue Src1 = N->getOperand(1);
    1749         165 :   SDValue Src2 = N->getOperand(2);
    1750             :   unsigned Src0Mods, Src1Mods, Src2Mods;
    1751             : 
    1752             :   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
    1753             :   // using the conversion from f16.
    1754         165 :   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
    1755         165 :   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
    1756         165 :   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
    1757             : 
    1758             :   assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
    1759             :          "fmad selected with denormals enabled");
    1760             :   // TODO: We can select this with f32 denormals enabled if all the sources are
    1761             :   // converted from f16 (in which case fmad isn't legal).
    1762             : 
    1763         165 :   if (Sel0 || Sel1 || Sel2) {
    1764             :     // For dummy operands.
    1765         182 :     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    1766             :     SDValue Ops[] = {
    1767         182 :       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
    1768         182 :       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
    1769         182 :       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
    1770         182 :       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
    1771             :       Zero, Zero
    1772         728 :     };
    1773             : 
    1774         182 :     CurDAG->SelectNodeTo(N,
    1775             :                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
    1776          91 :                          MVT::f32, Ops);
    1777             :   } else {
    1778             :     SelectCode(N);
    1779             :   }
    1780             : }
    1781             : 
    1782             : // This is here because there isn't a way to use the generated sub0_sub1 as the
    1783             : // subreg index to EXTRACT_SUBREG in tablegen.
    1784         195 : void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
    1785             :   MemSDNode *Mem = cast<MemSDNode>(N);
    1786             :   unsigned AS = Mem->getAddressSpace();
    1787         195 :   if (AS == AMDGPUASI.FLAT_ADDRESS) {
    1788             :     SelectCode(N);
    1789         169 :     return;
    1790             :   }
    1791             : 
    1792             :   MVT VT = N->getSimpleValueType(0);
    1793             :   bool Is32 = (VT == MVT::i32);
    1794             :   SDLoc SL(N);
    1795             : 
    1796             :   MachineSDNode *CmpSwap = nullptr;
    1797         102 :   if (Subtarget->hasAddr64()) {
    1798          17 :     SDValue SRsrc, VAddr, SOffset, Offset, SLC;
    1799             : 
    1800          17 :     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
    1801           8 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
    1802             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
    1803           8 :       SDValue CmpVal = Mem->getOperand(2);
    1804             : 
    1805             :       // XXX - Do we care about glue operands?
    1806             : 
    1807             :       SDValue Ops[] = {
    1808             :         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1809           8 :       };
    1810             : 
    1811          24 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1812             :     }
    1813             :   }
    1814             : 
    1815           8 :   if (!CmpSwap) {
    1816          43 :     SDValue SRsrc, SOffset, Offset, SLC;
    1817          43 :     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
    1818          18 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
    1819             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
    1820             : 
    1821          18 :       SDValue CmpVal = Mem->getOperand(2);
    1822             :       SDValue Ops[] = {
    1823             :         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1824          18 :       };
    1825             : 
    1826          54 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1827             :     }
    1828             :   }
    1829             : 
    1830          51 :   if (!CmpSwap) {
    1831             :     SelectCode(N);
    1832             :     return;
    1833             :   }
    1834             : 
    1835          26 :   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
    1836          26 :   *MMOs = Mem->getMemOperand();
    1837          26 :   CmpSwap->setMemRefs(MMOs, MMOs + 1);
    1838             : 
    1839          26 :   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
    1840             :   SDValue Extract
    1841          52 :     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
    1842             : 
    1843          26 :   ReplaceUses(SDValue(N, 0), Extract);
    1844          26 :   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
    1845          26 :   CurDAG->RemoveDeadNode(N);
    1846             : }
    1847             : 
    1848         930 : bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
    1849             :                                             unsigned &Mods) const {
    1850         930 :   Mods = 0;
    1851       22057 :   Src = In;
    1852             : 
    1853       22987 :   if (Src.getOpcode() == ISD::FNEG) {
    1854          40 :     Mods |= SISrcMods::NEG;
    1855        1476 :     Src = Src.getOperand(0);
    1856             :   }
    1857             : 
    1858       44114 :   if (Src.getOpcode() == ISD::FABS) {
    1859         665 :     Mods |= SISrcMods::ABS;
    1860         665 :     Src = Src.getOperand(0);
    1861             :   }
    1862             : 
    1863         930 :   return true;
    1864             : }
    1865             : 
    1866       21127 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
    1867             :                                         SDValue &SrcMods) const {
    1868             :   unsigned Mods;
    1869             :   if (SelectVOP3ModsImpl(In, Src, Mods)) {
    1870       84508 :     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1871             :     return true;
    1872             :   }
    1873             : 
    1874             :   return false;
    1875             : }
    1876             : 
    1877         492 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
    1878             :                                              SDValue &SrcMods) const {
    1879         492 :   SelectVOP3Mods(In, Src, SrcMods);
    1880         492 :   return isNoNanSrc(Src);
    1881             : }
    1882             : 
    1883             : bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
    1884        4613 :   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    1885             :     return false;
    1886             : 
    1887        4260 :   Src = In;
    1888             :   return true;
    1889             : }
    1890             : 
    1891       10056 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
    1892             :                                          SDValue &SrcMods, SDValue &Clamp,
    1893             :                                          SDValue &Omod) const {
    1894             :   SDLoc DL(In);
    1895       20112 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1896       20112 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1897             : 
    1898       20112 :   return SelectVOP3Mods(In, Src, SrcMods);
    1899             : }
    1900             : 
    1901          48 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
    1902             :                                                    SDValue &SrcMods,
    1903             :                                                    SDValue &Clamp,
    1904             :                                                    SDValue &Omod) const {
    1905         144 :   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    1906          48 :   return SelectVOP3Mods(In, Src, SrcMods);
    1907             : }
    1908             : 
    1909         387 : bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
    1910             :                                          SDValue &Clamp, SDValue &Omod) const {
    1911         387 :   Src = In;
    1912             : 
    1913             :   SDLoc DL(In);
    1914         774 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1915         774 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1916             : 
    1917         387 :   return true;
    1918             : }
    1919             : 
    1920             : static SDValue stripBitcast(SDValue Val) {
    1921        4330 :   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
    1922             : }
    1923             : 
    1924             : // Figure out if this is really an extract of the high 16-bits of a dword.
    1925         764 : static bool isExtractHiElt(SDValue In, SDValue &Out) {
    1926             :   In = stripBitcast(In);
    1927         764 :   if (In.getOpcode() != ISD::TRUNCATE)
    1928             :     return false;
    1929             : 
    1930         274 :   SDValue Srl = In.getOperand(0);
    1931         274 :   if (Srl.getOpcode() == ISD::SRL) {
    1932             :     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
    1933         342 :       if (ShiftAmt->getZExtValue() == 16) {
    1934         171 :         Out = stripBitcast(Srl.getOperand(0));
    1935         171 :         return true;
    1936             :       }
    1937             :     }
    1938             :   }
    1939             : 
    1940             :   return false;
    1941             : }
    1942             : 
    1943             : // Look through operations that obscure just looking at the low 16-bits of the
    1944             : // same register.
    1945         390 : static SDValue stripExtractLoElt(SDValue In) {
    1946         390 :   if (In.getOpcode() == ISD::TRUNCATE) {
    1947          28 :     SDValue Src = In.getOperand(0);
    1948          28 :     if (Src.getValueType().getSizeInBits() == 32)
    1949             :       return stripBitcast(Src);
    1950             :   }
    1951             : 
    1952         363 :   return In;
    1953             : }
    1954             : 
    1955         823 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
    1956             :                                          SDValue &SrcMods) const {
    1957             :   unsigned Mods = 0;
    1958         823 :   Src = In;
    1959             : 
    1960        1646 :   if (Src.getOpcode() == ISD::FNEG) {
    1961             :     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    1962          13 :     Src = Src.getOperand(0);
    1963             :   }
    1964             : 
    1965        1646 :   if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    1966             :     unsigned VecMods = Mods;
    1967             : 
    1968         195 :     SDValue Lo = stripBitcast(Src.getOperand(0));
    1969         195 :     SDValue Hi = stripBitcast(Src.getOperand(1));
    1970             : 
    1971         195 :     if (Lo.getOpcode() == ISD::FNEG) {
    1972          11 :       Lo = stripBitcast(Lo.getOperand(0));
    1973          11 :       Mods ^= SISrcMods::NEG;
    1974             :     }
    1975             : 
    1976         195 :     if (Hi.getOpcode() == ISD::FNEG) {
    1977          11 :       Hi = stripBitcast(Hi.getOperand(0));
    1978          11 :       Mods ^= SISrcMods::NEG_HI;
    1979             :     }
    1980             : 
    1981         195 :     if (isExtractHiElt(Lo, Lo))
    1982          13 :       Mods |= SISrcMods::OP_SEL_0;
    1983             : 
    1984         195 :     if (isExtractHiElt(Hi, Hi))
    1985          25 :       Mods |= SISrcMods::OP_SEL_1;
    1986             : 
    1987         195 :     Lo = stripExtractLoElt(Lo);
    1988         195 :     Hi = stripExtractLoElt(Hi);
    1989             : 
    1990         148 :     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
    1991             :       // Really a scalar input. Just select from the low half of the register to
    1992             :       // avoid packing.
    1993             : 
    1994          39 :       Src = Lo;
    1995         156 :       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1996          39 :       return true;
    1997             :     }
    1998             : 
    1999             :     Mods = VecMods;
    2000             :   }
    2001             : 
    2002             :   // Packed instructions do not have abs modifiers.
    2003         784 :   Mods |= SISrcMods::OP_SEL_1;
    2004             : 
    2005        3136 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    2006         784 :   return true;
    2007             : }
    2008             : 
    2009         380 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
    2010             :                                           SDValue &SrcMods,
    2011             :                                           SDValue &Clamp) const {
    2012             :   SDLoc SL(In);
    2013             : 
    2014             :   // FIXME: Handle clamp and op_sel
    2015         760 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2016             : 
    2017         760 :   return SelectVOP3PMods(In, Src, SrcMods);
    2018             : }
    2019             : 
    2020          48 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
    2021             :                                          SDValue &SrcMods) const {
    2022          48 :   Src = In;
    2023             :   // FIXME: Handle op_sel
    2024         144 :   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    2025          48 :   return true;
    2026             : }
    2027             : 
    2028          16 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
    2029             :                                           SDValue &SrcMods,
    2030             :                                           SDValue &Clamp) const {
    2031             :   SDLoc SL(In);
    2032             : 
    2033             :   // FIXME: Handle clamp
    2034          32 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2035             : 
    2036          32 :   return SelectVOP3OpSel(In, Src, SrcMods);
    2037             : }
    2038             : 
    2039             : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
    2040             :                                              SDValue &SrcMods) const {
    2041             :   // FIXME: Handle op_sel
    2042          27 :   return SelectVOP3Mods(In, Src, SrcMods);
    2043             : }
    2044             : 
    2045           9 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
    2046             :                                               SDValue &SrcMods,
    2047             :                                               SDValue &Clamp) const {
    2048             :   SDLoc SL(In);
    2049             : 
    2050             :   // FIXME: Handle clamp
    2051          18 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2052             : 
    2053           9 :   return SelectVOP3OpSelMods(In, Src, SrcMods);
    2054             : }
    2055             : 
    2056             : // The return value is not whether the match is possible (which it always is),
    2057             : // but whether or not it a conversion is really used.
    2058         603 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
    2059             :                                                    unsigned &Mods) const {
    2060         603 :   Mods = 0;
    2061         603 :   SelectVOP3ModsImpl(In, Src, Mods);
    2062             : 
    2063        1206 :   if (Src.getOpcode() == ISD::FP_EXTEND) {
    2064         333 :     Src = Src.getOperand(0);
    2065             :     assert(Src.getValueType() == MVT::f16);
    2066         333 :     Src = stripBitcast(Src);
    2067             : 
    2068             :     // Be careful about folding modifiers if we already have an abs. fneg is
    2069             :     // applied last, so we don't want to apply an earlier fneg.
    2070         333 :     if ((Mods & SISrcMods::ABS) == 0) {
    2071             :       unsigned ModsTmp;
    2072         327 :       SelectVOP3ModsImpl(Src, Src, ModsTmp);
    2073             : 
    2074         327 :       if ((ModsTmp & SISrcMods::NEG) != 0)
    2075           5 :         Mods ^= SISrcMods::NEG;
    2076             : 
    2077         327 :       if ((ModsTmp & SISrcMods::ABS) != 0)
    2078           6 :         Mods |= SISrcMods::ABS;
    2079             :     }
    2080             : 
    2081             :     // op_sel/op_sel_hi decide the source type and source.
    2082             :     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    2083             :     // If the sources's op_sel is set, it picks the high half of the source
    2084             :     // register.
    2085             : 
    2086         333 :     Mods |= SISrcMods::OP_SEL_1;
    2087         333 :     if (isExtractHiElt(Src, Src)) {
    2088         103 :       Mods |= SISrcMods::OP_SEL_0;
    2089             : 
    2090             :       // TODO: Should we try to look for neg/abs here?
    2091             :     }
    2092             : 
    2093             :     return true;
    2094             :   }
    2095             : 
    2096             :   return false;
    2097             : }
    2098             : 
    2099         108 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
    2100             :                                                SDValue &SrcMods) const {
    2101         108 :   unsigned Mods = 0;
    2102         108 :   SelectVOP3PMadMixModsImpl(In, Src, Mods);
    2103         432 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    2104         108 :   return true;
    2105             : }
    2106             : 
    2107             : // TODO: Can we identify things like v_mad_mixhi_f16?
    2108          46 : bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
    2109          46 :   if (In.isUndef()) {
    2110           3 :     Src = In;
    2111             :     return true;
    2112             :   }
    2113             : 
    2114             :   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    2115             :     SDLoc SL(In);
    2116           3 :     SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    2117           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2118           1 :                                                  SL, MVT::i32, K);
    2119           1 :     Src = SDValue(MovK, 0);
    2120             :     return true;
    2121             :   }
    2122             : 
    2123             :   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    2124             :     SDLoc SL(In);
    2125           1 :     SDValue K = CurDAG->getTargetConstant(
    2126           5 :       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    2127           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2128           1 :                                                  SL, MVT::i32, K);
    2129           1 :     Src = SDValue(MovK, 0);
    2130             :     return true;
    2131             :   }
    2132             : 
    2133          41 :   return isExtractHiElt(In, Src);
    2134             : }
    2135             : 
    2136       22404 : void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
    2137             :   const AMDGPUTargetLowering& Lowering =
    2138       22404 :     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
    2139             :   bool IsModified = false;
    2140       24636 :   do {
    2141             :     IsModified = false;
    2142             : 
    2143             :     // Go over all selected nodes and try to fold them a bit more
    2144       24636 :     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    2145     1962212 :     while (Position != CurDAG->allnodes_end()) {
    2146             :       SDNode *Node = &*Position++;
    2147             :       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
    2148      431351 :       if (!MachineNode)
    2149      431351 :         continue;
    2150             : 
    2151      525119 :       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
    2152      525119 :       if (ResNode != Node) {
    2153       34617 :         if (ResNode)
    2154       34539 :           ReplaceUses(Node, ResNode);
    2155             :         IsModified = true;
    2156             :       }
    2157             :     }
    2158       24636 :     CurDAG->RemoveDeadNodes();
    2159             :   } while (IsModified);
    2160       22404 : }
    2161             : 
    2162       83303 : void R600DAGToDAGISel::Select(SDNode *N) {
    2163       83303 :   unsigned int Opc = N->getOpcode();
    2164       83303 :   if (N->isMachineOpcode()) {
    2165             :     N->setNodeId(-1);
    2166             :     return;   // Already selected.
    2167             :   }
    2168             : 
    2169       83303 :   switch (Opc) {
    2170             :   default: break;
    2171        2430 :   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
    2172             :   case ISD::SCALAR_TO_VECTOR:
    2173             :   case ISD::BUILD_VECTOR: {
    2174        4860 :     EVT VT = N->getValueType(0);
    2175        2430 :     unsigned NumVectorElts = VT.getVectorNumElements();
    2176             :     unsigned RegClassID;
    2177             :     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    2178             :     // that adds a 128 bits reg copy when going through TwoAddressInstructions
    2179             :     // pass. We want to avoid 128 bits copies as much as possible because they
    2180             :     // can't be bundled by our scheduler.
    2181        2430 :     switch(NumVectorElts) {
    2182             :     case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    2183        2040 :     case 4:
    2184        2040 :       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    2185             :         RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
    2186             :       else
    2187             :         RegClassID = AMDGPU::R600_Reg128RegClassID;
    2188             :       break;
    2189           0 :     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    2190             :     }
    2191        2430 :     SelectBuildVector(N, RegClassID);
    2192             :     return;
    2193             :   }
    2194             :   }
    2195             : 
    2196             :   SelectCode(N);
    2197             : }
    2198             : 
    2199        2065 : bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
    2200             :                                           SDValue &Offset) {
    2201             :   ConstantSDNode *C;
    2202             :   SDLoc DL(Addr);
    2203             : 
    2204             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    2205           0 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    2206           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2207        2065 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
    2208             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    2209        3588 :     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    2210        3588 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2211         542 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
    2212             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    2213           0 :     Base = Addr.getOperand(0);
    2214           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2215             :   } else {
    2216         271 :     Base = Addr;
    2217         542 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    2218             :   }
    2219             : 
    2220        2065 :   return true;
    2221             : }
    2222             : 
    2223        1561 : bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
    2224             :                                           SDValue &Offset) {
    2225             :   ConstantSDNode *IMMOffset;
    2226             : 
    2227             :   if (Addr.getOpcode() == ISD::ADD
    2228             :       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
    2229        2683 :       && isInt<16>(IMMOffset->getZExtValue())) {
    2230             : 
    2231         373 :       Base = Addr.getOperand(0);
    2232        1119 :       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2233         373 :                                          MVT::i32);
    2234         373 :       return true;
    2235             :   // If the pointer address is constant, we can move it to the offset field.
    2236             :   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
    2237         711 :              && isInt<16>(IMMOffset->getZExtValue())) {
    2238         711 :     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
    2239         237 :                                   SDLoc(CurDAG->getEntryNode()),
    2240         237 :                                   AMDGPU::ZERO, MVT::i32);
    2241         711 :     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2242         237 :                                        MVT::i32);
    2243         237 :     return true;
    2244             :   }
    2245             : 
    2246             :   // Default case, no offset
    2247         951 :   Base = Addr;
    2248        2853 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    2249         951 :   return true;
    2250             : }

Generated by: LCOV version 1.13