LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUISelDAGToDAG.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 831 862 96.4 %
Date: 2018-07-13 00:08:38 Functions: 77 82 93.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Defines an instruction selector for the AMDGPU target.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "AMDGPU.h"
      16             : #include "AMDGPUArgumentUsageInfo.h"
      17             : #include "AMDGPUISelLowering.h" // For AMDGPUISD
      18             : #include "AMDGPUInstrInfo.h"
      19             : #include "AMDGPUPerfHintAnalysis.h"
      20             : #include "AMDGPURegisterInfo.h"
      21             : #include "AMDGPUSubtarget.h"
      22             : #include "AMDGPUTargetMachine.h"
      23             : #include "SIDefines.h"
      24             : #include "SIISelLowering.h"
      25             : #include "SIInstrInfo.h"
      26             : #include "SIMachineFunctionInfo.h"
      27             : #include "SIRegisterInfo.h"
      28             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      29             : #include "llvm/ADT/APInt.h"
      30             : #include "llvm/ADT/SmallVector.h"
      31             : #include "llvm/ADT/StringRef.h"
      32             : #include "llvm/Analysis/DivergenceAnalysis.h"
      33             : #include "llvm/Analysis/ValueTracking.h"
      34             : #include "llvm/CodeGen/FunctionLoweringInfo.h"
      35             : #include "llvm/CodeGen/ISDOpcodes.h"
      36             : #include "llvm/CodeGen/MachineFunction.h"
      37             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      38             : #include "llvm/CodeGen/SelectionDAG.h"
      39             : #include "llvm/CodeGen/SelectionDAGISel.h"
      40             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      41             : #include "llvm/CodeGen/ValueTypes.h"
      42             : #include "llvm/IR/BasicBlock.h"
      43             : #include "llvm/IR/Instruction.h"
      44             : #include "llvm/MC/MCInstrDesc.h"
      45             : #include "llvm/Support/Casting.h"
      46             : #include "llvm/Support/CodeGen.h"
      47             : #include "llvm/Support/ErrorHandling.h"
      48             : #include "llvm/Support/MachineValueType.h"
      49             : #include "llvm/Support/MathExtras.h"
      50             : #include <cassert>
      51             : #include <cstdint>
      52             : #include <new>
      53             : #include <vector>
      54             : 
      55             : using namespace llvm;
      56             : 
      57             : namespace llvm {
      58             : 
                       // Forward declaration; no R600InstrInfo.h include appears in this file's
                       // include list above.
      59             : class R600InstrInfo;
      60             : 
      61             : } // end namespace llvm
      62             : 
      63             : //===----------------------------------------------------------------------===//
      64             : // Instruction Selector Implementation
      65             : //===----------------------------------------------------------------------===//
      66             : 
      67             : namespace {
      68             : 
      69             : /// AMDGPU specific code to select AMDGPU machine instructions for
      70             : /// SelectionDAG operations.
                       ///
                       /// The constructor caches the address-space mapping (AMDGPUASI) and the
                       /// EnableLateStructurizeCFG flag. Subtarget is not set there; it is
                       /// assigned at the start of runOnMachineFunction().
      71             : class AMDGPUDAGToDAGISel : public SelectionDAGISel {
      72             :   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
      73             :   // make the right decision when generating code for different targets.
      74             :   const AMDGPUSubtarget *Subtarget;
      75             :   AMDGPUAS AMDGPUASI;
      76             :   bool EnableLateStructurizeCFG;
      77             : 
      78             : public:
                         // NOTE(review): TM defaults to nullptr but is dereferenced unconditionally
                         // (*TM) in both the base-class initializer and the body, so the default
                         // argument cannot actually be used -- confirm whether it is still needed.
      79        2066 :   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
      80             :                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      81        2066 :     : SelectionDAGISel(*TM, OptLevel) {
      82        2066 :     AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
      83        2066 :     EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
      84        2066 :   }
      85        2057 :   ~AMDGPUDAGToDAGISel() override = default;
      86             : 
                         // Registers the three analyses this pass requires, then lets the base
                         // class add its own requirements.
      87        2056 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      88             :     AU.addRequired<AMDGPUArgumentUsageInfo>();
      89             :     AU.addRequired<AMDGPUPerfHintAnalysis>();
      90             :     AU.addRequired<DivergenceAnalysis>();
      91        2056 :     SelectionDAGISel::getAnalysisUsage(AU);
      92        2056 :   }
      93             : 
      94             :   bool runOnMachineFunction(MachineFunction &MF) override;
      95             :   void Select(SDNode *N) override;
      96             :   StringRef getPassName() const override;
      97             :   void PostprocessISelDAG() override;
      98             : 
      99             : protected:
                         // Rewrites N in place as a REG_SEQUENCE (or COPY_TO_REGCLASS for a
                         // one-element vector) using register class RegClassID.
     100             :   void SelectBuildVector(SDNode *N, unsigned RegClassID);
     101             : 
     102             : private:
     103             :   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
     104             :   bool isNoNanSrc(SDValue N) const;
     105             :   bool isInlineImmediate(const SDNode *N) const;
     106             : 
     107             :   bool isUniformBr(const SDNode *N) const;
     108             : 
     109             :   SDNode *glueCopyToM0(SDNode *N) const;
     110             : 
     111             :   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
                         // The next two are virtual because R600DAGToDAGISel overrides them.
     112             :   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
     113             :   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
                         // Operand/addressing-mode selectors. NOTE(review): presumably invoked from
                         // the generated matcher included at the bottom of this class -- their call
                         // sites are not visible in this listing.
     114             :   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     115             :                        unsigned OffsetBits) const;
     116             :   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
     117             :   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
     118             :                                  SDValue &Offset1) const;
     119             :   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     120             :                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
     121             :                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
     122             :                    SDValue &TFE) const;
     123             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
     124             :                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
     125             :                          SDValue &SLC, SDValue &TFE) const;
     126             :   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     127             :                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
     128             :                          SDValue &SLC) const;
     129             :   bool SelectMUBUFScratchOffen(SDNode *Parent,
     130             :                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
     131             :                                SDValue &SOffset, SDValue &ImmOffset) const;
     132             :   bool SelectMUBUFScratchOffset(SDNode *Parent,
     133             :                                 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     134             :                                 SDValue &Offset) const;
     135             : 
     136             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
     137             :                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
     138             :                          SDValue &TFE) const;
     139             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     140             :                          SDValue &Offset, SDValue &SLC) const;
     141             :   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
     142             :                          SDValue &Offset) const;
     143             :   bool SelectMUBUFConstant(SDValue Constant,
     144             :                            SDValue &SOffset,
     145             :                            SDValue &ImmOffset) const;
     146             :   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
     147             :                                   SDValue &ImmOffset) const;
     148             :   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
     149             :                                    SDValue &ImmOffset, SDValue &VOffset) const;
     150             : 
     151             :   bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
     152             :                         SDValue &Offset, SDValue &SLC) const;
     153             :   bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
     154             :                               SDValue &Offset, SDValue &SLC) const;
     155             : 
     156             :   template <bool IsSigned>
     157             :   bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
     158             :                         SDValue &Offset, SDValue &SLC) const;
     159             : 
     160             :   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
     161             :                         bool &Imm) const;
     162             :   SDValue Expand32BitAddress(SDValue Addr) const;
     163             :   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
     164             :                   bool &Imm) const;
     165             :   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     166             :   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     167             :   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
     168             :   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
     169             :   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
     170             :   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
     171             : 
     172             :   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     173             :   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
     174             :   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     175             :   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
     176             :   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     177             :                        SDValue &Clamp, SDValue &Omod) const;
     178             :   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     179             :                          SDValue &Clamp, SDValue &Omod) const;
     180             : 
     181             :   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
     182             :                                  SDValue &Clamp,
     183             :                                  SDValue &Omod) const;
     184             : 
     185             :   bool SelectVOP3OMods(SDValue In, SDValue &Src,
     186             :                        SDValue &Clamp, SDValue &Omod) const;
     187             : 
     188             :   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     189             :   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     190             :                         SDValue &Clamp) const;
     191             : 
     192             :   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     193             :   bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
     194             :                         SDValue &Clamp) const;
     195             : 
     196             :   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     197             :   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
     198             :                             SDValue &Clamp) const;
     199             :   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
     200             :   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
     201             : 
     202             :   bool SelectHi16Elt(SDValue In, SDValue &Src) const;
     203             : 
                         // Hand-written selection routines for individual opcodes; Select()
                         // dispatches to several of them from its opcode switch.
     204             :   void SelectADD_SUB_I64(SDNode *N);
     205             :   void SelectUADDO_USUBO(SDNode *N);
     206             :   void SelectDIV_SCALE(SDNode *N);
     207             :   void SelectMAD_64_32(SDNode *N);
     208             :   void SelectFMA_W_CHAIN(SDNode *N);
     209             :   void SelectFMUL_W_CHAIN(SDNode *N);
     210             : 
     211             :   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
     212             :                    uint32_t Offset, uint32_t Width);
     213             :   void SelectS_BFEFromShifts(SDNode *N);
     214             :   void SelectS_BFE(SDNode *N);
     215             :   bool isCBranchSCC(const SDNode *N) const;
     216             :   void SelectBRCOND(SDNode *N);
     217             :   void SelectFMAD_FMA(SDNode *N);
     218             :   void SelectATOMIC_CMP_SWAP(SDNode *N);
     219             : 
     220             : protected:
     221             :   // Include the pieces autogenerated from the target description.
     222             : #include "AMDGPUGenDAGISel.inc"
     223             : };
     224             : 
     225         554 : class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
                         // Instruction selector variant for R600; reuses AMDGPUDAGToDAGISel and
                         // overrides Select(), runOnMachineFunction() and two address selectors.
                         //
                         // NOTE(review): Subtarget and AMDGPUASI repeat the names of private
                         // members of AMDGPUDAGToDAGISel, so this class carries its own separate
                         // copies. Only AMDGPUASI is assigned in the constructor below; where this
                         // Subtarget is set is not visible here (presumably the
                         // runOnMachineFunction override) -- confirm.
     226             :   const R600Subtarget *Subtarget;
     227             :   AMDGPUAS AMDGPUASI;
     228             : 
     229             :   bool isConstantLoad(const MemSDNode *N, int cbID) const;
     230             :   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
     231             :   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
     232             :                                        SDValue& Offset);
     233             : public:
                         // Forwards TM and OptLevel to the base constructor, then initialises this
                         // class's own AMDGPUASI from the same target machine.
     234         278 :   explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
     235         278 :       AMDGPUDAGToDAGISel(TM, OptLevel) {
     236         278 :     AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
     237             :       }
     238             : 
     239             :   void Select(SDNode *N) override;
     240             : 
     241             :   bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
     242             :                           SDValue &Offset) override;
     243             :   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     244             :                           SDValue &Offset) override;
     245             : 
     246             :   bool runOnMachineFunction(MachineFunction &MF) override;
     247             : protected:
     248             :   // Include the pieces autogenerated from the target description.
     249             : #include "R600GenDAGISel.inc"
     250             : };
     251             : 
     252             : }  // end anonymous namespace
     253             : 
     254       73254 : INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
     255             :                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
                       // The dependencies listed here are the same three analyses that
                       // AMDGPUDAGToDAGISel::getAnalysisUsage() marks as required.
     256       73254 : INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
     257       73254 : INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
     258       73254 : INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
     259      342570 : INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
     260             :                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
     261             : 
     262             : /// This pass converts a legalized DAG into an AMDGPU-specific
     263             : /// DAG, ready for instruction scheduling.
                       ///
                       /// \returns a newly allocated AMDGPUDAGToDAGISel constructed from \p TM and
                       /// \p OptLevel.
     264        1788 : FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
     265             :                                         CodeGenOpt::Level OptLevel) {
     266        1788 :   return new AMDGPUDAGToDAGISel(TM, OptLevel);
     267             : }
     268             : 
     269             : /// This pass converts a legalized DAG into an R600-specific
     270             : /// DAG, ready for instruction scheduling.
                       ///
                       /// \returns a newly allocated R600DAGToDAGISel constructed from \p TM and
                       /// \p OptLevel.
     271         278 : FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
     272             :                                       CodeGenOpt::Level OptLevel) {
     273         556 :   return new R600DAGToDAGISel(TM, OptLevel);
     274             : }
     275             : 
     276       17857 : bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
                         // Cache this function's subtarget before selection starts; helpers such
                         // as isInlineImmediate() and getOperandRegClass() read Subtarget directly.
     277       17857 :   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
     278       17857 :   return SelectionDAGISel::runOnMachineFunction(MF);
     279             : }
     280             : 
     281         492 : bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
                         // Decides whether N may be treated as never-NaN. Checked in order: the
                         // global NoNaNsFPMath option, N's own flags (only when they are defined),
                         // and finally the DAG's isKnownNeverNaN() query.
     282         492 :   if (TM.Options.NoNaNsFPMath)
     283             :     return true;
     284             : 
     285             :   // TODO: Move into isKnownNeverNaN
     286         114 :   if (N->getFlags().isDefined())
     287         102 :     return N->getFlags().hasNoNaNs();
     288             : 
     289          12 :   return CurDAG->isKnownNeverNaN(N);
     290             : }
     291             : 
     292        5357 : bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
                         // Returns whether N is an integer or FP constant that the instruction
                         // info reports as an inline constant. Any other node kind yields false.
     293        5357 :   const SIInstrInfo *TII = Subtarget->getInstrInfo();
     294             : 
     295             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
     296        9628 :     return TII->isInlineConstant(C->getAPIntValue());
     297             : 
                         // FP constants are tested through their raw bit pattern.
     298             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
     299        1010 :     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
     300             : 
     301             :   return false;
     302             : }
     303             : 
     304             : /// Determine the register class for \p OpNo
     305             : /// \returns The register class of the virtual register that will be used for
     306             : /// the given operand number \p OpNo or NULL if the register class cannot be
     307             : /// determined.
     308       23685 : const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
     309             :                                                           unsigned OpNo) const {
                         // Nodes that are not yet machine nodes: only CopyToReg is answered, using
                         // the register named by its operand 1. OpNo is not consulted on this path.
     310       23685 :   if (!N->isMachineOpcode()) {
     311         645 :     if (N->getOpcode() == ISD::CopyToReg) {
     312        1290 :       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
     313         645 :       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     314         325 :         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
     315             :         return MRI.getRegClass(Reg);
     316             :       }
     317             : 
                             // Physical register: ask the SI register info for its class.
     318             :       const SIRegisterInfo *TRI
     319         320 :         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
     320         320 :       return TRI->getPhysRegClass(Reg);
     321             :     }
     322             : 
     323             :     return nullptr;
     324             :   }
     325             : 
     326       23040 :   switch (N->getMachineOpcode()) {
                         // Generic machine node: index the instruction description past its defs
                         // to reach operand OpNo. A RegClass of -1 is reported as "unknown".
     327       21644 :   default: {
     328             :     const MCInstrDesc &Desc =
     329       43288 :         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
     330       43288 :     unsigned OpIdx = Desc.getNumDefs() + OpNo;
     331       43288 :     if (OpIdx >= Desc.getNumOperands())
     332             :       return nullptr;
     333       21644 :     int RegClass = Desc.OpInfo[OpIdx].RegClass;
     334       21644 :     if (RegClass == -1)
     335             :       return nullptr;
     336             : 
     337       21629 :     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
     338             :   }
                         // REG_SEQUENCE: operand 0 holds the class ID of the whole sequence, and
                         // operand OpNo + 1 is read as the subregister index for the queried slot.
     339        1396 :   case AMDGPU::REG_SEQUENCE: {
     340        2792 :     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
     341             :     const TargetRegisterClass *SuperRC =
     342        1396 :         Subtarget->getRegisterInfo()->getRegClass(RCID);
     343             : 
     344        2792 :     SDValue SubRegOp = N->getOperand(OpNo + 1);
     345        2792 :     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
     346        1396 :     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
     347        1396 :                                                               SubRegIdx);
     348             :   }
     349             :   }
     350             : }
     351             : 
     352       74313 : SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
                         // N is cast<> to MemSDNode, so callers must only pass memory nodes.
                         // Accesses outside the local address space, or on subtargets where
                         // ldsRequiresM0Init() is false, are returned unchanged.
     353       85647 :   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
     354       11334 :       !Subtarget->ldsRequiresM0Init())
     355             :     return N;
     356             : 
     357             :   const SITargetLowering& Lowering =
     358        8409 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     359             : 
     360             :   // Write max value to m0 before each load operation
     361             : 
                         // The value copied to M0 is the i32 constant -1, chained on the entry node.
     362       16818 :   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
     363       25227 :                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
     364             : 
     365        8409 :   SDValue Glue = M0.getValue(1);
     366             : 
                         // Rebuild N with its original operands plus the copy's glue result
                         // appended as the last operand.
     367             :   SmallVector <SDValue, 8> Ops;
     368       76282 :   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     369       59464 :      Ops.push_back(N->getOperand(i));
     370             :   }
     371        8409 :   Ops.push_back(Glue);
     372       33636 :   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
     373             : }
     374             : 
     375             : static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
                         // Maps a vector element count of 1, 2, 4, 8 or 16 to the corresponding
                         // SReg_* register class ID. Any other count reaches llvm_unreachable.
     376       24596 :   switch (NumVectorElts) {
     377             :   case 1:
     378             :     return AMDGPU::SReg_32_XM0RegClassID;
     379       15955 :   case 2:
     380             :     return AMDGPU::SReg_64RegClassID;
     381        7938 :   case 4:
     382             :     return AMDGPU::SReg_128RegClassID;
     383         662 :   case 8:
     384             :     return AMDGPU::SReg_256RegClassID;
     385          41 :   case 16:
     386             :     return AMDGPU::SReg_512RegClassID;
     387             :   }
     388             : 
     389           0 :   llvm_unreachable("invalid vector size");
     390             : }
     391             : 
     392         734 : static bool getConstantValue(SDValue N, uint32_t &Out) {
                         // If N is an integer or FP constant, stores its value (FP: its raw bit
                         // pattern) in Out and returns true. Otherwise returns false and leaves
                         // Out untouched.
                         // NOTE(review): Out is only 32 bits wide, so a wider constant would be
                         // narrowed by the assignment. The caller visible in Select() uses this
                         // only under a 16-bit scalar-size check -- confirm no other callers exist.
     393             :   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
     394         302 :     Out = C->getAPIntValue().getZExtValue();
     395             :     return true;
     396             :   }
     397             : 
     398             :   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
     399        1012 :     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
     400             :     return true;
     401             :   }
     402             : 
     403             :   return false;
     404             : }
     405             : 
     406       27026 : void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
                         // Selects a vector-building node N into a REG_SEQUENCE whose operands are
                         // (register class, then a value/subregister-index pair per element). A
                         // one-element vector becomes a COPY_TO_REGCLASS instead.
     407       54052 :   EVT VT = N->getValueType(0);
     408       27026 :   unsigned NumVectorElts = VT.getVectorNumElements();
     409       27026 :   EVT EltVT = VT.getVectorElementType();
     410             :   SDLoc DL(N);
     411       54052 :   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     412             : 
     413       27026 :   if (NumVectorElts == 1) {
     414           0 :     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
     415             :                          RegClass);
     416             :     return;
     417             :   }
     418             : 
     419             :   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
     420             :                                   "supported yet");
     421             :   // 16 = Max Num Vector Elements
     422             :   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
     423             :   // 1 = Vector Register Class
     424       54052 :   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
     425             : 
     426       54052 :   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
     427             :   bool IsRegSeq = true;
     428       27026 :   unsigned NOps = N->getNumOperands();
                         // Fill one (value, subreg index) pair per operand of N. Encountering a
                         // RegisterSDNode operand abandons the REG_SEQUENCE form.
     429      184126 :   for (unsigned i = 0; i < NOps; i++) {
     430             :     // XXX: Why is this here?
     431      157100 :     if (isa<RegisterSDNode>(N->getOperand(i))) {
     432             :       IsRegSeq = false;
     433             :       break;
     434             :     }
     435       78550 :     unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     436      235650 :     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
     437      235650 :     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     438             :   }
     439       27026 :   if (NOps != NumVectorElts) {
     440             :     // Fill in the missing undef elements if this was a scalar_to_vector.
     441             :     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
                           // A single IMPLICIT_DEF node is shared by all of the padded elements.
     442           4 :     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
     443           4 :                                                    DL, EltVT);
     444          12 :     for (unsigned i = NOps; i < NumVectorElts; ++i) {
     445           4 :       unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
     446           8 :       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
     447           8 :       RegSeqArgs[1 + (2 * i) + 1] =
     448           4 :           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
     449             :     }
     450             :   }
     451             : 
                         // NOTE(review): SelectCode(N) is not followed by a return, so when
                         // IsRegSeq is false control still falls through to SelectNodeTo() below
                         // with a partially filled RegSeqArgs. This looks unintended (a missing
                         // early return) -- confirm against the real source before changing.
     452       27026 :   if (!IsRegSeq)
     453             :     SelectCode(N);
     454       81078 :   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
     455             : }
     456             : 
     457      517901 : void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     458      517901 :   unsigned int Opc = N->getOpcode();
     459      517901 :   if (N->isMachineOpcode()) {
     460             :     N->setNodeId(-1);
     461             :     return;   // Already selected.
     462             :   }
     463             : 
     464     1031723 :   if (isa<AtomicSDNode>(N) ||
     465             :       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
     466             :        Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
     467      515034 :        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
     468             :        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
     469        1900 :     N = glueCopyToM0(N);
     470             : 
     471      516689 :   switch (Opc) {
     472             :   default:
     473             :     break;
     474             :   // We are selecting i64 ADD here instead of custom lower it during
     475             :   // DAG legalization, so we can fold some i64 ADDs used for address
     476             :   // calculation into the LOAD and STORE instructions.
     477             :   case ISD::ADDC:
     478             :   case ISD::ADDE:
     479             :   case ISD::SUBC:
     480             :   case ISD::SUBE: {
     481         240 :     if (N->getValueType(0) != MVT::i64)
     482             :       break;
     483             : 
     484         126 :     SelectADD_SUB_I64(N);
     485         126 :     return;
     486             :   }
     487         203 :   case ISD::UADDO:
     488             :   case ISD::USUBO: {
     489         203 :     SelectUADDO_USUBO(N);
     490         203 :     return;
     491             :   }
     492          45 :   case AMDGPUISD::FMUL_W_CHAIN: {
     493          45 :     SelectFMUL_W_CHAIN(N);
     494          45 :     return;
     495             :   }
     496         225 :   case AMDGPUISD::FMA_W_CHAIN: {
     497         225 :     SelectFMA_W_CHAIN(N);
     498         225 :     return;
     499             :   }
     500             : 
     501       25117 :   case ISD::SCALAR_TO_VECTOR:
     502             :   case ISD::BUILD_VECTOR: {
     503       50234 :     EVT VT = N->getValueType(0);
     504       25117 :     unsigned NumVectorElts = VT.getVectorNumElements();
     505       25117 :     if (VT.getScalarSizeInBits() == 16) {
     506         521 :       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
     507             :         uint32_t LHSVal, RHSVal;
     508         734 :         if (getConstantValue(N->getOperand(0), LHSVal) &&
     509         215 :             getConstantValue(N->getOperand(1), RHSVal)) {
     510         189 :           uint32_t K = LHSVal | (RHSVal << 16);
     511         567 :           CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
     512         189 :                                CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
     513         189 :           return;
     514             :         }
     515             :       }
     516             : 
     517         332 :       break;
     518             :     }
     519             : 
     520             :     assert(VT.getVectorElementType().bitsEq(MVT::i32));
     521             :     unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
     522       24596 :     SelectBuildVector(N, RegClassID);
     523       24596 :     return;
     524             :   }
     525        9063 :   case ISD::BUILD_PAIR: {
     526             :     SDValue RC, SubReg0, SubReg1;
     527             :     SDLoc DL(N);
     528        9063 :     if (N->getValueType(0) == MVT::i128) {
     529           0 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
     530           0 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
     531           0 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     532        9063 :     } else if (N->getValueType(0) == MVT::i64) {
     533        9063 :       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
     534        9063 :       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     535        9063 :       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     536             :     } else {
     537           0 :       llvm_unreachable("Unhandled value type for BUILD_PAIR");
     538             :     }
     539        9063 :     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
     540       18126 :                             N->getOperand(1), SubReg1 };
     541       27189 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     542             :                                           N->getValueType(0), Ops));
     543             :     return;
     544             :   }
     545             : 
     546       24968 :   case ISD::Constant:
     547             :   case ISD::ConstantFP: {
     548       73695 :     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
     549             :       break;
     550             : 
     551             :     uint64_t Imm;
     552             :     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
     553         219 :       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
     554             :     else {
     555             :       ConstantSDNode *C = cast<ConstantSDNode>(N);
     556        1136 :       Imm = C->getZExtValue();
     557             :     }
     558             : 
     559             :     SDLoc DL(N);
     560        3627 :     SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     561             :                                 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
     562        1209 :                                                     MVT::i32));
     563        3627 :     SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
     564        1209 :                                 CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
     565             :     const SDValue Ops[] = {
     566        1209 :       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     567        1209 :       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
     568        1209 :       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
     569        3627 :     };
     570             : 
     571        3627 :     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
     572             :                                           N->getValueType(0), Ops));
     573             :     return;
     574             :   }
     575       72413 :   case ISD::LOAD:
     576             :   case ISD::STORE:
     577             :   case ISD::ATOMIC_LOAD:
     578             :   case ISD::ATOMIC_STORE: {
     579       72413 :     N = glueCopyToM0(N);
     580       72413 :     break;
     581             :   }
     582             : 
     583         148 :   case AMDGPUISD::BFE_I32:
     584             :   case AMDGPUISD::BFE_U32: {
     585             :     // There is a scalar version available, but unlike the vector version which
     586             :     // has a separate operand for the offset and width, the scalar version packs
     587             :     // the width and offset into a single operand. Try to move to the scalar
     588             :     // version if the offsets are constant, so that we can try to keep extended
     589             :     // loads of kernel arguments in SGPRs.
     590             : 
     591             :     // TODO: Technically we could try to pattern match scalar bitshifts of
     592             :     // dynamic values, but it's probably not useful.
     593         148 :     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     594             :     if (!Offset)
     595             :       break;
     596             : 
     597             :     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
     598             :     if (!Width)
     599             :       break;
     600             : 
     601             :     bool Signed = Opc == AMDGPUISD::BFE_I32;
     602             : 
     603         264 :     uint32_t OffsetVal = Offset->getZExtValue();
     604         264 :     uint32_t WidthVal = Width->getZExtValue();
     605             : 
     606         132 :     ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
     607         132 :                             SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
     608         132 :     return;
     609             :   }
     610         267 :   case AMDGPUISD::DIV_SCALE: {
     611         267 :     SelectDIV_SCALE(N);
     612         267 :     return;
     613             :   }
     614          20 :   case AMDGPUISD::MAD_I64_I32:
     615             :   case AMDGPUISD::MAD_U64_U32: {
     616          20 :     SelectMAD_64_32(N);
     617          20 :     return;
     618             :   }
     619       12783 :   case ISD::CopyToReg: {
     620             :     const SITargetLowering& Lowering =
     621       12783 :       *static_cast<const SITargetLowering*>(getTargetLowering());
     622       12783 :     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
     623       12783 :     break;
     624             :   }
     625             :   case ISD::AND:
     626             :   case ISD::SRL:
     627             :   case ISD::SRA:
     628             :   case ISD::SIGN_EXTEND_INREG:
     629       25740 :     if (N->getValueType(0) != MVT::i32)
     630             :       break;
     631             : 
     632       19226 :     SelectS_BFE(N);
     633       19226 :     return;
     634         580 :   case ISD::BRCOND:
     635         580 :     SelectBRCOND(N);
     636         580 :     return;
     637        2572 :   case ISD::FMAD:
     638             :   case ISD::FMA:
     639        2572 :     SelectFMAD_FMA(N);
     640        2572 :     return;
     641         195 :   case AMDGPUISD::ATOMIC_CMP_SWAP:
     642         195 :     SelectATOMIC_CMP_SWAP(N);
     643         195 :     return;
     644             :   }
     645             : 
     646             :   SelectCode(N);
     647             : }
     648             : 
     649         373 : bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
                         // Returns true when the terminator of the IR basic block behind the
                         // machine block currently being selected (FuncInfo->MBB) carries
                         // either "amdgpu.uniform" or "structurizecfg.uniform" metadata.
                         // The node argument N is not consulted.
     650         373 :   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
     651         373 :   const Instruction *Term = BB->getTerminator();
     652         372 :   return Term->getMetadata("amdgpu.uniform") ||
     653         373 :          Term->getMetadata("structurizecfg.uniform");
     654             : }
     655             : 
     656           1 : StringRef AMDGPUDAGToDAGISel::getPassName() const {
                         // Fixed, human-readable name reported for this selection pass.
     657           1 :   return "AMDGPU DAG->DAG Pattern Instruction Selection";
     658             : }
     659             : 
     660             : //===----------------------------------------------------------------------===//
     661             : // Complex Patterns
     662             : //===----------------------------------------------------------------------===//
     663             : 
     664           0 : bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
     665             :                                             SDValue &Offset) {
                         // Never matches: unconditionally returns false and leaves Base and
                         // Offset unmodified.
     666           0 :   return false;
     667             : }
     668             : 
     669           0 : bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
     670             :                                             SDValue &Offset) {
                         // Decomposes Addr into a Base value plus an i32 target-constant
                         // Offset. Every path assigns both outputs, and the function always
                         // returns true.
     671             :   ConstantSDNode *C;
     672             :   SDLoc DL(Addr);
     673             : 
                         // Case 1: the whole address is a constant. Use the
                         // R600::INDIRECT_BASE_ADDR register as the base and the constant as
                         // the offset.
     674             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
     675           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
     676           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // Case 2: a DWORDADDR node wrapping a constant is handled the same
                         // way as a bare constant.
     677           0 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
     678             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
     679           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
     680           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // Case 3: ADD or OR whose second operand is a constant. The first
                         // operand becomes the base and the constant becomes the offset.
     681           0 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
     682             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
     683           0 :     Base = Addr.getOperand(0);
     684           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // Fallback: use the address unchanged as the base with offset 0.
     685             :   } else {
     686           0 :     Base = Addr;
     687           0 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
     688             :   }
     689             : 
     690           0 :   return true;
     691             : }
     692             : 
     693             : // FIXME: Should only handle addcarry/subcarry
     694         126 : void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
                         // Expands a 64-bit add/sub node into two 32-bit scalar operations:
                         // the low halves are combined first, the high halves are combined by
                         // a carry-consuming opcode that reads result 1 of the low operation,
                         // and the two 32-bit results are joined with a REG_SEQUENCE that
                         // replaces N.
     695             :   SDLoc DL(N);
     696         126 :   SDValue LHS = N->getOperand(0);
     697         126 :   SDValue RHS = N->getOperand(1);
     698             : 
                         // Classify the opcode: ADDE/SUBE take a carry input (operand 2 of N),
                         // ADDC/SUBC/ADDE/SUBE also have a carry result, and IsAdd picks the
                         // add opcodes over the subtract opcodes.
     699         126 :   unsigned Opcode = N->getOpcode();
     700         126 :   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
     701             :   bool ProduceCarry =
     702         126 :       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
     703         126 :   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
     704             : 
     705         252 :   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
     706         252 :   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
     707             : 
                         // Split both operands into their sub0 and sub1 32-bit pieces.
     708         252 :   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     709         126 :                                        DL, MVT::i32, LHS, Sub0);
     710         252 :   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     711         126 :                                        DL, MVT::i32, LHS, Sub1);
     712             : 
     713         252 :   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     714         126 :                                        DL, MVT::i32, RHS, Sub0);
     715         252 :   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
     716         126 :                                        DL, MVT::i32, RHS, Sub1);
     717             : 
                         // Each half produces an i32 value plus a glue result.
     718         252 :   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
     719             : 
     720         126 :   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
     721         126 :   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
     722             : 
                         // Low half: plain opcode, unless N itself takes a carry input, in
                         // which case the carry opcode is used with N's operand 2 appended.
     723             :   SDNode *AddLo;
     724         126 :   if (!ConsumeCarry) {
     725             :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
     726         240 :     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
     727             :   } else {
     728           6 :     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
     729          12 :     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
     730             :   }
                         // High half: always the carry opcode, fed by result 1 of the low half.
     731             :   SDValue AddHiArgs[] = {
     732             :     SDValue(Hi0, 0),
     733             :     SDValue(Hi1, 0),
     734             :     SDValue(AddLo, 1)
     735             :   };
     736         252 :   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
     737             : 
                         // Reassemble the 64-bit value in the SReg_64 register class.
     738             :   SDValue RegSequenceArgs[] = {
     739         126 :     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
     740             :     SDValue(AddLo,0),
     741             :     Sub0,
     742             :     SDValue(AddHi,0),
     743             :     Sub1,
     744         378 :   };
     745         252 :   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
     746         126 :                                                MVT::i64, RegSequenceArgs);
     747             : 
     748         126 :   if (ProduceCarry) {
     749             :     // Replace the carry-use
     750         126 :     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
     751             :   }
     752             : 
     753             :   // Replace the remaining uses.
     754         126 :   ReplaceNode(N, RegSequence);
     755         126 : }
     756             : 
     757         203 : void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
                         // Morphs N in place into V_ADD_I32_e64 (for ISD::UADDO) or
                         // V_SUB_I32_e64 (for any other opcode reaching here), keeping N's
                         // value-type list and its first two operands.
     758             :   // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
     759             :   // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
     760             :   // FIXME: We should probably rename the opcodes here.
     761         203 :   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     762             :     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
     763             : 
     764         812 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
     765         203 :                        { N->getOperand(0), N->getOperand(1) });
     766         203 : }
     767             : 
     768         225 : void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
                         // Morphs N in place into V_FMA_F32 with a ten-entry operand list,
                         // keeping N's value-type list.
     769             :   SDLoc SL(N);
     770             :   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
     771         225 :   SDValue Ops[10];
     772             : 
                         // N's operands 1..3 are the three sources. Each helper call fills the
                         // value slot and the modifier slot that precedes it; the first call
                         // also fills the clamp (Ops[6]) and omod (Ops[7]) slots.
     773         225 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
     774         225 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
     775         225 :   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
                         // N's first and last operands are appended after the VOP3 operands.
                         // NOTE(review): presumably these are the chain and the glue, going by
                         // the _W_CHAIN name -- confirm against the node definition.
     776         225 :   Ops[8] = N->getOperand(0);
     777         225 :   Ops[9] = N->getOperand(4);
     778             : 
     779         675 :   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
     780         225 : }
     781             : 
     782          45 : void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
                         // Morphs N in place into V_MUL_F32_e64 with an eight-entry operand
                         // list, keeping N's value-type list.
     783             :   SDLoc SL(N);
     784             :   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
     785          45 :   SDValue Ops[8];
     786             : 
                         // N's operands 1 and 2 are the two sources. The first helper call also
                         // fills the clamp (Ops[4]) and omod (Ops[5]) slots.
     787          45 :   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
     788          45 :   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
                         // N's first and last operands are appended after the VOP3 operands.
                         // NOTE(review): presumably these are the chain and the glue, going by
                         // the _W_CHAIN name -- confirm against the node definition.
     789          45 :   Ops[6] = N->getOperand(0);
     790          45 :   Ops[7] = N->getOperand(3);
     791             : 
     792         135 :   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
     793          45 : }
     794             : 
     795             : // We need to handle this here because tablegen doesn't support matching
     796             : // instructions with multiple outputs.
     797         267 : void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
                         // Morphs N in place into V_DIV_SCALE_F64 or V_DIV_SCALE_F32, chosen by
                         // the type of N's first result, forwarding N's three operands and
                         // keeping its value-type list.
     798             :   SDLoc SL(N);
     799         267 :   EVT VT = N->getValueType(0);
     800             : 
                         // Only f32 and f64 results are expected here.
     801             :   assert(VT == MVT::f32 || VT == MVT::f64);
     802             : 
     803             :   unsigned Opc
     804             :     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
     805             : 
     806         267 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
     807         801 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     808         267 : }
     809             : 
     810             : // We need to handle this here because tablegen doesn't support matching
     811             : // instructions with multiple outputs.
     812          20 : void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
                         // Morphs N in place into V_MAD_I64_I32 (for MAD_I64_I32) or
                         // V_MAD_U64_U32 (otherwise), forwarding N's three operands and
                         // appending an i1 target constant 0 as the clamp operand.
     813             :   SDLoc SL(N);
     814          20 :   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
     815          20 :   unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
     816             : 
     817          20 :   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
     818          20 :   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
     819          20 :                     Clamp };
     820          60 :   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
     821          20 : }
     822             : 
     823        7928 : bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
     824             :                                          unsigned OffsetBits) const {
                         // Decides whether Offset may be folded into an immediate offset field
                         // of OffsetBits bits on top of Base.
                         //
                         // The range check only covers OffsetBits == 16 and OffsetBits == 8;
                         // any other width skips it.
     825        7928 :   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
     826         465 :       (OffsetBits == 8 && !isUInt<8>(Offset)))
     827             :     return false;
     828             : 
                         // From SEA_ISLANDS onwards, or when unsafe DS offset folding is
                         // enabled on the subtarget, any in-range offset is accepted.
     829        9497 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
     830        1983 :       Subtarget->unsafeDSOffsetFoldingEnabled())
     831             :     return true;
     832             : 
     833             :   // On Southern Islands, instructions with a negative base value and an
     834             :   // offset don't seem to work, so require the base's sign bit to be zero.
     835        1979 :   return CurDAG->SignBitIsZero(Base);
     836             : }
     837             : 
     838       10752 : bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
     839             :                                               SDValue &Offset) const {
                         // Splits Addr into a Base value and a single i16 target-constant
                         // Offset. Three foldable shapes are tried in turn; if none applies
                         // (or its legality check fails) the address is used unchanged with
                         // offset 0. The function always returns true.
     840             :   SDLoc DL(Addr);
     841       10752 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     842        7447 :     SDValue N0 = Addr.getOperand(0);
     843        7447 :     SDValue N1 = Addr.getOperand(1);
     844             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
                           // The legality check takes the sign-extended constant (converted to
                           // the unsigned parameter); the emitted offset uses the zero-extended
                           // value.
     845       14894 :     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
     846             :       // (add n0, c0)
     847        7002 :       Base = N0;
     848       14004 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
     849        7002 :       return true;
     850             :     }
     851        3305 :   } else if (Addr.getOpcode() == ISD::SUB) {
     852             :     // sub C, x -> add (sub 0, x), C
     853             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     854          18 :       int64_t ByteOffset = C->getSExtValue();
     855          18 :       if (isUInt<16>(ByteOffset)) {
     856          32 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     857             : 
     858             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     859             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     860             :         // here, so this is thrown away.
     861          16 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     862          16 :                                       Zero, Addr.getOperand(1));
     863             : 
     864          16 :         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
     865             :           // FIXME: Select to VOP3 version for with-carry.
                                 // Subtargets with add-no-carry get V_SUB_U32_e64; all others get
                                 // V_SUB_I32_e32.
     866          14 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     867             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     868             : 
     869             :           MachineSDNode *MachineSub
     870          28 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     871          14 :                                      Zero, Addr.getOperand(1));
     872             : 
                                 // The machine-level (0 - x) becomes the base and C the offset.
     873          14 :           Base = SDValue(MachineSub, 0);
     874          28 :           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
     875          14 :           return true;
     876             :         }
     877             :       }
     878             :     }
     879             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     880             :     // If we have a constant address, prefer to put the constant into the
     881             :     // offset. This can save moves to load the constant address since multiple
     882             :     // operations can share the zero base address register, and enables merging
     883             :     // into read2 / write2 instructions.
     884             : 
                           // Note: this DL shadows the function-level DL declared above; both
                           // are built from Addr.
     885             :     SDLoc DL(Addr);
     886             : 
     887        1474 :     if (isUInt<16>(CAddr->getZExtValue())) {
     888        1466 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     889        1466 :       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     890         733 :                                  DL, MVT::i32, Zero);
     891         733 :       Base = SDValue(MovZero, 0);
     892        1466 :       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
     893             :       return true;
     894             :     }
     895             :   }
     896             : 
     897             :   // default case
     898        3003 :   Base = Addr;
     899        9009 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
     900        3003 :   return true;
     901             : }
     902             : 
     903             : // TODO: If offset is too big, put low 16-bit into offset.
     904         594 : bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
     905             :                                                    SDValue &Offset0,
     906             :                                                    SDValue &Offset1) const {
                         // Splits Addr into a Base value and two consecutive i8 offsets,
                         // Offset0 and Offset1 = Offset0 + 1. The offsets are the byte constant
                         // divided by 4. If no foldable shape applies, the address is used
                         // unchanged with offsets 0 and 1. The function always returns true.
     907             :   SDLoc DL(Addr);
     908             : 
     909         594 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     910         461 :     SDValue N0 = Addr.getOperand(0);
     911         461 :     SDValue N1 = Addr.getOperand(1);
     912             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
                           // Integer division: unlike the constant-address case below, this
                           // path does not assert that the byte offset is a multiple of 4.
     913         922 :     unsigned DWordOffset0 = C1->getZExtValue() / 4;
     914         461 :     unsigned DWordOffset1 = DWordOffset0 + 1;
     915             :     // (add n0, c0)
                           // Only the larger offset is checked against the 8-bit limit.
     916         461 :     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
     917         446 :       Base = N0;
     918         892 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     919         892 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     920         446 :       return true;
     921             :     }
     922         133 :   } else if (Addr.getOpcode() == ISD::SUB) {
     923             :     // sub C, x -> add (sub 0, x), C
     924             :     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
     925           8 :       unsigned DWordOffset0 = C->getZExtValue() / 4;
     926           4 :       unsigned DWordOffset1 = DWordOffset0 + 1;
     927             : 
     928           4 :       if (isUInt<8>(DWordOffset0)) {
                               // Note: this DL shadows the function-level DL declared above;
                               // both are built from Addr.
     929             :         SDLoc DL(Addr);
     930           8 :         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     931             : 
     932             :         // XXX - This is kind of hacky. Create a dummy sub node so we can check
     933             :         // the known bits in isDSOffsetLegal. We need to emit the selected node
     934             :         // here, so this is thrown away.
     935           4 :         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
     936           4 :                                       Zero, Addr.getOperand(1));
     937             : 
     938           4 :         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
                                 // Subtargets with add-no-carry get V_SUB_U32_e64; all others get
                                 // V_SUB_I32_e32.
     939           2 :           unsigned SubOp = Subtarget->hasAddNoCarry() ?
     940             :             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
     941             : 
     942             :           MachineSDNode *MachineSub
     943           4 :             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
     944           2 :                                      Zero, Addr.getOperand(1));
     945             : 
     946           2 :           Base = SDValue(MachineSub, 0);
     947           4 :           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     948           4 :           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     949             :           return true;
     950             :         }
     951             :       }
     952             :     }
     953             :   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     954          48 :     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
     955          24 :     unsigned DWordOffset1 = DWordOffset0 + 1;
                           // A constant address is expected to be an exact multiple of 4 bytes.
     956             :     assert(4 * DWordOffset0 == CAddr->getZExtValue());
     957             : 
                           // Both offsets must fit in 8 bits; the base is then a zero
                           // materialised with V_MOV_B32_e32.
     958          24 :     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
     959          32 :       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
     960             :       MachineSDNode *MovZero
     961          32 :         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
     962          16 :                                  DL, MVT::i32, Zero);
     963          16 :       Base = SDValue(MovZero, 0);
     964          32 :       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
     965          32 :       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
     966             :       return true;
     967             :     }
     968             :   }
     969             : 
     970             :   // default case
     971             : 
     972             :   // FIXME: This is broken on SI where we still need to check if the base
     973             :   // pointer is positive here.
     974         130 :   Base = Addr;
     975         260 :   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
     976         260 :   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
     977         130 :   return true;
     978             : }
     979             : 
     980       42832 : bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
     981             :                                      SDValue &VAddr, SDValue &SOffset,
     982             :                                      SDValue &Offset, SDValue &Offen,
     983             :                                      SDValue &Idxen, SDValue &Addr64,
     984             :                                      SDValue &GLC, SDValue &SLC,
     985             :                                      SDValue &TFE) const {
                         // Breaks Addr down into the MUBUF operand set: Ptr, VAddr, SOffset,
                         // an i16 immediate Offset, and the i1 flags Offen, Idxen, Addr64, GLC,
                         // SLC and TFE. Returns false only when the subtarget prefers flat
                         // instructions for global accesses; every other path returns true.
     986             :   // Subtarget prefers to use flat instruction
     987       42832 :   if (Subtarget->useFlatForGlobal())
     988             :     return false;
     989             : 
     990             :   SDLoc DL(Addr);
     991             : 
                         // GLC and SLC are defaulted to 0 only when the caller passed them in
                         // empty; TFE is always overwritten with 0.
     992       31552 :   if (!GLC.getNode())
     993       63104 :     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
     994       31552 :   if (!SLC.getNode())
     995       62690 :     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
     996       63104 :   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
     997             : 
                         // Defaults; Addr64 and SOffset may be replaced further down.
     998       63104 :   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
     999       63104 :   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1000       63104 :   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1001       63104 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1002             : 
    1003       31552 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1004        8849 :     SDValue N0 = Addr.getOperand(0);
    1005        8849 :     SDValue N1 = Addr.getOperand(1);
    1006             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1007             : 
    1008        8849 :     if (N0.getOpcode() == ISD::ADD) {
    1009             :       // (add (add N2, N3), C1) -> addr64
    1010        1589 :       SDValue N2 = N0.getOperand(0);
    1011        1589 :       SDValue N3 = N0.getOperand(1);
    1012        3178 :       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1013        1589 :       Ptr = N2;
    1014        1589 :       VAddr = N3;
    1015             :     } else {
    1016             :       // (add N0, C1) -> offset
    1017       14520 :       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1018        7260 :       Ptr = N0;
    1019             :     }
    1020             : 
                           // Preferred: the constant fits the MUBUF immediate offset field.
    1021       17698 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    1022       17108 :       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1023             :       return true;
    1024             :     }
    1025             : 
                           // Otherwise a constant that fits in 32 bits is materialised with
                           // S_MOV_B32 and passed through SOffset, with the immediate set to 0.
    1026         295 :     if (isUInt<32>(C1->getZExtValue())) {
    1027             :       // Illegal offset, store it in soffset.
    1028         570 :       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1029         570 :       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1030             :                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
    1031             :                         0);
    1032             :       return true;
    1033             :     }
                           // Constants wider than 32 bits fall through to the generic cases
                           // below.
    1034             :   }
    1035             : 
    1036       22713 :   if (Addr.getOpcode() == ISD::ADD) {
    1037             :     // (add N0, N1) -> addr64
    1038        3492 :     SDValue N0 = Addr.getOperand(0);
    1039        3492 :     SDValue N1 = Addr.getOperand(1);
    1040        6984 :     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    1041        3492 :     Ptr = N0;
    1042        3492 :     VAddr = N1;
    1043        6984 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1044             :     return true;
    1045             :   }
    1046             : 
    1047             :   // default case -> offset
    1048       38442 :   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1049       19221 :   Ptr = Addr;
    1050       38442 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1051             : 
    1052             :   return true;
    1053             : }
    1054             : 
    1055       29395 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1056             :                                            SDValue &VAddr, SDValue &SOffset,
    1057             :                                            SDValue &Offset, SDValue &GLC,
    1058             :                                            SDValue &SLC, SDValue &TFE) const {
                         // Matches only when SelectMUBUF classifies Addr as an addr64 access.
                         // On success SRsrc is built from the pointer part via wrapAddr64Rsrc;
                         // the other outputs are whatever SelectMUBUF assigned. Returns false
                         // on VOLCANIC_ISLANDS and later, when SelectMUBUF fails, or when the
                         // Addr64 flag came back as 0.
    1059       29395 :   SDValue Ptr, Offen, Idxen, Addr64;
    1060             : 
    1061             :   // addr64 bit was removed for volcanic islands.
    1062       29395 :   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    1063             :     return false;
    1064             : 
    1065       15927 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1066             :               GLC, SLC, TFE))
    1067             :     return false;
    1068             : 
                         // SelectMUBUF always leaves Addr64 as a constant node on success, so
                         // the cast below is expected to hold.
    1069             :   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
    1070       27102 :   if (C->getSExtValue()) {
    1071             :     SDLoc DL(Addr);
    1072             : 
    1073             :     const SITargetLowering& Lowering =
    1074        4144 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1075             : 
    1076        4144 :     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    1077             :     return true;
    1078             :   }
    1079             : 
    1080             :   return false;
    1081             : }
    1082             : 
    1083         581 : bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    1084             :                                            SDValue &VAddr, SDValue &SOffset,
    1085             :                                            SDValue &Offset,
    1086             :                                            SDValue &SLC) const {
    1087        1743 :   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
    1088         581 :   SDValue GLC, TFE;
    1089             : 
    1090         581 :   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
    1091             : }
    1092             : 
    1093             : static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
    1094             :   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
    1095          89 :   return PSV && PSV->isStack();
    1096             : }
    1097             : 
    1098        6077 : std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
    1099        6077 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1100             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1101             : 
    1102             :   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    1103        4873 :     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
    1104        9746 :                                               FI->getValueType(0));
    1105             : 
    1106             :     // If we can resolve this to a frame index access, this is relative to the
    1107             :     // frame pointer SGPR.
    1108       14619 :     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
    1109        9746 :                                                    MVT::i32));
    1110             :   }
    1111             : 
    1112             :   // If we don't know this private access is a local stack object, it needs to
    1113             :   // be relative to the entry point's scratch wave offset register.
    1114        3612 :   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
    1115        2408 :                                                MVT::i32));
    1116             : }
    1117             : 
    1118        6083 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    1119             :                                                  SDValue Addr, SDValue &Rsrc,
    1120             :                                                  SDValue &VAddr, SDValue &SOffset,
    1121             :                                                  SDValue &ImmOffset) const {
    1122             : 
    1123             :   SDLoc DL(Addr);
    1124        6083 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1125        6083 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1126             : 
    1127       12166 :   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1128             : 
    1129             :   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    1130           6 :     unsigned Imm = CAddr->getZExtValue();
    1131             : 
    1132          12 :     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    1133          12 :     MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    1134           6 :                                                         DL, MVT::i32, HighBits);
    1135           6 :     VAddr = SDValue(MovHighBits, 0);
    1136             : 
    1137             :     // In a call sequence, stores to the argument stack area are relative to the
    1138             :     // stack pointer.
    1139           6 :     const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1140           0 :     unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1141             :       Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1142             : 
    1143          12 :     SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1144          12 :     ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    1145             :     return true;
    1146             :   }
    1147             : 
    1148        6077 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1149             :     // (add n0, c1)
    1150             : 
    1151        5105 :     SDValue N0 = Addr.getOperand(0);
    1152        5105 :     SDValue N1 = Addr.getOperand(1);
    1153             : 
    1154             :     // Offsets in vaddr must be positive if range checking is enabled.
    1155             :     //
    1156             :     // The total computation of vaddr + soffset + offset must not overflow.  If
    1157             :     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    1158             :     // overflowing.
    1159             :     //
    1160             :     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    1161             :     // always perform a range check. If a negative vaddr base index was used,
    1162             :     // this would fail the range check. The overall address computation would
    1163             :     // compute a valid address, but this doesn't happen due to the range
    1164             :     // check. For out-of-bounds MUBUF loads, a 0 is returned.
    1165             :     //
    1166             :     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    1167             :     // MUBUF vaddr, but not on older subtargets which can only do this if the
    1168             :     // sign bit is known 0.
    1169             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1170       15301 :     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
    1171        8933 :         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
    1172        3842 :          CurDAG->SignBitIsZero(N0))) {
    1173        9000 :       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
    1174        9000 :       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    1175        4500 :       return true;
    1176             :     }
    1177             :   }
    1178             : 
    1179             :   // (node)
    1180        3154 :   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
    1181        3154 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1182        1577 :   return true;
    1183             : }
    1184             : 
    1185        6265 : bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    1186             :                                                   SDValue Addr,
    1187             :                                                   SDValue &SRsrc,
    1188             :                                                   SDValue &SOffset,
    1189             :                                                   SDValue &Offset) const {
    1190             :   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
    1191         376 :   if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    1192             :     return false;
    1193             : 
    1194             :   SDLoc DL(Addr);
    1195         182 :   MachineFunction &MF = CurDAG->getMachineFunction();
    1196         182 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1197             : 
    1198         364 :   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    1199             : 
    1200         182 :   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    1201          89 :   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    1202             :     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
    1203             : 
    1204             :   // FIXME: Get from MachinePointerInfo? We should only be using the frame
    1205             :   // offset if we know this is in a call sequence.
    1206         364 :   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    1207             : 
    1208         364 :   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
    1209             :   return true;
    1210             : }
    1211             : 
    1212       26905 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1213             :                                            SDValue &SOffset, SDValue &Offset,
    1214             :                                            SDValue &GLC, SDValue &SLC,
    1215             :                                            SDValue &TFE) const {
    1216       26905 :   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
    1217             :   const SIInstrInfo *TII =
    1218       26905 :     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
    1219             : 
    1220       26905 :   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
    1221             :               GLC, SLC, TFE))
    1222             :     return false;
    1223             : 
    1224       36002 :   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    1225       54003 :       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    1226       18001 :       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    1227       17064 :     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
    1228       34128 :                     APInt::getAllOnesValue(32).getZExtValue(); // Size
    1229             :     SDLoc DL(Addr);
    1230             : 
    1231             :     const SITargetLowering& Lowering =
    1232       17064 :       *static_cast<const SITargetLowering*>(getTargetLowering());
    1233             : 
    1234       17064 :     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    1235             :     return true;
    1236             :   }
    1237             :   return false;
    1238             : }
    1239             : 
    1240           8 : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1241             :                                            SDValue &Soffset, SDValue &Offset
    1242             :                                            ) const {
    1243           8 :   SDValue GLC, SLC, TFE;
    1244             : 
    1245           8 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1246             : }
    1247             : bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    1248             :                                            SDValue &Soffset, SDValue &Offset,
    1249             :                                            SDValue &SLC) const {
    1250         517 :   SDValue GLC, TFE;
    1251             : 
    1252         517 :   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
    1253             : }
    1254             : 
    1255         436 : bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
    1256             :                                              SDValue &SOffset,
    1257             :                                              SDValue &ImmOffset) const {
    1258             :   SDLoc DL(Constant);
    1259             :   const uint32_t Align = 4;
    1260             :   const uint32_t MaxImm = alignDown(4095, Align);
    1261         872 :   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
    1262             :   uint32_t Overflow = 0;
    1263             : 
    1264         436 :   if (Imm > MaxImm) {
    1265          16 :     if (Imm <= MaxImm + 64) {
    1266             :       // Use an SOffset inline constant for 4..64
    1267           2 :       Overflow = Imm - MaxImm;
    1268             :       Imm = MaxImm;
    1269             :     } else {
    1270             :       // Try to keep the same value in SOffset for adjacent loads, so that
    1271             :       // the corresponding register contents can be re-used.
    1272             :       //
    1273             :       // Load values with all low-bits (except for alignment bits) set into
    1274             :       // SOffset, so that a larger range of values can be covered using
    1275             :       // s_movk_i32.
    1276             :       //
    1277             :       // Atomic operations fail to work correctly when individual address
    1278             :       // components are unaligned, even if their sum is aligned.
    1279          14 :       uint32_t High = (Imm + Align) & ~4095;
    1280          14 :       uint32_t Low = (Imm + Align) & 4095;
    1281             :       Imm = Low;
    1282          14 :       Overflow = High - Align;
    1283             :     }
    1284             :   }
    1285             : 
    1286             :   // There is a hardware bug in SI and CI which prevents address clamping in
    1287             :   // MUBUF instructions from working correctly with SOffsets. The immediate
    1288             :   // offset is unaffected.
    1289          32 :   if (Overflow > 0 &&
    1290          16 :       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    1291             :     return false;
    1292             : 
    1293         856 :   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
    1294             : 
    1295         428 :   if (Overflow <= 64)
    1296         842 :     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
    1297             :   else
    1298          14 :     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
    1299             :                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
    1300             :                       0);
    1301             : 
    1302             :   return true;
    1303             : }
    1304             : 
    1305         252 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
    1306             :                                                     SDValue &SOffset,
    1307             :                                                     SDValue &ImmOffset) const {
    1308             :   SDLoc DL(Offset);
    1309             : 
    1310             :   if (!isa<ConstantSDNode>(Offset))
    1311             :     return false;
    1312             : 
    1313         252 :   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
    1314             : }
    1315             : 
    1316         389 : bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
    1317             :                                                      SDValue &SOffset,
    1318             :                                                      SDValue &ImmOffset,
    1319             :                                                      SDValue &VOffset) const {
    1320             :   SDLoc DL(Offset);
    1321             : 
    1322             :   // Don't generate an unnecessary voffset for constant offsets.
    1323             :   if (isa<ConstantSDNode>(Offset)) {
    1324         260 :     SDValue Tmp1, Tmp2;
    1325             : 
    1326             :     // When necessary, use a voffset in <= CI anyway to work around a hardware
    1327             :     // bug.
    1328         380 :     if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
    1329         120 :         SelectMUBUFConstant(Offset, Tmp1, Tmp2))
    1330         252 :       return false;
    1331             :   }
    1332             : 
    1333         137 :   if (CurDAG->isBaseWithConstantOffset(Offset)) {
    1334          66 :     SDValue N0 = Offset.getOperand(0);
    1335          66 :     SDValue N1 = Offset.getOperand(1);
    1336         196 :     if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
    1337          64 :         SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
    1338          64 :       VOffset = N0;
    1339             :       return true;
    1340             :     }
    1341             :   }
    1342             : 
    1343         146 :   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1344         146 :   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    1345          73 :   VOffset = Offset;
    1346             : 
    1347          73 :   return true;
    1348             : }
    1349             : 
    1350             : template <bool IsSigned>
    1351       11470 : bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
    1352             :                                           SDValue &VAddr,
    1353             :                                           SDValue &Offset,
    1354             :                                           SDValue &SLC) const {
    1355             :   int64_t OffsetVal = 0;
    1356             : 
    1357       14541 :   if (Subtarget->hasFlatInstOffsets() &&
    1358        3071 :       CurDAG->isBaseWithConstantOffset(Addr)) {
    1359         433 :     SDValue N0 = Addr.getOperand(0);
    1360         433 :     SDValue N1 = Addr.getOperand(1);
    1361         433 :     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    1362             : 
    1363         433 :     if ((IsSigned && isInt<13>(COffsetVal)) ||
    1364          95 :         (!IsSigned && isUInt<12>(COffsetVal))) {
    1365             :       Addr = N0;
    1366             :       OffsetVal = COffsetVal;
    1367             :     }
    1368             :   }
    1369             : 
    1370       11470 :   VAddr = Addr;
    1371       34410 :   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
    1372       34410 :   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
    1373             : 
    1374       11470 :   return true;
    1375             : }
    1376             : 
    1377             : bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
    1378             :                                           SDValue &VAddr,
    1379             :                                           SDValue &Offset,
    1380             :                                           SDValue &SLC) const {
    1381        1003 :   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
    1382             : }
    1383             : 
    1384             : bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
    1385             :                                           SDValue &VAddr,
    1386             :                                           SDValue &Offset,
    1387             :                                           SDValue &SLC) const {
    1388         212 :   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
    1389             : }
    1390             : 
    1391       21417 : bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
    1392             :                                           SDValue &Offset, bool &Imm) const {
    1393             : 
    1394             :   // FIXME: Handle non-constant offsets.
    1395             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
    1396             :   if (!C)
    1397             :     return false;
    1398             : 
    1399             :   SDLoc SL(ByteOffsetNode);
    1400       21362 :   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
    1401       21362 :   int64_t ByteOffset = C->getSExtValue();
    1402       21362 :   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
    1403             : 
    1404       21362 :   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    1405       42552 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1406       21276 :     Imm = true;
    1407             :     return true;
    1408             :   }
    1409             : 
    1410          86 :   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    1411             :     return false;
    1412             : 
    1413          74 :   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    1414             :     // 32-bit Immediates are supported on Sea Islands.
    1415          66 :     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    1416             :   } else {
    1417          82 :     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    1418          82 :     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
    1419             :                                             C32Bit), 0);
    1420             :   }
    1421          74 :   Imm = false;
    1422             :   return true;
    1423             : }
    1424             : 
    1425       24076 : SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
    1426             :   if (Addr.getValueType() != MVT::i32)
    1427       23962 :     return Addr;
    1428             : 
    1429             :   // Zero-extend a 32-bit address.
    1430             :   SDLoc SL(Addr);
    1431             : 
    1432         114 :   const MachineFunction &MF = CurDAG->getMachineFunction();
    1433             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
    1434         114 :   unsigned AddrHiVal = Info->get32BitAddressHighBits();
    1435         228 :   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
    1436             : 
    1437             :   const SDValue Ops[] = {
    1438         114 :     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    1439             :     Addr,
    1440         114 :     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    1441         342 :     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
    1442             :             0),
    1443         114 :     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
    1444         342 :   };
    1445             : 
    1446         342 :   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
    1447         114 :                                         Ops), 0);
    1448             : }
    1449             : 
    1450       24076 : bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
    1451             :                                      SDValue &Offset, bool &Imm) const {
    1452             :   SDLoc SL(Addr);
    1453             : 
    1454       24076 :   if (CurDAG->isBaseWithConstantOffset(Addr)) {
    1455       20955 :     SDValue N0 = Addr.getOperand(0);
    1456       20955 :     SDValue N1 = Addr.getOperand(1);
    1457             : 
    1458       20955 :     if (SelectSMRDOffset(N1, Offset, Imm)) {
    1459       20943 :       SBase = Expand32BitAddress(N0);
    1460       20943 :       return true;
    1461             :     }
    1462             :   }
    1463        3133 :   SBase = Expand32BitAddress(Addr);
    1464        6266 :   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1465        3133 :   Imm = true;
    1466        3133 :   return true;
    1467             : }
    1468             : 
    1469             : bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
    1470             :                                        SDValue &Offset) const {
    1471             :   bool Imm;
    1472       24040 :   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
    1473             : }
    1474             : 
    1475           9 : bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
    1476             :                                          SDValue &Offset) const {
    1477             : 
    1478           9 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1479             :     return false;
    1480             : 
    1481             :   bool Imm;
    1482           9 :   if (!SelectSMRD(Addr, SBase, Offset, Imm))
    1483             :     return false;
    1484             : 
    1485           9 :   return !Imm && isa<ConstantSDNode>(Offset);
    1486             : }
    1487             : 
    1488          27 : bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
    1489             :                                         SDValue &Offset) const {
    1490             :   bool Imm;
    1491          27 :   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
    1492          27 :          !isa<ConstantSDNode>(Offset);
    1493             : }
    1494             : 
    1495             : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
    1496             :                                              SDValue &Offset) const {
    1497             :   bool Imm;
    1498         448 :   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
    1499             : }
    1500             : 
    1501          38 : bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
    1502             :                                                SDValue &Offset) const {
    1503          38 :   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    1504             :     return false;
    1505             : 
    1506             :   bool Imm;
    1507          14 :   if (!SelectSMRDOffset(Addr, Offset, Imm))
    1508             :     return false;
    1509             : 
    1510           3 :   return !Imm && isa<ConstantSDNode>(Offset);
    1511             : }
    1512             : 
    1513       67495 : bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
    1514             :                                             SDValue &Base,
    1515             :                                             SDValue &Offset) const {
    1516             :   SDLoc DL(Index);
    1517             : 
    1518       67495 :   if (CurDAG->isBaseWithConstantOffset(Index)) {
    1519          80 :     SDValue N0 = Index.getOperand(0);
    1520          80 :     SDValue N1 = Index.getOperand(1);
    1521             :     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    1522             : 
    1523             :     // (add n0, c0)
    1524          80 :     Base = N0;
    1525         160 :     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    1526             :     return true;
    1527             :   }
    1528             : 
    1529             :   if (isa<ConstantSDNode>(Index))
    1530             :     return false;
    1531             : 
    1532          81 :   Base = Index;
    1533         162 :   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    1534             :   return true;
    1535             : }
    1536             : 
    1537        3962 : SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
    1538             :                                      SDValue Val, uint32_t Offset,
    1539             :                                      uint32_t Width) {
    1540             :   // Transformation function, pack the offset and width of a BFE into
    1541             :   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
    1542             :   // source, bits [5:0] contain the offset and bits [22:16] the width.
    1543        3962 :   uint32_t PackedVal = Offset | (Width << 16);
    1544        7924 :   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
    1545             : 
    1546        7924 :   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
    1547             : }
    1548             : 
    1549         209 : void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
    1550             :   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
    1551             :   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
    1552             :   // Predicate: 0 < b <= c < 32
    1553             : 
    1554         209 :   const SDValue &Shl = N->getOperand(0);
    1555         209 :   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
    1556             :   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1557             : 
    1558         209 :   if (B && C) {
    1559         378 :     uint32_t BVal = B->getZExtValue();
    1560         378 :     uint32_t CVal = C->getZExtValue();
    1561             : 
    1562         189 :     if (0 < BVal && BVal <= CVal && CVal < 32) {
    1563         178 :       bool Signed = N->getOpcode() == ISD::SRA;
    1564         178 :       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    1565             : 
    1566         534 :       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
    1567             :                               32 - CVal));
    1568         178 :       return;
    1569             :     }
    1570             :   }
    1571             :   SelectCode(N);
    1572             : }
    1573             : 
    1574       19226 : void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
                         // Tries to fold a shift/mask or shift/sign-extend pair rooted at N into a
                         // single S_BFE_U32 / S_BFE_I32 node. Every path that does not match
                         // breaks out of the switch and falls back to SelectCode(N).
    1575       38452 :   switch (N->getOpcode()) {
    1576        6396 :   case ISD::AND:
    1577       12792 :     if (N->getOperand(0).getOpcode() == ISD::SRL) {
    1578             :       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
    1579             :       // Predicate: isMask(mask)
    1580             :       const SDValue &Srl = N->getOperand(0);
    1581             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
    1582             :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1583             : 
                             // Both the shift amount and the mask must be constants.
    1584        1704 :       if (Shift && Mask) {
    1585        3378 :         uint32_t ShiftVal = Shift->getZExtValue();
    1586        3378 :         uint32_t MaskVal = Mask->getZExtValue();
    1587             : 
    1588             :         if (isMask_32(MaskVal)) {
    1589             :           uint32_t WidthVal = countPopulation(MaskVal);
    1590             : 
    1591        3022 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1592             :                                   Srl.getOperand(0), ShiftVal, WidthVal));
    1593        1511 :           return;
    1594             :         }
    1595             :       }
    1596             :     }
    1597             :     break;
    1598        6148 :   case ISD::SRL:
    1599       12296 :     if (N->getOperand(0).getOpcode() == ISD::AND) {
    1600             :       // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
    1601             :       // Predicate: isMask(mask >> b)
    1602             :       const SDValue &And = N->getOperand(0);
    1603             :       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
    1604         797 :       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
    1605             : 
    1606         797 :       if (Shift && Mask) {
    1607        1588 :         uint32_t ShiftVal = Shift->getZExtValue();
                               // The mask is pre-shifted so the predicate and the width are computed
                               // on the bits that survive the srl.
    1608        1588 :         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
    1609             : 
    1610             :         if (isMask_32(MaskVal)) {
    1611             :           uint32_t WidthVal = countPopulation(MaskVal);
    1612             : 
    1613        1588 :           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
    1614             :                                   And.getOperand(0), ShiftVal, WidthVal));
    1615         794 :           return;
    1616             :         }
    1617             :       }
    1618        5351 :     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
                             // "(a shl c) srl b" is delegated to SelectS_BFEFromShifts.
    1619          16 :       SelectS_BFEFromShifts(N);
    1620          16 :       return;
    1621             :     }
    1622             :     break;
    1623        2202 :   case ISD::SRA:
    1624        4404 :     if (N->getOperand(0).getOpcode() == ISD::SHL) {
                             // "(a shl c) sra b" is delegated to SelectS_BFEFromShifts.
    1625         193 :       SelectS_BFEFromShifts(N);
    1626         193 :       return;
    1627             :     }
    1628             :     break;
    1629             : 
    1630        4480 :   case ISD::SIGN_EXTEND_INREG: {
    1631             :     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    1632        4480 :     SDValue Src = N->getOperand(0);
    1633        4480 :     if (Src.getOpcode() != ISD::SRL)
    1634             :       break;
    1635             : 
                           // Only a constant shift amount can become the BFE offset.
    1636             :     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    1637             :     if (!Amt)
    1638             :       break;
    1639             : 
                           // The field width is the bit size of the type being sign-extended from.
    1640        1347 :     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    1641        4041 :     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
    1642        1347 :                             Amt->getZExtValue(), Width));
    1643             :     return;
    1644             :   }
    1645             :   }
    1646             : 
                         // No BFE pattern matched: use the generated matcher.
    1647             :   SelectCode(N);
    1648             : }
    1649             : 
    1650         502 : bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
                         // Returns true when the BRCOND node N has a single use and its condition
                         // is a single-use SETCC whose compared operands are i32, or i64 with an
                         // eq/ne condition code on a subtarget reporting hasScalarCompareEq64().
    1651             :   assert(N->getOpcode() == ISD::BRCOND);
    1652             :   if (!N->hasOneUse())
    1653             :     return false;
    1654             : 
    1655         502 :   SDValue Cond = N->getOperand(1);
                         // Look through a CopyToReg to its operand 2.
    1656         502 :   if (Cond.getOpcode() == ISD::CopyToReg)
    1657           0 :     Cond = Cond.getOperand(2);
    1658             : 
    1659         957 :   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    1660             :     return false;
    1661             : 
                         // The decision is based on the type of the compared operands, not on the
                         // type of the SETCC result.
    1662             :   MVT VT = Cond.getOperand(0).getSimpleValueType();
    1663         449 :   if (VT == MVT::i32)
    1664             :     return true;
    1665             : 
    1666          88 :   if (VT == MVT::i64) {
    1667          29 :     auto ST = static_cast<const SISubtarget *>(Subtarget);
    1668             : 
                           // Operand 2 of SETCC holds the condition code.
    1669          29 :     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    1670          56 :     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
    1671             :   }
    1672             : 
    1673             :   return false;
    1674             : }
    1675             : 
    1676         580 : void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
                         // Selects a BRCOND node to SI_BR_UNDEF (undefined condition),
                         // S_CBRANCH_SCC1 or S_CBRANCH_VCCNZ. For the latter two the condition is
                         // first copied into SCC or VCC respectively.
    1677         580 :   SDValue Cond = N->getOperand(1);
    1678             : 
    1679         580 :   if (Cond.isUndef()) {
    1680         156 :     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
    1681             :                          N->getOperand(2), N->getOperand(0));
    1682          78 :     return;
    1683             :   }
    1684             : 
                         // The SCC form is used only when the condition has the shape accepted by
                         // isCBranchSCC() and isUniformBr() holds for the branch.
    1685         502 :   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
    1686             :   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
    1687         502 :   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
    1688             :   SDLoc SL(N);
    1689             : 
    1690         502 :   if (!UseSCCBr) {
    1691             :     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    1692             :     // analyzed what generates the vcc value, so we do not know whether vcc
    1693             :     // bits for disabled lanes are 0.  Thus we need to mask out bits for
    1694             :     // disabled lanes.
    1695             :     //
    1696             :     // For the case that we select S_CBRANCH_SCC1 and it gets
    1697             :     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    1698             :     // SIInstrInfo::moveToVALU which inserts the S_AND.
    1699             :     //
    1700             :     // We could add an analysis of what generates the vcc value here and omit
    1701             :     // the S_AND when it is unnecessary. But it would be better to add a separate
    1702             :     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    1703             :     // catches both cases.
    1704         390 :     Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
    1705             :                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
    1706             :                                Cond),
    1707             :                    0);
    1708             :   }
    1709             : 
                         // Copy the condition into the chosen condition register on N's incoming
                         // chain; the branch then takes result 0 of that copy as its last operand.
    1710        1004 :   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
    1711        1004 :   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
    1712         502 :                        N->getOperand(2), // Basic Block
    1713             :                        VCC.getValue(0));
    1714             : }
    1715             : 
    1716        2572 : void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
                         // Selects an f32 FMA/FMAD node to V_FMA_MIX_F32 / V_MAD_MIX_F32 when at
                         // least one source is matched by SelectVOP3PMadMixModsImpl; otherwise the
                         // node goes through SelectCode(N).
    1717             :   MVT VT = N->getSimpleValueType(0);
    1718        2572 :   bool IsFMA = N->getOpcode() == ISD::FMA;
                         // Fall back to the generated matcher when the type is not f32, when the
                         // subtarget has neither mix instruction family, or when the opcode is
                         // paired with the other family (FMA with mad-mix, FMAD with fma-mix).
    1719        3737 :   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
    1720        4594 :                          !Subtarget->hasFmaMixInsts()) ||
    1721         123 :       ((IsFMA && Subtarget->hasMadMixInsts()) ||
    1722         116 :        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    1723             :     SelectCode(N);
    1724        2410 :     return;
    1725             :   }
    1726             : 
    1727         162 :   SDValue Src0 = N->getOperand(0);
    1728         162 :   SDValue Src1 = N->getOperand(1);
    1729         162 :   SDValue Src2 = N->getOperand(2);
    1730             :   unsigned Src0Mods, Src1Mods, Src2Mods;
    1731             : 
    1732             :   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
    1733             :   // using the conversion from f16.
                         // Each call rewrites SrcN in place and fills in SrcNMods; the boolean
                         // result says whether an f16 conversion was actually folded.
    1734         162 :   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
    1735         162 :   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
    1736         162 :   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
    1737             : 
    1738             :   assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
    1739             :          "fmad selected with denormals enabled");
    1740             :   // TODO: We can select this with f32 denormals enabled if all the sources are
    1741             :   // converted from f16 (in which case fmad isn't legal).
    1742             : 
    1743         162 :   if (Sel0 || Sel1 || Sel2) {
    1744             :     // For dummy operands.
    1745         182 :     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
                           // Operand order: (modifiers, source) for each of the three inputs, then
                           // an i1 zero and two i32 zero placeholders.
    1746             :     SDValue Ops[] = {
    1747         182 :       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
    1748         182 :       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
    1749         182 :       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
    1750         182 :       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
    1751             :       Zero, Zero
    1752         728 :     };
    1753             : 
    1754         182 :     CurDAG->SelectNodeTo(N,
    1755             :                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
    1756          91 :                          MVT::f32, Ops);
    1757             :   } else {
                           // No source used an f16 conversion: keep the ordinary selection.
    1758             :     SelectCode(N);
    1759             :   }
    1760             : }
    1761             : 
    1762             : // This is here because there isn't a way to use the generated sub0_sub1 as the
    1763             : // subreg index to EXTRACT_SUBREG in tablegen.
    1764         195 : void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
                         // Selects a non-flat atomic compare-and-swap to a returning
                         // BUFFER_ATOMIC_CMPSWAP machine node (ADDR64 form first, then OFFSET
                         // form) and extracts the result from the low sub-register(s). Falls back
                         // to SelectCode(N) for flat addresses or when neither addressing form
                         // matches.
    1765             :   MemSDNode *Mem = cast<MemSDNode>(N);
    1766             :   unsigned AS = Mem->getAddressSpace();
    1767         195 :   if (AS == AMDGPUASI.FLAT_ADDRESS) {
    1768             :     SelectCode(N);
    1769         169 :     return;
    1770             :   }
    1771             : 
    1772             :   MVT VT = N->getSimpleValueType(0);
                         // Any type other than i32 selects the X2 opcode variants below.
    1773             :   bool Is32 = (VT == MVT::i32);
    1774             :   SDLoc SL(N);
    1775             : 
    1776             :   MachineSDNode *CmpSwap = nullptr;
                         // First choice: ADDR64 addressing, only on subtargets that have it.
    1777         102 :   if (Subtarget->hasAddr64()) {
    1778          17 :     SDValue SRsrc, VAddr, SOffset, Offset, SLC;
    1779             : 
    1780          17 :     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
    1781           8 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
    1782             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
    1783           8 :       SDValue CmpVal = Mem->getOperand(2);
    1784             : 
    1785             :       // XXX - Do we care about glue operands?
    1786             : 
    1787             :       SDValue Ops[] = {
    1788             :         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1789           8 :       };
    1790             : 
    1791          24 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1792             :     }
    1793             :   }
    1794             : 
                         // Second choice: OFFSET addressing (no VAddr operand).
    1795           8 :   if (!CmpSwap) {
    1796          43 :     SDValue SRsrc, SOffset, Offset, SLC;
    1797          43 :     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
    1798          18 :       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
    1799             :         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
    1800             : 
    1801          18 :       SDValue CmpVal = Mem->getOperand(2);
    1802             :       SDValue Ops[] = {
    1803             :         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
    1804          18 :       };
    1805             : 
    1806          54 :       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    1807             :     }
    1808             :   }
    1809             : 
                         // Neither MUBUF form matched: leave it to the generated matcher.
    1810          51 :   if (!CmpSwap) {
    1811             :     SelectCode(N);
    1812             :     return;
    1813             :   }
    1814             : 
                         // Attach the original node's memory operand to the new machine node.
    1815          26 :   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
    1816          26 :   *MMOs = Mem->getMemOperand();
    1817          26 :   CmpSwap->setMemRefs(MMOs, MMOs + 1);
    1818             : 
                         // Users of N's value 0 get sub0 (i32) or sub0_sub1 (otherwise) of the
                         // machine node's result 0.
    1819          26 :   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
    1820             :   SDValue Extract
    1821          52 :     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
    1822             : 
    1823          26 :   ReplaceUses(SDValue(N, 0), Extract);
                         // Value 1 is forwarded unchanged (presumably the chain -- confirm against
                         // the node's VT list).
    1824          26 :   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
    1825          26 :   CurDAG->RemoveDeadNode(N);
    1826             : }
    1827             : 
    1828         921 : bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
    1829             :                                             unsigned &Mods) const {
                         // Peels an outer fneg and then an fabs off In, recording each as a
                         // SISrcMods bit in Mods and leaving the remaining value in Src.
                         // Mods is reset to 0 first. Always returns true.
    1830         921 :   Mods = 0;
    1831       22792 :   Src = In;
    1832             : 
                         // fneg is checked first, so only the order fneg(fabs(x)) folds both.
    1833       23713 :   if (Src.getOpcode() == ISD::FNEG) {
    1834          37 :     Mods |= SISrcMods::NEG;
    1835        1491 :     Src = Src.getOperand(0);
    1836             :   }
    1837             : 
    1838       45584 :   if (Src.getOpcode() == ISD::FABS) {
    1839         662 :     Mods |= SISrcMods::ABS;
    1840         662 :     Src = Src.getOperand(0);
    1841             :   }
    1842             : 
    1843         921 :   return true;
    1844             : }
    1845             : 
    1846       21871 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
    1847             :                                         SDValue &SrcMods) const {
                         // Runs SelectVOP3ModsImpl on In and, on success, wraps the resulting
                         // modifier bits in an i32 target constant stored to SrcMods.
    1848             :   unsigned Mods;
    1849             :   if (SelectVOP3ModsImpl(In, Src, Mods)) {
    1850       87484 :     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1851             :     return true;
    1852             :   }
    1853             : 
    1854             :   return false;
    1855             : }
    1856             : 
    1857         492 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
    1858             :                                              SDValue &SrcMods) const {
                         // Same folding as SelectVOP3Mods, but the match result is isNoNanSrc()
                         // evaluated on the stripped source; the result of SelectVOP3Mods itself
                         // is ignored.
    1859         492 :   SelectVOP3Mods(In, Src, SrcMods);
    1860         492 :   return isNoNanSrc(Src);
    1861             : }
    1862             : 
    1863             : bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
                         // Matches only when In is neither an fabs nor an fneg node; Src is then
                         // In unchanged.
    1864        4564 :   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    1865             :     return false;
    1866             : 
    1867        4260 :   Src = In;
    1868             :   return true;
    1869             : }
    1870             : 
    1871       10447 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
    1872             :                                          SDValue &SrcMods, SDValue &Clamp,
    1873             :                                          SDValue &Omod) const {
                         // SelectVOP3Mods plus Clamp and Omod outputs, both set to i1 target
                         // constants of value 0.
    1874             :   SDLoc DL(In);
    1875       20894 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1876       20894 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1877             : 
    1878       20894 :   return SelectVOP3Mods(In, Src, SrcMods);
    1879             : }
    1880             : 
    1881          48 : bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
    1882             :                                                    SDValue &SrcMods,
    1883             :                                                    SDValue &Clamp,
    1884             :                                                    SDValue &Omod) const {
                         // SelectVOP3Mods plus Clamp and Omod outputs, which here share a single
                         // i32 target constant of value 0.
    1885         144 :   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    1886          48 :   return SelectVOP3Mods(In, Src, SrcMods);
    1887             : }
    1888             : 
    1889         533 : bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
    1890             :                                          SDValue &Clamp, SDValue &Omod) const {
                         // Always matches: Src is In unchanged (no source modifiers are folded),
                         // and Clamp and Omod are i1 target constants of value 0.
    1891         533 :   Src = In;
    1892             : 
    1893             :   SDLoc DL(In);
    1894        1066 :   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1895        1066 :   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
    1896             : 
    1897         533 :   return true;
    1898             : }
    1899             : 
    1900             : static SDValue stripBitcast(SDValue Val) {
                         // Returns the operand of Val if Val is a BITCAST, otherwise Val itself.
                         // Only one level of bitcast is removed.
    1901        4330 :   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
    1902             : }
    1903             : 
    1904             : // Figure out if this is really an extract of the high 16-bits of a dword.
    1905         764 : static bool isExtractHiElt(SDValue In, SDValue &Out) {
                         // Matches (optional bitcast of) truncate(srl(x, 16)) with a constant
                         // shift amount. On success Out is x with one bitcast stripped; on failure
                         // Out is left untouched. In is taken by value, so callers may pass the
                         // same SDValue for In and Out.
    1906             :   In = stripBitcast(In);
    1907         764 :   if (In.getOpcode() != ISD::TRUNCATE)
    1908             :     return false;
    1909             : 
    1910         274 :   SDValue Srl = In.getOperand(0);
    1911         274 :   if (Srl.getOpcode() == ISD::SRL) {
    1912             :     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
    1913         342 :       if (ShiftAmt->getZExtValue() == 16) {
    1914         171 :         Out = stripBitcast(Srl.getOperand(0));
    1915         171 :         return true;
    1916             :       }
    1917             :     }
    1918             :   }
    1919             : 
    1920             :   return false;
    1921             : }
    1922             : 
    1923             : // Look through operations that obscure just looking at the low 16-bits of the
    1924             : // same register.
    1925         390 : static SDValue stripExtractLoElt(SDValue In) {
                         // A truncate from a 32-bit value is replaced by that value (with one
                         // bitcast stripped); anything else is returned unchanged.
    1926         390 :   if (In.getOpcode() == ISD::TRUNCATE) {
    1927          28 :     SDValue Src = In.getOperand(0);
    1928          28 :     if (Src.getValueType().getSizeInBits() == 32)
    1929             :       return stripBitcast(Src);
    1930             :   }
    1931             : 
    1932         363 :   return In;
    1933             : }
    1934             : 
    1935         823 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
    1936             :                                          SDValue &SrcMods) const {
                         // Computes the source operand and the modifier bits (as an i32 target
                         // constant in SrcMods) for In. An fneg of the whole value is folded into
                         // the modifiers, and a BUILD_VECTOR whose two elements come from the same
                         // value is reduced to that value with per-half neg/op_sel bits.
                         // Always returns true.
    1937             :   unsigned Mods = 0;
    1938         823 :   Src = In;
    1939             : 
                         // Negating the whole value toggles the neg bit of both halves.
    1940        1646 :   if (Src.getOpcode() == ISD::FNEG) {
    1941             :     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    1942          13 :     Src = Src.getOperand(0);
    1943             :   }
    1944             : 
    1945        1646 :   if (Src.getOpcode() == ISD::BUILD_VECTOR) {
                           // Remember the modifiers collected so far; they are restored below if
                           // the vector turns out not to be a repeated scalar.
    1946             :     unsigned VecMods = Mods;
    1947             : 
    1948         195 :     SDValue Lo = stripBitcast(Src.getOperand(0));
    1949         195 :     SDValue Hi = stripBitcast(Src.getOperand(1));
    1950             : 
                           // Per-element fneg toggles only the matching half's neg bit.
    1951         195 :     if (Lo.getOpcode() == ISD::FNEG) {
    1952          11 :       Lo = stripBitcast(Lo.getOperand(0));
    1953          11 :       Mods ^= SISrcMods::NEG;
    1954             :     }
    1955             : 
    1956         195 :     if (Hi.getOpcode() == ISD::FNEG) {
    1957          11 :       Hi = stripBitcast(Hi.getOperand(0));
    1958          11 :       Mods ^= SISrcMods::NEG_HI;
    1959             :     }
    1960             : 
                           // An element taken from the high 16 bits of a dword sets the op_sel
                           // bit for that half and is replaced by the dword it came from.
    1961         195 :     if (isExtractHiElt(Lo, Lo))
    1962          13 :       Mods |= SISrcMods::OP_SEL_0;
    1963             : 
    1964         195 :     if (isExtractHiElt(Hi, Hi))
    1965          25 :       Mods |= SISrcMods::OP_SEL_1;
    1966             : 
    1967         195 :     Lo = stripExtractLoElt(Lo);
    1968         195 :     Hi = stripExtractLoElt(Hi);
    1969             : 
    1970         148 :     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
    1971             :       // Really a scalar input. Just select from the low half of the register to
    1972             :       // avoid packing.
    1973             : 
    1974          39 :       Src = Lo;
    1975         156 :       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1976          39 :       return true;
    1977             :     }
    1978             : 
                           // Not a repeated scalar: drop the bits gathered from the elements.
    1979             :     Mods = VecMods;
    1980             :   }
    1981             : 
    1982             :   // Packed instructions do not have abs modifiers.
    1983         784 :   Mods |= SISrcMods::OP_SEL_1;
    1984             : 
    1985        3136 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    1986         784 :   return true;
    1987             : }
    1988             : 
    1989         380 : bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
    1990             :                                           SDValue &SrcMods,
    1991             :                                           SDValue &Clamp) const {
                         // SelectVOP3PMods plus a Clamp output, which is always an i32 target
                         // constant of value 0.
    1992             :   SDLoc SL(In);
    1993             : 
    1994             :   // FIXME: Handle clamp and op_sel
    1995         760 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    1996             : 
    1997         760 :   return SelectVOP3PMods(In, Src, SrcMods);
    1998             : }
    1999             : 
    2000          48 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
    2001             :                                          SDValue &SrcMods) const {
                         // Always matches: Src is In unchanged and SrcMods is an i32 target
                         // constant of value 0 (no modifiers are folded yet, see FIXME).
    2002          48 :   Src = In;
    2003             :   // FIXME: Handle op_sel
    2004         144 :   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
    2005          48 :   return true;
    2006             : }
    2007             : 
    2008          16 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
    2009             :                                           SDValue &SrcMods,
    2010             :                                           SDValue &Clamp) const {
                         // SelectVOP3OpSel plus a Clamp output, which is always an i32 target
                         // constant of value 0.
    2011             :   SDLoc SL(In);
    2012             : 
    2013             :   // FIXME: Handle clamp
    2014          32 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2015             : 
    2016          32 :   return SelectVOP3OpSel(In, Src, SrcMods);
    2017             : }
    2018             : 
    2019             : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
    2020             :                                              SDValue &SrcMods) const {
                         // Currently identical to SelectVOP3Mods: only neg/abs are folded.
    2021             :   // FIXME: Handle op_sel
    2022          27 :   return SelectVOP3Mods(In, Src, SrcMods);
    2023             : }
    2024             : 
    2025           9 : bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
    2026             :                                               SDValue &SrcMods,
    2027             :                                               SDValue &Clamp) const {
                         // SelectVOP3OpSelMods plus a Clamp output, which is always an i32 target
                         // constant of value 0.
    2028             :   SDLoc SL(In);
    2029             : 
    2030             :   // FIXME: Handle clamp
    2031          18 :   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
    2032             : 
    2033           9 :   return SelectVOP3OpSelMods(In, Src, SrcMods);
    2034             : }
    2035             : 
    2036             : // The return value is not whether the match is possible (which it always is),
    2037             : // but whether or not a conversion is really used.
    2038         594 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
    2039             :                                                    unsigned &Mods) const {
                         // Folds neg/abs on In, then looks for an fp_extend from f16. When one is
                         // found, Src becomes the f16 source (or the dword it was extracted from),
                         // Mods additionally carries OP_SEL_1 (and OP_SEL_0 for a high-half
                         // extract), and true is returned. Otherwise Src/Mods hold only the
                         // neg/abs folding and false is returned.
    2040         594 :   Mods = 0;
    2041         594 :   SelectVOP3ModsImpl(In, Src, Mods);
    2042             : 
    2043        1188 :   if (Src.getOpcode() == ISD::FP_EXTEND) {
    2044         333 :     Src = Src.getOperand(0);
    2045             :     assert(Src.getValueType() == MVT::f16);
    2046         333 :     Src = stripBitcast(Src);
    2047             : 
    2048             :     // Be careful about folding modifiers if we already have an abs. fneg is
    2049             :     // applied last, so we don't want to apply an earlier fneg.
    2050         333 :     if ((Mods & SISrcMods::ABS) == 0) {
    2051             :       unsigned ModsTmp;
    2052         327 :       SelectVOP3ModsImpl(Src, Src, ModsTmp);
    2053             : 
                             // An inner fneg cancels against an outer one, hence xor rather than or.
    2054         327 :       if ((ModsTmp & SISrcMods::NEG) != 0)
    2055           5 :         Mods ^= SISrcMods::NEG;
    2056             : 
    2057         327 :       if ((ModsTmp & SISrcMods::ABS) != 0)
    2058           6 :         Mods |= SISrcMods::ABS;
    2059             :     }
    2060             : 
    2061             :     // op_sel/op_sel_hi decide the source type and source.
    2062             :     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    2063             :     // If the source's op_sel is set, it picks the high half of the source
    2064             :     // register.
    2065             : 
    2066         333 :     Mods |= SISrcMods::OP_SEL_1;
    2067         333 :     if (isExtractHiElt(Src, Src)) {
    2068         103 :       Mods |= SISrcMods::OP_SEL_0;
    2069             : 
    2070             :       // TODO: Should we try to look for neg/abs here?
    2071             :     }
    2072             : 
    2073             :     return true;
    2074             :   }
    2075             : 
    2076             :   return false;
    2077             : }
    2078             : 
    2079         108 : bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
    2080             :                                                SDValue &SrcMods) const {
                         // Wraps SelectVOP3PMadMixModsImpl: the modifier bits are emitted as an
                         // i32 target constant and the match always succeeds, regardless of
                         // whether an f16 conversion was folded.
    2081         108 :   unsigned Mods = 0;
    2082         108 :   SelectVOP3PMadMixModsImpl(In, Src, Mods);
    2083         432 :   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    2084         108 :   return true;
    2085             : }
    2086             : 
    2087             : // TODO: Can we identify things like v_mad_mixhi_f16?
    2088          46 : bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
                         // Produces in Src a value usable as the high 16-bit element for In:
                         //  - undef is passed through unchanged;
                         //  - integer and FP constants are materialized with V_MOV_B32_e32 of the
                         //    constant's bits shifted left by 16;
                         //  - otherwise the match succeeds only if In is an extract of the high
                         //    half of a dword (see isExtractHiElt).
    2089          46 :   if (In.isUndef()) {
    2090           3 :     Src = In;
    2091             :     return true;
    2092             :   }
    2093             : 
    2094             :   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    2095             :     SDLoc SL(In);
    2096           3 :     SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    2097           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2098           1 :                                                  SL, MVT::i32, K);
    2099           1 :     Src = SDValue(MovK, 0);
    2100             :     return true;
    2101             :   }
    2102             : 
    2103             :   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    2104             :     SDLoc SL(In);
                           // Use the raw bit pattern of the FP constant.
    2105           1 :     SDValue K = CurDAG->getTargetConstant(
    2106           5 :       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    2107           2 :     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
    2108           1 :                                                  SL, MVT::i32, K);
    2109           1 :     Src = SDValue(MovK, 0);
    2110             :     return true;
    2111             :   }
    2112             : 
    2113          41 :   return isExtractHiElt(In, Src);
    2114             : }
    2115             : 
    2116       22454 : void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
                         // Repeatedly offers every machine node in the DAG to
                         // AMDGPUTargetLowering::PostISelFolding and removes dead nodes, until a
                         // complete sweep changes nothing.
    2117             :   const AMDGPUTargetLowering& Lowering =
    2118       22454 :     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
    2119             :   bool IsModified = false;
    2120       24696 :   do {
    2121             :     IsModified = false;
    2122             : 
    2123             :     // Go over all selected nodes and try to fold them a bit more
    2124       24696 :     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    2125     2039350 :     while (Position != CurDAG->allnodes_end()) {
                             // The iterator is advanced before the node is processed.
    2126             :       SDNode *Node = &*Position++;
    2127             :       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
    2128      433193 :       if (!MachineNode)
    2129      433193 :         continue;
    2130             : 
    2131      561786 :       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
                             // Any result other than the node itself counts as a modification; a
                             // null result triggers another sweep without replacing any uses.
    2132      561786 :       if (ResNode != Node) {
    2133       34638 :         if (ResNode)
    2134       34543 :           ReplaceUses(Node, ResNode);
    2135             :         IsModified = true;
    2136             :       }
    2137             :     }
    2138       24696 :     CurDAG->RemoveDeadNodes();
    2139             :   } while (IsModified);
    2140       22454 : }
    2141             : 
    2142        2239 : bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
                         // Caches the R600 subtarget of MF before delegating to the generic
                         // SelectionDAGISel implementation.
    2143        2239 :   Subtarget = &MF.getSubtarget<R600Subtarget>();
    2144        2239 :   return SelectionDAGISel::runOnMachineFunction(MF);
    2145             : }
    2146             : 
    2147        3470 : bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
                         // Returns true if N reads memory from a constant address space.
                         // CbId == -1 accepts CONSTANT_ADDRESS or CONSTANT_ADDRESS_32BIT; any
                         // other CbId accepts only address space CONSTANT_BUFFER_0 + CbId.
    2148        6940 :   if (!N->readMem())
    2149             :     return false;
    2150        3470 :   if (CbId == -1)
    2151           0 :     return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
    2152             :            N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
    2153             : 
    2154        3470 :   return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
    2155             : }
    2156             : 
    2157        6399 : bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
    2158             :                                                          SDValue& IntPtr) {
                         // Matches only a constant address. IntPtr receives the constant divided
                         // by 4 as a target pointer-sized constant.
                         // NOTE(review): the division presumably converts a byte offset into a
                         // dword index -- confirm against the instruction patterns using this.
    2159             :   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    2160       25596 :     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
    2161        6399 :                                        true);
    2162             :     return true;
    2163             :   }
    2164             :   return false;
    2165             : }
    2166             : 
    2167           0 : bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    2168             :     SDValue& BaseReg, SDValue &Offset) {
                         // Matches any address that is not a constant: the address itself becomes
                         // the base register and the offset is a zero target pointer constant.
    2169             :   if (!isa<ConstantSDNode>(Addr)) {
    2170           0 :     BaseReg = Addr;
    2171           0 :     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    2172             :     return true;
    2173             :   }
    2174             :   return false;
    2175             : }
    2176             : 
    2177       83363 : void R600DAGToDAGISel::Select(SDNode *N) {
                         // Main selection hook for R600. Vector-building nodes are routed to
                         // SelectBuildVector with a register class chosen from the element count;
                         // every other opcode is handled by the generated matcher (SelectCode).
    2178       83363 :   unsigned int Opc = N->getOpcode();
    2179       83363 :   if (N->isMachineOpcode()) {
    2180             :     N->setNodeId(-1);
    2181             :     return;   // Already selected.
    2182             :   }
    2183             : 
    2184       83363 :   switch (Opc) {
    2185             :   default: break;
    2186        2430 :   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
    2187             :   case ISD::SCALAR_TO_VECTOR:
    2188             :   case ISD::BUILD_VECTOR: {
    2189        4860 :     EVT VT = N->getValueType(0);
    2190        2430 :     unsigned NumVectorElts = VT.getVectorNumElements();
    2191             :     unsigned RegClassID;
    2192             :     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    2193             :     // that adds a 128 bits reg copy when going through TwoAddressInstructions
    2194             :     // pass. We want to avoid 128 bits copies as much as possible because they
    2195             :     // can't be bundled by our scheduler.
                           // Only 2- and 4-element vectors are supported; a 4-element
                           // BUILD_VERTICAL_VECTOR uses the dedicated vertical register class.
    2196        2430 :     switch(NumVectorElts) {
    2197             :     case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    2198        2040 :     case 4:
    2199        2040 :       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    2200             :         RegClassID = R600::R600_Reg128VerticalRegClassID;
    2201             :       else
    2202             :         RegClassID = R600::R600_Reg128RegClassID;
    2203             :       break;
    2204           0 :     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    2205             :     }
    2206        2430 :     SelectBuildVector(N, RegClassID);
    2207             :     return;
    2208             :   }
    2209             :   }
    2210             : 
    2211             :   SelectCode(N);
    2212             : }
    2213             : 
    // Splits an indirect address into a Base value and an i32 target-constant
    // Offset. Four address shapes are tried in order:
    //   1. Addr is a constant:
    //        Base = register R600::INDIRECT_BASE_ADDR, Offset = the constant.
    //   2. Addr is AMDGPUISD::DWORDADDR wrapping a constant:
    //        Base = register R600::INDIRECT_BASE_ADDR, Offset = wrapped constant.
    //   3. Addr is ISD::ADD or ISD::OR whose operand 1 is a constant:
    //        Base = operand 0, Offset = the constant.
    //   4. Anything else:
    //        Base = Addr, Offset = 0.
    //
    // \param Addr   The address value being matched.
    // \param Base   [out] Base value of the address.
    // \param Offset [out] Immediate offset, built with getTargetConstant as i32.
    // \returns Always true; the final fallback accepts every address.
    2214        2065 : bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
    2215             :                                           SDValue &Offset) {
    2216             :   ConstantSDNode *C;
    2217             :   SDLoc DL(Addr);
    2218             : 
                         // C is assigned inside each condition, so the matching branch can read
                         // the constant it just recognised.
    2219             :   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    2220           0 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    2221           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2222        2065 :   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
    2223             :              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    2224        3588 :     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    2225        3588 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
                         // NOTE(review): ISD::OR is folded exactly like ISD::ADD here, with no
                         // visible check that the base and the constant have disjoint bits --
                         // confirm that OR nodes reaching this point are always add-like.
    2226         542 :   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
    2227             :             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    2228           0 :     Base = Addr.getOperand(0);
    2229           0 :     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    2230             :   } else {
                           // No constant part recognised: use the whole address as the base.
    2231         271 :     Base = Addr;
    2232         542 :     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    2233             :   }
    2234             : 
    2235        2065 :   return true;
    2236             : }
    2237             : 
    // Splits the address of a VTX_READ into a Base value and an i32
    // target-constant Offset. Three shapes are handled, in order:
    //   1. Addr is ISD::ADD whose operand 1 is a constant that passes isInt<16>:
    //        Base = operand 0, Offset = the constant.
    //   2. Addr itself is a constant that passes isInt<16>:
    //        Base = a CopyFromReg of R600::ZERO (chained on the entry node),
    //        Offset = the constant.
    //   3. Anything else:
    //        Base = Addr, Offset = 0.
    //
    // \param Addr   The address value being matched.
    // \param Base   [out] Base value of the address.
    // \param Offset [out] Immediate offset, built with getTargetConstant as i32.
    // \returns Always true; the default case accepts every address.
    //
    // NOTE(review): the 16-bit range test is applied to getZExtValue(). A negative
    // immediate held in a type narrower than 64 bits would zero-extend to a large
    // positive value and fail isInt<16>, so negative offsets presumably never
    // take cases 1 or 2 -- confirm whether that is intended.
    2238        1561 : bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
    2239             :                                           SDValue &Offset) {
    2240             :   ConstantSDNode *IMMOffset;
    2241             : 
                         // IMMOffset is assigned inside the conditions so the range check and the
                         // branch body can both use the constant that was just matched.
    2242             :   if (Addr.getOpcode() == ISD::ADD
    2243             :       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
    2244        2683 :       && isInt<16>(IMMOffset->getZExtValue())) {
    2245             : 
    2246         373 :       Base = Addr.getOperand(0);
    2247        1119 :       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2248         373 :                                          MVT::i32);
    2249         373 :       return true;
    2250             :   // If the pointer address is constant, we can move it to the offset field.
    2251             :   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
    2252         711 :              && isInt<16>(IMMOffset->getZExtValue())) {
                           // The whole address lives in the offset field, so the base is read
                           // from R600::ZERO.
    2253         711 :     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
    2254         237 :                                   SDLoc(CurDAG->getEntryNode()),
    2255         237 :                                   R600::ZERO, MVT::i32);
    2256         711 :     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    2257         237 :                                        MVT::i32);
    2258         237 :     return true;
    2259             :   }
    2260             : 
    2261             :   // Default case, no offset
    2262         951 :   Base = Addr;
    2263        2853 :   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    2264         951 :   return true;
    2265             : }

Generated by: LCOV version 1.13